npm - vellum - Versions diffs - 0.2.2 → 0.2.7 - Mend

vellum 0.2.2 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/bun.lock +68 -100
package/package.json +3 -3
package/src/__tests__/config-schema.test.ts +6 -0
package/src/__tests__/handlers-twilio-config.test.ts +221 -0
package/src/__tests__/ipc-snapshot.test.ts +9 -0
package/src/__tests__/memory-regressions.test.ts +100 -2
package/src/__tests__/provider-commit-message-generator.test.ts +303 -0
package/src/__tests__/session-conflict-gate.test.ts +28 -25
package/src/calls/__tests__/twilio-webhook-urls.test.ts +162 -0
package/src/calls/call-domain.ts +3 -3
package/src/calls/twilio-config.ts +8 -8
package/src/calls/twilio-provider.ts +4 -4
package/src/calls/twilio-webhook-urls.ts +50 -0
package/src/cli/map.ts +30 -6
package/src/config/defaults.ts +1 -0
package/src/config/schema.ts +4 -0
package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -5
package/src/daemon/handlers/config.ts +44 -2
package/src/daemon/ipc-contract-inventory.json +4 -0
package/src/daemon/ipc-contract.ts +23 -0
package/src/daemon/ride-shotgun-handler.ts +2 -1
package/src/daemon/session-agent-loop.ts +37 -2
package/src/daemon/session-conflict-gate.ts +18 -109
package/src/memory/conflict-intent.ts +114 -0
package/src/memory/job-handlers/conflict.ts +23 -1
package/src/runtime/gateway-client.ts +36 -0
package/src/runtime/http-server.ts +58 -2
package/src/runtime/routes/channel-routes.ts +121 -79
package/src/tools/browser/api-map.ts +123 -50
package/src/tools/claude-code/claude-code.ts +130 -0
package/src/workspace/commit-message-enrichment-service.ts +3 -3
package/src/workspace/provider-commit-message-generator.ts +28 -1

package/src/runtime/routes/channel-routes.ts CHANGED Viewed

@@ -10,10 +10,10 @@ import { renderHistoryContent } from '../../daemon/handlers.js';
 import { checkIngressForSecrets } from '../../security/secret-ingress.js';
 import { IngressBlockedError } from '../../util/errors.js';
 import { getLogger } from '../../util/logger.js';
+import { deliverChannelReply } from '../gateway-client.js';
 import type {
   MessageProcessor,
   RuntimeAttachmentMetadata,
-  RuntimeMessagePayload,
 } from '../http-types.js';
 const log = getLogger('runtime-http');
@@ -54,6 +54,7 @@ export async function handleChannelInbound(
     senderExternalUserId?: string;
     senderUsername?: string;
     sourceMetadata?: Record<string, unknown>;
+    replyCallbackUrl?: string;
   };
   const {
@@ -185,41 +186,92 @@ export async function handleChannelInbound(
     ? sourceMetadata.uxBrief.trim()
     : undefined;
-  // For new (non-duplicate) messages, run the agent loop to generate a reply.
-  let processingSucceeded = false;
+  const replyCallbackUrl = body.replyCallbackUrl;
+  // For new (non-duplicate) messages, run the secret ingress check
+  // synchronously, then fire off the agent loop in the background.
   if (!result.duplicate && processMessage) {
+    // Persist the raw payload first so dead-lettered events can always be
+    // replayed. If the ingress check later detects secrets we clear it
+    // before throwing, so secret-bearing content is never left on disk.
+    channelDeliveryStore.storePayload(result.eventId, {
+      sourceChannel, externalChatId, externalMessageId, content,
+      attachmentIds, sourceMetadata: body.sourceMetadata,
+      senderName: body.senderName,
+      senderExternalUserId: body.senderExternalUserId,
+      senderUsername: body.senderUsername,
+      replyCallbackUrl,
+    });
+    const contentToCheck = content ?? '';
+    let ingressCheck: ReturnType<typeof checkIngressForSecrets>;
     try {
-      // Persist the raw payload first so dead-lettered events can always be
-      // replayed. If the ingress check later detects secrets we clear it
-      // before throwing, so secret-bearing content is never left on disk.
-      channelDeliveryStore.storePayload(result.eventId, {
-        sourceChannel, externalChatId, externalMessageId, content,
-        attachmentIds, sourceMetadata: body.sourceMetadata,
-        senderName: body.senderName,
-        senderExternalUserId: body.senderExternalUserId,
-        senderUsername: body.senderUsername,
-      });
+      ingressCheck = checkIngressForSecrets(contentToCheck);
+    } catch (checkErr) {
+      channelDeliveryStore.clearPayload(result.eventId);
+      throw checkErr;
+    }
+    if (ingressCheck.blocked) {
+      channelDeliveryStore.clearPayload(result.eventId);
+      throw new IngressBlockedError(ingressCheck.userNotice!, ingressCheck.detectedTypes);
+    }
-      const contentToCheck = content ?? '';
-      let ingressCheck: ReturnType<typeof checkIngressForSecrets>;
-      try {
-        ingressCheck = checkIngressForSecrets(contentToCheck);
-      } catch (checkErr) {
-        // If the secret check itself throws (e.g. ConfigError from corrupt
-        // config), clear the stored payload so secret-bearing content is
-        // never left on disk.
-        channelDeliveryStore.clearPayload(result.eventId);
-        throw checkErr;
-      }
-      if (ingressCheck.blocked) {
-        channelDeliveryStore.clearPayload(result.eventId);
-        throw new IngressBlockedError(ingressCheck.userNotice!, ingressCheck.detectedTypes);
-      }
+    // Fire-and-forget: process the message and deliver the reply in the background.
+    // The HTTP response returns immediately so the gateway webhook is not blocked.
+    processChannelMessageInBackground({
+      processMessage,
+      conversationId: result.conversationId,
+      eventId: result.eventId,
+      content: content ?? '',
+      attachmentIds: hasAttachments ? attachmentIds : undefined,
+      sourceChannel,
+      externalChatId,
+      metadataHints,
+      metadataUxBrief,
+      replyCallbackUrl,
+    });
+  }
+  return Response.json({
+    accepted: result.accepted,
+    duplicate: result.duplicate,
+    eventId: result.eventId,
+  });
+}
+/**
+ * Everything processChannelMessageInBackground needs to handle one inbound
+ * channel message without blocking the HTTP response of handleChannelInbound.
+ */
+interface BackgroundProcessingParams {
+  /** Called with the conversation id, content and attachment ids; resolves with the stored user message id. */
+  processMessage: MessageProcessor;
+  /** Conversation the message belongs to; also used to look up the assistant reply for callback delivery. */
+  conversationId: string;
+  /** Inbound event id used for the linkMessage / markProcessed / recordProcessingFailure bookkeeping. */
+  eventId: string;
+  /** Message text; the caller passes '' when the inbound body carried no content. */
+  content: string;
+  /** Attachment ids, or undefined when the inbound message had no attachments. */
+  attachmentIds?: string[];
+  /** Source channel identifier; used as the transport channelId and as the trailing processMessage argument. */
+  sourceChannel: string;
+  /** Chat id on the external platform; sent as `chatId` when the reply is delivered. */
+  externalChatId: string;
+  // NOTE(review): the two metadata fields are consumed in lines this diff elides —
+  // presumably forwarded inside the transport options; confirm against the full file.
+  metadataHints: string[];
+  metadataUxBrief?: string;
+  /** When set, the assistant reply is pushed to this URL once processing has succeeded. */
+  replyCallbackUrl?: string;
+}
+function processChannelMessageInBackground(params: BackgroundProcessingParams): void {
+  const {
+    processMessage,
+    conversationId,
+    eventId,
+    content,
+    attachmentIds,
+    sourceChannel,
+    externalChatId,
+    metadataHints,
+    metadataUxBrief,
+    replyCallbackUrl,
+  } = params;
+  (async () => {
+    try {
       const { messageId: userMessageId } = await processMessage(
-        result.conversationId,
-        content ?? '',
-        hasAttachments ? attachmentIds : undefined,
+        conversationId,
+        content,
+        attachmentIds,
         {
           transport: {
             channelId: sourceChannel,
@@ -229,60 +281,50 @@ export async function handleChannelInbound(
         },
         sourceChannel,
       );
-      // Link the user message to the inbound event so edits can find it later
-      channelDeliveryStore.linkMessage(result.eventId, userMessageId);
-      channelDeliveryStore.markProcessed(result.eventId);
-      processingSucceeded = true;
+      channelDeliveryStore.linkMessage(eventId, userMessageId);
+      channelDeliveryStore.markProcessed(eventId);
+      if (replyCallbackUrl) {
+        await deliverReplyViaCallback(conversationId, externalChatId, replyCallbackUrl);
+      }
     } catch (err) {
-      // Secret ingress blocks are not retryable — let the top-level handler return 422
-      if (err instanceof IngressBlockedError) throw err;
-      log.error({ err, conversationId: result.conversationId }, 'Failed to process channel inbound message');
-      channelDeliveryStore.recordProcessingFailure(result.eventId, err);
+      log.error({ err, conversationId }, 'Background channel message processing failed');
+      channelDeliveryStore.recordProcessingFailure(eventId, err);
     }
-  }
+  })();
+}
-  // Only look up the assistant reply when processing succeeded for a new
-  // (non-duplicate) message.  For duplicates or failed processing, returning
-  // a stale assistant message could cause the caller to resend old replies.
-  let assistantMessage: RuntimeMessagePayload | undefined;
-  if (processingSucceeded) {
-    const msgs = conversationStore.getMessages(result.conversationId);
-    for (let i = msgs.length - 1; i >= 0; i--) {
-      if (msgs[i].role === 'assistant') {
-        let parsed: unknown;
-        try { parsed = JSON.parse(msgs[i].content); } catch { parsed = msgs[i].content; }
-        const rendered = renderHistoryContent(parsed);
-        const linked = attachmentsStore.getAttachmentMetadataForMessage(msgs[i].id);
-        const replyAttachments: RuntimeAttachmentMetadata[] = linked.map((a) => ({
-          id: a.id,
-          filename: a.originalFilename,
-          mimeType: a.mimeType,
-          sizeBytes: a.sizeBytes,
-          kind: a.kind,
-        }));
-        // Include the reply if it has text or attachments
-        if (rendered.text || replyAttachments.length > 0) {
-          assistantMessage = {
-            id: msgs[i].id,
-            role: 'assistant',
-            content: rendered.text,
-            timestamp: new Date(msgs[i].createdAt).toISOString(),
-            attachments: replyAttachments,
-          };
-        }
-        break;
+/**
+ * Deliver the conversation's most recent assistant message to the gateway callback.
+ *
+ * Walks the stored messages newest-first, stops at the first one whose role is
+ * 'assistant', and forwards its rendered text and attachment metadata through
+ * deliverChannelReply. Nothing is sent when that message has neither text nor
+ * attachments, or when the conversation holds no assistant message at all.
+ *
+ * NOTE(review): the lookup is by position only — it does not check that the
+ * message was produced for the inbound event being processed. Confirm that a
+ * stale earlier reply cannot be re-sent when a run yields no new assistant message.
+ *
+ * @param conversationId Conversation whose messages are searched.
+ * @param externalChatId Sent as `chatId` in the callback payload.
+ * @param callbackUrl    Destination handed to deliverChannelReply.
+ */
+async function deliverReplyViaCallback(
+  conversationId: string,
+  externalChatId: string,
+  callbackUrl: string,
+): Promise<void> {
+  const msgs = conversationStore.getMessages(conversationId);
+  for (let i = msgs.length - 1; i >= 0; i--) {
+    if (msgs[i].role === 'assistant') {
+      let parsed: unknown;
+      // Stored content may be JSON or plain text; fall back to the raw string when parsing fails.
+      try { parsed = JSON.parse(msgs[i].content); } catch { parsed = msgs[i].content; }
+      const rendered = renderHistoryContent(parsed);
+      const linked = attachmentsStore.getAttachmentMetadataForMessage(msgs[i].id);
+      const replyAttachments: RuntimeAttachmentMetadata[] = linked.map((a) => ({
+        id: a.id,
+        filename: a.originalFilename,
+        mimeType: a.mimeType,
+        sizeBytes: a.sizeBytes,
+        kind: a.kind,
+      }));
+      // Empty text / empty attachment lists are sent as undefined rather than as empty values.
+      if (rendered.text || replyAttachments.length > 0) {
+        await deliverChannelReply(callbackUrl, {
+          chatId: externalChatId,
+          text: rendered.text || undefined,
+          attachments: replyAttachments.length > 0 ? replyAttachments : undefined,
+        });
       }
+      // Only the newest assistant message is considered, whether or not it was delivered.
+      break;
     }
   }
-  return Response.json({
-    accepted: result.accepted,
-    duplicate: result.duplicate,
-    eventId: result.eventId,
-    ...(assistantMessage ? { assistantMessage } : {}),
-  });
 }
 export function handleListDeadLetters(): Response {

package/src/tools/browser/api-map.ts CHANGED Viewed

@@ -38,12 +38,31 @@ export interface ApiMapResult {
 const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
 const NUMERIC_RE = /^\d+$/;
 const HEX_HASH_RE = /^[0-9a-f]{8,}$/i;
+/** Matches a whole path segment shaped like a dash-separated date: 4 digits, 2 digits, 2 digits. */
+const DATE_RE = /^\d{4}-\d{2}-\d{2}$/;
+/**
+ * URL path patterns that indicate non-API noise. analyzeApiMap skips every
+ * recorded entry whose pathname matches at least one of them.
+ */
+const NOISE_PATH_PATTERNS = [
+  // Directory-style prefixes, matched anywhere in the path.
+  /\/web-translations\//,
+  /\/cdn-cgi\//,
+  // File extensions, matched at the end of the path.
+  /\.properties$/,
+  /\.js$/,
+  /\.css$/,
+  /\.woff2?$/,
+  /\.png$/,
+  /\.jpg$/,
+  /\.svg$/,
+  /\.ico$/,
+  /\.map$/,
+  // Preference services, matched anywhere in the path.
+  /\/preference\//,
+  /\/userpreference-service\//,
+];
 /** Returns true when a path segment looks like a dynamic ID. */
 function isIdSegment(segment: string): boolean {
   if (NUMERIC_RE.test(segment)) return true;
   if (UUID_RE.test(segment)) return true;
   if (HEX_HASH_RE.test(segment)) return true;
+  // Dash-separated dates (e.g. 2024-01-31) slip past the numeric, UUID and hex checks above.
+  if (DATE_RE.test(segment)) return true;
   return false;
 }
@@ -69,27 +88,43 @@ function tryParseJson(text: string | undefined): Record<string, unknown> | undef
   return undefined;
 }
+/**
+ * Patterns tried in order against a GraphQL document; capture group 1 is the label.
+ * The anchored forms come first so that a field which merely happens to be called
+ * `query`, `mutation` or `subscription` deeper in the document cannot be mistaken
+ * for an operation keyword. The last two are the original unanchored heuristics,
+ * kept as fallbacks for documents that open with fragment definitions.
+ */
+const GRAPHQL_OPERATION_PATTERNS: readonly RegExp[] = [
+  // Named operation opening the document: "query FooBar { ..." / "mutation FooBar(...) { ..."
+  /^\s*(?:query|mutation|subscription)\s+(\w+)/,
+  // Anonymous operation opening the document — first field name:
+  // "{ fooBar", "query{fooBar(", "query { fooBar {", "query($id: ID!) { fooBar"
+  /^\s*(?:(?:query|mutation|subscription)\b\s*(?:\([^)]*\)\s*)?)?\{\s*(\w+)/,
+  // Fallbacks: first keyword found anywhere in the document.
+  /(?:query|mutation|subscription)\s+(\w+)/,
+  /(?:query|mutation|subscription)\s*\{?\s*(\w+)/,
+];
+/**
+ * Derive a label for the GraphQL operation carried in a JSON request body.
+ *
+ * Resolution order:
+ *   1. an explicit, non-empty string `operationName` field;
+ *   2. the name of the operation that opens the `query` document;
+ *   3. the first field of an anonymous operation that opens the document
+ *      (including the `{ field }` shorthand and anonymous operations that
+ *      declare variables);
+ *   4. the first operation keyword found anywhere in the document.
+ *
+ * @param postData Raw request body text.
+ * @returns The label, or null when the body is missing, tryParseJson yields
+ *   nothing for it, or no operation can be recognized.
+ */
+function extractGraphQLOperationName(postData: string | undefined): string | null {
+  if (!postData) return null;
+  const body = tryParseJson(postData);
+  if (!body) return null;
+  if (typeof body.operationName === 'string' && body.operationName) return body.operationName;
+  if (typeof body.query !== 'string') return null;
+  for (const pattern of GRAPHQL_OPERATION_PATTERNS) {
+    const match = body.query.match(pattern);
+    if (match) return match[1];
+  }
+  return null;
+}
 // ---------------------------------------------------------------------------
 // Core analysis
 // ---------------------------------------------------------------------------
+/** Accumulator for all recorded requests that share one `METHOD urlPattern` key in analyzeApiMap. */
+interface GroupData {
+  /** Upper-cased HTTP method of the grouped requests. */
+  method: string;
+  /** Hostname plus normalized path; GraphQL POSTs may carry a ` → operationName` suffix. */
+  urlPattern: string;
+  // NOTE(review): assigned where the group is first created, which this diff elides —
+  // presumably the first raw URL seen for the group; confirm.
+  exampleUrl: string;
+  /** Query-string parameter names seen across the group. */
+  queryParams: Set<string>;
+  /** Top-level JSON body keys of POST/PUT/PATCH requests, excluding `query`, `operationName` and `extensions`. */
+  requestBodyKeys: Set<string>;
+  /** Distinct HTTP status codes observed on responses. */
+  responseStatus: Set<number>;
+  // NOTE(review): fed from the top-level keys of the parsed response body; the
+  // insertion itself falls in an elided hunk — confirm.
+  responseBodyKeys: Set<string>;
+  /** Number of recorded entries folded into this group. */
+  count: number;
+}
 export function analyzeApiMap(
   entries: NetworkRecordedEntry[],
   domain: string,
 ): ApiMapResult {
-  const groups = new Map<
-    string,
-    {
-      method: string;
-      urlPattern: string;
-      exampleUrl: string;
-      queryParams: Set<string>;
-      requestBodyKeys: Set<string>;
-      responseStatus: Set<number>;
-      responseBodyKeys: Set<string>;
-      count: number;
-    }
-  >();
+  const groups = new Map<string, GroupData>();
   for (const entry of entries) {
     const { request, response } = entry;
@@ -97,11 +132,30 @@ export function analyzeApiMap(
     try {
       parsed = new URL(request.url);
     } catch {
-      continue; // skip malformed URLs
+      continue;
     }
+    // Skip non-API noise
+    if (NOISE_PATH_PATTERNS.some(p => p.test(parsed.pathname))) continue;
+    // Skip non-JSON responses
+    const mimeType = response?.mimeType ?? '';
+    if (response && !mimeType.includes('json') && !mimeType.includes('graphql')) continue;
     const method = request.method.toUpperCase();
-    const urlPattern = `${parsed.hostname}${normalizePathSegments(parsed.pathname)}`;
+    const normalizedPath = normalizePathSegments(parsed.pathname);
+    const basePattern = `${parsed.hostname}${normalizedPath}`;
+    // For GraphQL endpoints, split by operation name
+    let urlPattern = basePattern;
+    const isGraphQL = normalizedPath.includes('graphql');
+    if (isGraphQL && method === 'POST') {
+      const opName = extractGraphQLOperationName(request.postData);
+      if (opName) {
+        urlPattern = `${basePattern} → ${opName}`;
+      }
+    }
     const key = `${method} ${urlPattern}`;
     let group = groups.get(key);
@@ -121,26 +175,23 @@ export function analyzeApiMap(
     group.count++;
-    // Collect query param keys
     for (const paramKey of parsed.searchParams.keys()) {
       group.queryParams.add(paramKey);
     }
-    // Request body keys (POST/PUT/PATCH)
     if (['POST', 'PUT', 'PATCH'].includes(method)) {
       const body = tryParseJson(request.postData);
       if (body) {
         for (const k of Object.keys(body)) {
-          group.requestBodyKeys.add(k);
+          if (k !== 'query' && k !== 'operationName' && k !== 'extensions') {
+            group.requestBodyKeys.add(k);
+          }
         }
       }
     }
-    // Response status
     if (response) {
       group.responseStatus.add(response.status);
-      // Response body keys
       const resBody = tryParseJson(response.body);
       if (resBody) {
         for (const k of Object.keys(resBody)) {
@@ -161,13 +212,21 @@ export function analyzeApiMap(
     count: g.count,
   }));
-  // Sort by count descending, then by urlPattern for stability
-  endpoints.sort((a, b) => b.count - a.count || a.urlPattern.localeCompare(b.urlPattern));
+  // Sort: data endpoints first (low count = unique pages), then boilerplate
+  // Within each tier, sort alphabetically by pattern for readability
+  endpoints.sort((a, b) => {
+    const aIsBoilerplate = a.count > 15;
+    const bIsBoilerplate = b.count > 15;
+    if (aIsBoilerplate !== bIsBoilerplate) return aIsBoilerplate ? 1 : -1;
+    return a.urlPattern.localeCompare(b.urlPattern);
+  });
+  const totalApiRequests = endpoints.reduce((sum, ep) => sum + ep.count, 0);
   return {
     domain,
     analyzedAt: Date.now(),
-    totalRequests: entries.length,
+    totalRequests: totalApiRequests,
     endpoints,
   };
 }
@@ -191,30 +250,44 @@ export function saveApiMap(domain: string, result: ApiMapResult): string {
 // ---------------------------------------------------------------------------
+/**
+ * Print the API map to stdout as two tables: endpoints hit at most 15 times
+ * ("DATA ENDPOINTS") followed by endpoints hit more than 15 times
+ * ("PAGE-LOAD BOILERPLATE"). A section with no endpoints is omitted.
+ *
+ * @param result Analysis result whose `endpoints` are rendered.
+ */
 export function printApiMapTable(result: ApiMapResult): void {
-  console.log(`\nAPI Map for ${result.domain} — ${result.totalRequests} total requests, ${result.endpoints.length} unique endpoints\n`);
-  const header = ['Method', 'URL Pattern', 'Count', 'Status', 'Query Params'];
-  const rows = result.endpoints.map((ep) => [
-    ep.method,
-    ep.urlPattern,
-    String(ep.count),
-    ep.responseStatus.join(',') || '-',
-    ep.queryParams.join(',') || '-',
-  ]);
-  // Calculate column widths
-  const widths = header.map((h, i) =>
-    Math.max(h.length, ...rows.map((r) => r[i].length)),
-  );
-  const sep = widths.map((w) => '-'.repeat(w)).join(' | ');
-  const fmt = (row: string[]) =>
-    row.map((cell, i) => cell.padEnd(widths[i])).join(' | ');
-  console.log(fmt(header));
-  console.log(sep);
-  for (const row of rows) {
-    console.log(fmt(row));
-  }
-  console.log();
+  // Same 15-hit threshold that analyzeApiMap's sort uses to separate the two tiers.
+  const dataEndpoints = result.endpoints.filter(ep => ep.count <= 15);
+  const boilerplate = result.endpoints.filter(ep => ep.count > 15);
+  console.log(`\nAPI Map for ${result.domain} — ${result.endpoints.length} endpoints discovered\n`);
+  // urlPattern begins with the hostname; keep only the part from the first '/' onward.
+  const stripDomain = (pattern: string) => {
+    const idx = pattern.indexOf('/');
+    return idx >= 0 ? pattern.slice(idx) : pattern;
+  };
+  const printSection = (title: string, eps: ApiEndpoint[]) => {
+    if (eps.length === 0) return;
+    console.log(`  ${title} (${eps.length})\n`);
+    const header = ['Method', 'Endpoint', 'Hits', 'Response Keys'];
+    const rows = eps.map((ep) => [
+      ep.method,
+      stripDomain(ep.urlPattern),
+      String(ep.count),
+      // At most five response keys are listed per endpoint.
+      ep.responseBodyKeys.slice(0, 5).join(', ') || '-',
+    ]);
+    // Column width = longest cell, capped at 72 (Endpoint), 50 (Response Keys) or 200 (other columns).
+    const widths = header.map((h, i) =>
+      Math.min(i === 1 ? 72 : i === 3 ? 50 : 200, Math.max(h.length, ...rows.map((r) => r[i].length))),
+    );
+    const sep = widths.map((w) => '-'.repeat(w)).join(' | ');
+    // Cells longer than their column are cut to the column width; shorter ones are right-padded.
+    const fmt = (row: string[]) =>
+      row.map((cell, i) => cell.slice(0, widths[i]).padEnd(widths[i])).join(' | ');
+    console.log(`  ${fmt(header)}`);
+    console.log(`  ${sep}`);
+    for (const row of rows) {
+      console.log(`  ${fmt(row)}`);
+    }
+    console.log();
+  };
+  printSection('DATA ENDPOINTS', dataEndpoints);
+  printSection('PAGE-LOAD BOILERPLATE', boilerplate);
 }

package/src/tools/claude-code/claude-code.ts CHANGED Viewed

@@ -28,6 +28,25 @@ const VALID_PROFILES: readonly WorkerProfile[] = ['general', 'researcher', 'code
 const MAX_CLAUDE_CODE_DEPTH = 1;
 const DEPTH_ENV_VAR = 'VELLUM_CLAUDE_CODE_DEPTH';
+/**
+ * Input field(s), in priority order, that best summarize each known sub-tool.
+ * Keys are lower-cased tool names. A Map is used so that tool names such as
+ * "constructor" cannot resolve to inherited object properties.
+ */
+const TOOL_SUMMARY_FIELDS: ReadonlyMap<string, readonly string[]> = new Map([
+  ['bash', ['command']],
+  ['read', ['file_path', 'path']],
+  ['file_read', ['file_path', 'path']],
+  ['edit', ['file_path', 'path']],
+  ['file_edit', ['file_path', 'path']],
+  ['write', ['file_path', 'path']],
+  ['file_write', ['file_path', 'path']],
+  ['glob', ['pattern']],
+  ['grep', ['pattern']],
+  ['websearch', ['query']],
+  ['web_search', ['query']],
+  ['webfetch', ['url']],
+  ['web_fetch', ['url']],
+  ['task', ['description']],
+]);
+/** Render a scalar as text; objects, arrays and functions yield '' rather than "[object Object]". */
+function toSummaryText(value: unknown): string {
+  if (typeof value === 'string') return value;
+  if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') return String(value);
+  return '';
+}
+/**
+ * Produce a short, human-readable summary of a sub-tool invocation's input.
+ *
+ * Known tools (matched case-insensitively) are summarized by their most
+ * relevant field; for any other tool the first non-empty string value shorter
+ * than 200 characters is used.
+ *
+ * @param toolName Name of the sub-tool being invoked.
+ * @param input    The tool's input object.
+ * @returns The summary text, or '' when nothing suitable is found.
+ */
+function summarizeToolInput(toolName: string, input: Record<string, unknown>): string {
+  // The caller casts an SDK-provided value to this type, so tolerate a
+  // missing or non-object input at runtime instead of throwing mid-stream.
+  if (input === null || typeof input !== 'object') return '';
+  const fields = TOOL_SUMMARY_FIELDS.get(toolName.toLowerCase());
+  if (fields) {
+    // Equivalent to `input.a ?? input.b ?? ''`: the first non-nullish field wins.
+    for (const field of fields) {
+      const value = input[field];
+      if (value != null) return toSummaryText(value);
+    }
+    return '';
+  }
+  // Fallback: first string value
+  for (const val of Object.values(input)) {
+    if (typeof val === 'string' && val.length > 0 && val.length < 200) return val;
+  }
+  return '';
+}
 export const claudeCodeTool: Tool = {
   name: 'claude_code',
   description: 'Delegate a coding task to Claude Code, an AI-powered coding agent that can read, write, and edit files, run shell commands, and perform complex multi-step software engineering tasks autonomously.',
@@ -203,12 +222,22 @@ export const claudeCodeTool: Tool = {
       queryOptions.resume = resumeSessionId;
     }
+    // Declared outside try so the catch block can emit a final tool_complete on error.
+    let lastSubToolName: string | null = null;
     try {
       const conversation = query({ prompt, options: queryOptions });
       let resultText = '';
       let sessionId = '';
       let hasError = false;
+      // Track tool_use_id → {name, inputSummary} for enriching progress events.
+      const toolUseIdInfo = new Map<string, { name: string; inputSummary: string }>();
+      // Track tool_use_ids that we've already emitted tool_start for (to avoid duplicates).
+      const emittedToolUseIds = new Set<string>();
+      // Track the currently active tool_use_id from tool_progress events.
+      let activeToolUseId: string | null = null;
       for await (const message of conversation) {
         switch (message.type) {
           case 'assistant': {
@@ -225,12 +254,103 @@ export const claudeCodeTool: Tool = {
                   context.onOutput?.(block.text);
                   resultText += block.text;
                 }
+                if (block.type === 'tool_use') {
+                  // Capture info keyed by tool_use_id for enriching tool_progress events.
+                  const inputSummary = summarizeToolInput(block.name, block.input as Record<string, unknown>);
+                  toolUseIdInfo.set(block.id, { name: block.name, inputSummary });
+                  // Emit tool_start if we haven't already (tool_progress may have fired first).
+                  // NOTE: Do NOT emit tool_complete for the previous tool here. An assistant
+                  // message may contain multiple tool_use blocks (parallel tool use) and none
+                  // of them have executed yet at this point. Completions are handled by
+                  // tool_use_summary and tool_progress events.
+                  if (!emittedToolUseIds.has(block.id)) {
+                    context.onOutput?.(JSON.stringify({
+                      subType: 'tool_start',
+                      subToolName: block.name,
+                      subToolInput: inputSummary,
+                      subToolId: block.id,
+                    }));
+                    emittedToolUseIds.add(block.id);
+                    lastSubToolName = block.name;
+                    activeToolUseId = block.id;
+                  }
+                }
               }
             }
             sessionId = message.session_id;
             break;
           }
+          case 'tool_progress': {
+            // The SDK fires tool_progress periodically DURING tool execution.
+            // This is our primary signal for live sub-tool progress.
+            const toolUseId = message.tool_use_id;
+            const toolName = message.tool_name;
+            sessionId = message.session_id;
+            // Record tool name if we don't have it yet (tool_progress fires before assistant sometimes).
+            if (!toolUseIdInfo.has(toolUseId)) {
+              toolUseIdInfo.set(toolUseId, { name: toolName, inputSummary: '' });
+            }
+            if (!emittedToolUseIds.has(toolUseId)) {
+              // New tool — mark previous as complete and emit tool_start.
+              if (lastSubToolName && activeToolUseId !== toolUseId) {
+                context.onOutput?.(JSON.stringify({
+                  subType: 'tool_complete',
+                  subToolName: lastSubToolName,
+                  subToolId: activeToolUseId,
+                }));
+              }
+              const inputSummary = toolUseIdInfo.get(toolUseId)?.inputSummary ?? '';
+              context.onOutput?.(JSON.stringify({
+                subType: 'tool_start',
+                subToolName: toolName,
+                subToolInput: inputSummary,
+                subToolId: toolUseId,
+              }));
+              emittedToolUseIds.add(toolUseId);
+              lastSubToolName = toolName;
+            }
+            activeToolUseId = toolUseId;
+            break;
+          }
+          case 'tool_use_summary': {
+            // The SDK fires tool_use_summary after tool execution with a summary
+            // and the IDs of tools that were executed.
+            sessionId = message.session_id;
+            for (const completedId of message.preceding_tool_use_ids) {
+              const info = toolUseIdInfo.get(completedId);
+              const completedName: string | null = info?.name ?? lastSubToolName;
+              if (completedName && emittedToolUseIds.has(completedId)) {
+                context.onOutput?.(JSON.stringify({
+                  subType: 'tool_complete',
+                  subToolName: completedName,
+                  subToolId: completedId,
+                }));
+                if (lastSubToolName === completedName) {
+                  lastSubToolName = null;
+                }
+              }
+              // Prune completed entries to keep memory flat across long sessions.
+              toolUseIdInfo.delete(completedId);
+              emittedToolUseIds.delete(completedId);
+            }
+            activeToolUseId = null;
+            break;
+          }
           case 'result': {
+            // Mark the final sub-tool as complete (flag error if the session failed).
+            if (lastSubToolName) {
+              const isFailure = message.subtype !== 'success';
+              context.onOutput?.(JSON.stringify({
+                subType: 'tool_complete',
+                subToolName: lastSubToolName,
+                subToolId: activeToolUseId,
+                ...(isFailure && { subToolIsError: true }),
+              }));
+              lastSubToolName = null;
+            }
             sessionId = message.session_id;
             const resultMeta = {
               subtype: message.subtype,
@@ -281,6 +401,16 @@ export const claudeCodeTool: Tool = {
         isError: hasError,
       };
     } catch (err) {
+      // Mark the last sub-tool as failed so the UI shows an error icon.
+      if (lastSubToolName) {
+        context.onOutput?.(JSON.stringify({
+          subType: 'tool_complete',
+          subToolName: lastSubToolName,
+          subToolIsError: true,
+        }));
+        lastSubToolName = null;
+      }
       const errMessage = err instanceof Error ? err.message : String(err);
       const recentStderr = stderrLines.slice(-20);
       log.error({ err, stderrTail: recentStderr }, 'Claude Code execution failed');

package/src/workspace/commit-message-enrichment-service.ts CHANGED Viewed

@@ -183,6 +183,9 @@ export class CommitEnrichmentService {
       // has already settled with the timeout error, that rejection is orphaned.
       // The .catch() swallows it to prevent an unhandled promise rejection.
       const enrichmentPromise = this.doEnrichment(job, controller.signal);
+      enrichmentPromise.catch(() => {
+        // Intentionally swallowed — the timeout branch already handled the error
+      });
       await Promise.race([
         enrichmentPromise,
         new Promise<never>((_, reject) => {
@@ -192,9 +195,6 @@ export class CommitEnrichmentService {
           }, this.jobTimeoutMs);
         }),
       ]);
-      enrichmentPromise.catch(() => {
-        // Intentionally swallowed — the timeout branch already handled the error
-      });
       this.succeededCount++;
       log.debug(
         { commitHash: job.commitHash, attempts: job.attempts },