vellum 0.2.2 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/bun.lock +68 -100
  2. package/package.json +3 -3
  3. package/src/__tests__/config-schema.test.ts +6 -0
  4. package/src/__tests__/handlers-twilio-config.test.ts +221 -0
  5. package/src/__tests__/ipc-snapshot.test.ts +9 -0
  6. package/src/__tests__/memory-regressions.test.ts +100 -2
  7. package/src/__tests__/provider-commit-message-generator.test.ts +303 -0
  8. package/src/__tests__/session-conflict-gate.test.ts +28 -25
  9. package/src/calls/__tests__/twilio-webhook-urls.test.ts +162 -0
  10. package/src/calls/call-domain.ts +3 -3
  11. package/src/calls/twilio-config.ts +8 -8
  12. package/src/calls/twilio-provider.ts +4 -4
  13. package/src/calls/twilio-webhook-urls.ts +50 -0
  14. package/src/cli/map.ts +30 -6
  15. package/src/config/defaults.ts +1 -0
  16. package/src/config/schema.ts +4 -0
  17. package/src/config/vellum-skills/telegram-setup/SKILL.md +1 -5
  18. package/src/daemon/handlers/config.ts +44 -2
  19. package/src/daemon/ipc-contract-inventory.json +4 -0
  20. package/src/daemon/ipc-contract.ts +23 -0
  21. package/src/daemon/ride-shotgun-handler.ts +2 -1
  22. package/src/daemon/session-agent-loop.ts +37 -2
  23. package/src/daemon/session-conflict-gate.ts +18 -109
  24. package/src/memory/conflict-intent.ts +114 -0
  25. package/src/memory/job-handlers/conflict.ts +23 -1
  26. package/src/runtime/gateway-client.ts +36 -0
  27. package/src/runtime/http-server.ts +58 -2
  28. package/src/runtime/routes/channel-routes.ts +121 -79
  29. package/src/tools/browser/api-map.ts +123 -50
  30. package/src/tools/claude-code/claude-code.ts +130 -0
  31. package/src/workspace/commit-message-enrichment-service.ts +3 -3
  32. package/src/workspace/provider-commit-message-generator.ts +28 -1
@@ -10,10 +10,10 @@ import { renderHistoryContent } from '../../daemon/handlers.js';
10
10
  import { checkIngressForSecrets } from '../../security/secret-ingress.js';
11
11
  import { IngressBlockedError } from '../../util/errors.js';
12
12
  import { getLogger } from '../../util/logger.js';
13
+ import { deliverChannelReply } from '../gateway-client.js';
13
14
  import type {
14
15
  MessageProcessor,
15
16
  RuntimeAttachmentMetadata,
16
- RuntimeMessagePayload,
17
17
  } from '../http-types.js';
18
18
 
19
19
  const log = getLogger('runtime-http');
@@ -54,6 +54,7 @@ export async function handleChannelInbound(
54
54
  senderExternalUserId?: string;
55
55
  senderUsername?: string;
56
56
  sourceMetadata?: Record<string, unknown>;
57
+ replyCallbackUrl?: string;
57
58
  };
58
59
 
59
60
  const {
@@ -185,41 +186,92 @@ export async function handleChannelInbound(
185
186
  ? sourceMetadata.uxBrief.trim()
186
187
  : undefined;
187
188
 
188
- // For new (non-duplicate) messages, run the agent loop to generate a reply.
189
- let processingSucceeded = false;
189
+ const replyCallbackUrl = body.replyCallbackUrl;
190
+
191
+ // For new (non-duplicate) messages, run the secret ingress check
192
+ // synchronously, then fire off the agent loop in the background.
190
193
  if (!result.duplicate && processMessage) {
194
+ // Persist the raw payload first so dead-lettered events can always be
195
+ // replayed. If the ingress check later detects secrets we clear it
196
+ // before throwing, so secret-bearing content is never left on disk.
197
+ channelDeliveryStore.storePayload(result.eventId, {
198
+ sourceChannel, externalChatId, externalMessageId, content,
199
+ attachmentIds, sourceMetadata: body.sourceMetadata,
200
+ senderName: body.senderName,
201
+ senderExternalUserId: body.senderExternalUserId,
202
+ senderUsername: body.senderUsername,
203
+ replyCallbackUrl,
204
+ });
205
+
206
+ const contentToCheck = content ?? '';
207
+ let ingressCheck: ReturnType<typeof checkIngressForSecrets>;
191
208
  try {
192
- // Persist the raw payload first so dead-lettered events can always be
193
- // replayed. If the ingress check later detects secrets we clear it
194
- // before throwing, so secret-bearing content is never left on disk.
195
- channelDeliveryStore.storePayload(result.eventId, {
196
- sourceChannel, externalChatId, externalMessageId, content,
197
- attachmentIds, sourceMetadata: body.sourceMetadata,
198
- senderName: body.senderName,
199
- senderExternalUserId: body.senderExternalUserId,
200
- senderUsername: body.senderUsername,
201
- });
209
+ ingressCheck = checkIngressForSecrets(contentToCheck);
210
+ } catch (checkErr) {
211
+ channelDeliveryStore.clearPayload(result.eventId);
212
+ throw checkErr;
213
+ }
214
+ if (ingressCheck.blocked) {
215
+ channelDeliveryStore.clearPayload(result.eventId);
216
+ throw new IngressBlockedError(ingressCheck.userNotice!, ingressCheck.detectedTypes);
217
+ }
202
218
 
203
- const contentToCheck = content ?? '';
204
- let ingressCheck: ReturnType<typeof checkIngressForSecrets>;
205
- try {
206
- ingressCheck = checkIngressForSecrets(contentToCheck);
207
- } catch (checkErr) {
208
- // If the secret check itself throws (e.g. ConfigError from corrupt
209
- // config), clear the stored payload so secret-bearing content is
210
- // never left on disk.
211
- channelDeliveryStore.clearPayload(result.eventId);
212
- throw checkErr;
213
- }
214
- if (ingressCheck.blocked) {
215
- channelDeliveryStore.clearPayload(result.eventId);
216
- throw new IngressBlockedError(ingressCheck.userNotice!, ingressCheck.detectedTypes);
217
- }
219
+ // Fire-and-forget: process the message and deliver the reply in the background.
220
+ // The HTTP response returns immediately so the gateway webhook is not blocked.
221
+ processChannelMessageInBackground({
222
+ processMessage,
223
+ conversationId: result.conversationId,
224
+ eventId: result.eventId,
225
+ content: content ?? '',
226
+ attachmentIds: hasAttachments ? attachmentIds : undefined,
227
+ sourceChannel,
228
+ externalChatId,
229
+ metadataHints,
230
+ metadataUxBrief,
231
+ replyCallbackUrl,
232
+ });
233
+ }
234
+
235
+ return Response.json({
236
+ accepted: result.accepted,
237
+ duplicate: result.duplicate,
238
+ eventId: result.eventId,
239
+ });
240
+ }
218
241
 
242
+ interface BackgroundProcessingParams {
243
+ processMessage: MessageProcessor;
244
+ conversationId: string;
245
+ eventId: string;
246
+ content: string;
247
+ attachmentIds?: string[];
248
+ sourceChannel: string;
249
+ externalChatId: string;
250
+ metadataHints: string[];
251
+ metadataUxBrief?: string;
252
+ replyCallbackUrl?: string;
253
+ }
254
+
255
+ function processChannelMessageInBackground(params: BackgroundProcessingParams): void {
256
+ const {
257
+ processMessage,
258
+ conversationId,
259
+ eventId,
260
+ content,
261
+ attachmentIds,
262
+ sourceChannel,
263
+ externalChatId,
264
+ metadataHints,
265
+ metadataUxBrief,
266
+ replyCallbackUrl,
267
+ } = params;
268
+
269
+ (async () => {
270
+ try {
219
271
  const { messageId: userMessageId } = await processMessage(
220
- result.conversationId,
221
- content ?? '',
222
- hasAttachments ? attachmentIds : undefined,
272
+ conversationId,
273
+ content,
274
+ attachmentIds,
223
275
  {
224
276
  transport: {
225
277
  channelId: sourceChannel,
@@ -229,60 +281,50 @@ export async function handleChannelInbound(
229
281
  },
230
282
  sourceChannel,
231
283
  );
232
- // Link the user message to the inbound event so edits can find it later
233
- channelDeliveryStore.linkMessage(result.eventId, userMessageId);
234
- channelDeliveryStore.markProcessed(result.eventId);
235
- processingSucceeded = true;
284
+ channelDeliveryStore.linkMessage(eventId, userMessageId);
285
+ channelDeliveryStore.markProcessed(eventId);
286
+
287
+ if (replyCallbackUrl) {
288
+ await deliverReplyViaCallback(conversationId, externalChatId, replyCallbackUrl);
289
+ }
236
290
  } catch (err) {
237
- // Secret ingress blocks are not retryable let the top-level handler return 422
238
- if (err instanceof IngressBlockedError) throw err;
239
- log.error({ err, conversationId: result.conversationId }, 'Failed to process channel inbound message');
240
- channelDeliveryStore.recordProcessingFailure(result.eventId, err);
291
+ log.error({ err, conversationId }, 'Background channel message processing failed');
292
+ channelDeliveryStore.recordProcessingFailure(eventId, err);
241
293
  }
242
- }
294
+ })();
295
+ }
243
296
 
244
- // Only look up the assistant reply when processing succeeded for a new
245
- // (non-duplicate) message. For duplicates or failed processing, returning
246
- // a stale assistant message could cause the caller to resend old replies.
247
- let assistantMessage: RuntimeMessagePayload | undefined;
248
- if (processingSucceeded) {
249
- const msgs = conversationStore.getMessages(result.conversationId);
250
- for (let i = msgs.length - 1; i >= 0; i--) {
251
- if (msgs[i].role === 'assistant') {
252
- let parsed: unknown;
253
- try { parsed = JSON.parse(msgs[i].content); } catch { parsed = msgs[i].content; }
254
- const rendered = renderHistoryContent(parsed);
255
-
256
- const linked = attachmentsStore.getAttachmentMetadataForMessage(msgs[i].id);
257
- const replyAttachments: RuntimeAttachmentMetadata[] = linked.map((a) => ({
258
- id: a.id,
259
- filename: a.originalFilename,
260
- mimeType: a.mimeType,
261
- sizeBytes: a.sizeBytes,
262
- kind: a.kind,
263
- }));
264
-
265
- // Include the reply if it has text or attachments
266
- if (rendered.text || replyAttachments.length > 0) {
267
- assistantMessage = {
268
- id: msgs[i].id,
269
- role: 'assistant',
270
- content: rendered.text,
271
- timestamp: new Date(msgs[i].createdAt).toISOString(),
272
- attachments: replyAttachments,
273
- };
274
- }
275
- break;
297
+ async function deliverReplyViaCallback(
298
+ conversationId: string,
299
+ externalChatId: string,
300
+ callbackUrl: string,
301
+ ): Promise<void> {
302
+ const msgs = conversationStore.getMessages(conversationId);
303
+ for (let i = msgs.length - 1; i >= 0; i--) {
304
+ if (msgs[i].role === 'assistant') {
305
+ let parsed: unknown;
306
+ try { parsed = JSON.parse(msgs[i].content); } catch { parsed = msgs[i].content; }
307
+ const rendered = renderHistoryContent(parsed);
308
+
309
+ const linked = attachmentsStore.getAttachmentMetadataForMessage(msgs[i].id);
310
+ const replyAttachments: RuntimeAttachmentMetadata[] = linked.map((a) => ({
311
+ id: a.id,
312
+ filename: a.originalFilename,
313
+ mimeType: a.mimeType,
314
+ sizeBytes: a.sizeBytes,
315
+ kind: a.kind,
316
+ }));
317
+
318
+ if (rendered.text || replyAttachments.length > 0) {
319
+ await deliverChannelReply(callbackUrl, {
320
+ chatId: externalChatId,
321
+ text: rendered.text || undefined,
322
+ attachments: replyAttachments.length > 0 ? replyAttachments : undefined,
323
+ });
276
324
  }
325
+ break;
277
326
  }
278
327
  }
279
-
280
- return Response.json({
281
- accepted: result.accepted,
282
- duplicate: result.duplicate,
283
- eventId: result.eventId,
284
- ...(assistantMessage ? { assistantMessage } : {}),
285
- });
286
328
  }
287
329
 
288
330
  export function handleListDeadLetters(): Response {
@@ -38,12 +38,31 @@ export interface ApiMapResult {
38
38
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
39
39
  const NUMERIC_RE = /^\d+$/;
40
40
  const HEX_HASH_RE = /^[0-9a-f]{8,}$/i;
41
+ const DATE_RE = /^\d{4}-\d{2}-\d{2}$/;
42
+
43
+ /** URL path patterns that indicate non-API noise. */
44
+ const NOISE_PATH_PATTERNS = [
45
+ /\/web-translations\//,
46
+ /\/cdn-cgi\//,
47
+ /\.properties$/,
48
+ /\.js$/,
49
+ /\.css$/,
50
+ /\.woff2?$/,
51
+ /\.png$/,
52
+ /\.jpg$/,
53
+ /\.svg$/,
54
+ /\.ico$/,
55
+ /\.map$/,
56
+ /\/preference\//,
57
+ /\/userpreference-service\//,
58
+ ];
41
59
 
42
60
  /** Returns true when a path segment looks like a dynamic ID. */
43
61
  function isIdSegment(segment: string): boolean {
44
62
  if (NUMERIC_RE.test(segment)) return true;
45
63
  if (UUID_RE.test(segment)) return true;
46
64
  if (HEX_HASH_RE.test(segment)) return true;
65
+ if (DATE_RE.test(segment)) return true;
47
66
  return false;
48
67
  }
49
68
 
@@ -69,27 +88,43 @@ function tryParseJson(text: string | undefined): Record<string, unknown> | undef
69
88
  return undefined;
70
89
  }
71
90
 
91
+ /** Extract GraphQL operation name from request body. */
92
+ function extractGraphQLOperationName(postData: string | undefined): string | null {
93
+ if (!postData) return null;
94
+ const body = tryParseJson(postData);
95
+ if (!body) return null;
96
+ if (typeof body.operationName === 'string' && body.operationName) return body.operationName;
97
+ // Try extracting from query string: "query FooBar { ..." or "mutation FooBar { ..."
98
+ if (typeof body.query === 'string') {
99
+ const named = body.query.match(/(?:query|mutation|subscription)\s+(\w+)/);
100
+ if (named) return named[1];
101
+ // Unnamed query — extract the first field name: "query{fooBar(" or "query { fooBar {"
102
+ const firstField = body.query.match(/(?:query|mutation|subscription)\s*\{?\s*(\w+)/);
103
+ if (firstField) return firstField[1];
104
+ }
105
+ return null;
106
+ }
107
+
72
108
  // ---------------------------------------------------------------------------
73
109
  // Core analysis
74
110
  // ---------------------------------------------------------------------------
75
111
 
112
+ interface GroupData {
113
+ method: string;
114
+ urlPattern: string;
115
+ exampleUrl: string;
116
+ queryParams: Set<string>;
117
+ requestBodyKeys: Set<string>;
118
+ responseStatus: Set<number>;
119
+ responseBodyKeys: Set<string>;
120
+ count: number;
121
+ }
122
+
76
123
  export function analyzeApiMap(
77
124
  entries: NetworkRecordedEntry[],
78
125
  domain: string,
79
126
  ): ApiMapResult {
80
- const groups = new Map<
81
- string,
82
- {
83
- method: string;
84
- urlPattern: string;
85
- exampleUrl: string;
86
- queryParams: Set<string>;
87
- requestBodyKeys: Set<string>;
88
- responseStatus: Set<number>;
89
- responseBodyKeys: Set<string>;
90
- count: number;
91
- }
92
- >();
127
+ const groups = new Map<string, GroupData>();
93
128
 
94
129
  for (const entry of entries) {
95
130
  const { request, response } = entry;
@@ -97,11 +132,30 @@ export function analyzeApiMap(
97
132
  try {
98
133
  parsed = new URL(request.url);
99
134
  } catch {
100
- continue; // skip malformed URLs
135
+ continue;
101
136
  }
102
137
 
138
+ // Skip non-API noise
139
+ if (NOISE_PATH_PATTERNS.some(p => p.test(parsed.pathname))) continue;
140
+
141
+ // Skip non-JSON responses
142
+ const mimeType = response?.mimeType ?? '';
143
+ if (response && !mimeType.includes('json') && !mimeType.includes('graphql')) continue;
144
+
103
145
  const method = request.method.toUpperCase();
104
- const urlPattern = `${parsed.hostname}${normalizePathSegments(parsed.pathname)}`;
146
+ const normalizedPath = normalizePathSegments(parsed.pathname);
147
+ const basePattern = `${parsed.hostname}${normalizedPath}`;
148
+
149
+ // For GraphQL endpoints, split by operation name
150
+ let urlPattern = basePattern;
151
+ const isGraphQL = normalizedPath.includes('graphql');
152
+ if (isGraphQL && method === 'POST') {
153
+ const opName = extractGraphQLOperationName(request.postData);
154
+ if (opName) {
155
+ urlPattern = `${basePattern} → ${opName}`;
156
+ }
157
+ }
158
+
105
159
  const key = `${method} ${urlPattern}`;
106
160
 
107
161
  let group = groups.get(key);
@@ -121,26 +175,23 @@ export function analyzeApiMap(
121
175
 
122
176
  group.count++;
123
177
 
124
- // Collect query param keys
125
178
  for (const paramKey of parsed.searchParams.keys()) {
126
179
  group.queryParams.add(paramKey);
127
180
  }
128
181
 
129
- // Request body keys (POST/PUT/PATCH)
130
182
  if (['POST', 'PUT', 'PATCH'].includes(method)) {
131
183
  const body = tryParseJson(request.postData);
132
184
  if (body) {
133
185
  for (const k of Object.keys(body)) {
134
- group.requestBodyKeys.add(k);
186
+ if (k !== 'query' && k !== 'operationName' && k !== 'extensions') {
187
+ group.requestBodyKeys.add(k);
188
+ }
135
189
  }
136
190
  }
137
191
  }
138
192
 
139
- // Response status
140
193
  if (response) {
141
194
  group.responseStatus.add(response.status);
142
-
143
- // Response body keys
144
195
  const resBody = tryParseJson(response.body);
145
196
  if (resBody) {
146
197
  for (const k of Object.keys(resBody)) {
@@ -161,13 +212,21 @@ export function analyzeApiMap(
161
212
  count: g.count,
162
213
  }));
163
214
 
164
- // Sort by count descending, then by urlPattern for stability
165
- endpoints.sort((a, b) => b.count - a.count || a.urlPattern.localeCompare(b.urlPattern));
215
+ // Sort: data endpoints first (low count = unique pages), then boilerplate
216
+ // Within each tier, sort alphabetically by pattern for readability
217
+ endpoints.sort((a, b) => {
218
+ const aIsBoilerplate = a.count > 15;
219
+ const bIsBoilerplate = b.count > 15;
220
+ if (aIsBoilerplate !== bIsBoilerplate) return aIsBoilerplate ? 1 : -1;
221
+ return a.urlPattern.localeCompare(b.urlPattern);
222
+ });
223
+
224
+ const totalApiRequests = endpoints.reduce((sum, ep) => sum + ep.count, 0);
166
225
 
167
226
  return {
168
227
  domain,
169
228
  analyzedAt: Date.now(),
170
- totalRequests: entries.length,
229
+ totalRequests: totalApiRequests,
171
230
  endpoints,
172
231
  };
173
232
  }
@@ -191,30 +250,44 @@ export function saveApiMap(domain: string, result: ApiMapResult): string {
191
250
  // ---------------------------------------------------------------------------
192
251
 
193
252
  export function printApiMapTable(result: ApiMapResult): void {
194
- console.log(`\nAPI Map for ${result.domain} — ${result.totalRequests} total requests, ${result.endpoints.length} unique endpoints\n`);
195
-
196
- const header = ['Method', 'URL Pattern', 'Count', 'Status', 'Query Params'];
197
- const rows = result.endpoints.map((ep) => [
198
- ep.method,
199
- ep.urlPattern,
200
- String(ep.count),
201
- ep.responseStatus.join(',') || '-',
202
- ep.queryParams.join(',') || '-',
203
- ]);
204
-
205
- // Calculate column widths
206
- const widths = header.map((h, i) =>
207
- Math.max(h.length, ...rows.map((r) => r[i].length)),
208
- );
209
-
210
- const sep = widths.map((w) => '-'.repeat(w)).join(' | ');
211
- const fmt = (row: string[]) =>
212
- row.map((cell, i) => cell.padEnd(widths[i])).join(' | ');
213
-
214
- console.log(fmt(header));
215
- console.log(sep);
216
- for (const row of rows) {
217
- console.log(fmt(row));
218
- }
219
- console.log();
253
+ const dataEndpoints = result.endpoints.filter(ep => ep.count <= 15);
254
+ const boilerplate = result.endpoints.filter(ep => ep.count > 15);
255
+
256
+ console.log(`\nAPI Map for ${result.domain} — ${result.endpoints.length} endpoints discovered\n`);
257
+
258
+ const stripDomain = (pattern: string) => {
259
+ const idx = pattern.indexOf('/');
260
+ return idx >= 0 ? pattern.slice(idx) : pattern;
261
+ };
262
+
263
+ const printSection = (title: string, eps: ApiEndpoint[]) => {
264
+ if (eps.length === 0) return;
265
+ console.log(` ${title} (${eps.length})\n`);
266
+
267
+ const header = ['Method', 'Endpoint', 'Hits', 'Response Keys'];
268
+ const rows = eps.map((ep) => [
269
+ ep.method,
270
+ stripDomain(ep.urlPattern),
271
+ String(ep.count),
272
+ ep.responseBodyKeys.slice(0, 5).join(', ') || '-',
273
+ ]);
274
+
275
+ const widths = header.map((h, i) =>
276
+ Math.min(i === 1 ? 72 : i === 3 ? 50 : 200, Math.max(h.length, ...rows.map((r) => r[i].length))),
277
+ );
278
+
279
+ const sep = widths.map((w) => '-'.repeat(w)).join(' | ');
280
+ const fmt = (row: string[]) =>
281
+ row.map((cell, i) => cell.slice(0, widths[i]).padEnd(widths[i])).join(' | ');
282
+
283
+ console.log(` ${fmt(header)}`);
284
+ console.log(` ${sep}`);
285
+ for (const row of rows) {
286
+ console.log(` ${fmt(row)}`);
287
+ }
288
+ console.log();
289
+ };
290
+
291
+ printSection('DATA ENDPOINTS', dataEndpoints);
292
+ printSection('PAGE-LOAD BOILERPLATE', boilerplate);
220
293
  }
@@ -28,6 +28,25 @@ const VALID_PROFILES: readonly WorkerProfile[] = ['general', 'researcher', 'code
28
28
  const MAX_CLAUDE_CODE_DEPTH = 1;
29
29
  const DEPTH_ENV_VAR = 'VELLUM_CLAUDE_CODE_DEPTH';
30
30
 
31
+ function summarizeToolInput(toolName: string, input: Record<string, unknown>): string {
32
+ // Extract the most relevant field for each tool type
33
+ const name = toolName.toLowerCase();
34
+ if (name === 'bash') return String(input.command ?? '');
35
+ if (name === 'read' || name === 'file_read') return String(input.file_path ?? input.path ?? '');
36
+ if (name === 'edit' || name === 'file_edit') return String(input.file_path ?? input.path ?? '');
37
+ if (name === 'write' || name === 'file_write') return String(input.file_path ?? input.path ?? '');
38
+ if (name === 'glob') return String(input.pattern ?? '');
39
+ if (name === 'grep') return String(input.pattern ?? '');
40
+ if (name === 'websearch' || name === 'web_search') return String(input.query ?? '');
41
+ if (name === 'webfetch' || name === 'web_fetch') return String(input.url ?? '');
42
+ if (name === 'task') return String(input.description ?? '');
43
+ // Fallback: first string value
44
+ for (const val of Object.values(input)) {
45
+ if (typeof val === 'string' && val.length > 0 && val.length < 200) return val;
46
+ }
47
+ return '';
48
+ }
49
+
31
50
  export const claudeCodeTool: Tool = {
32
51
  name: 'claude_code',
33
52
  description: 'Delegate a coding task to Claude Code, an AI-powered coding agent that can read, write, and edit files, run shell commands, and perform complex multi-step software engineering tasks autonomously.',
@@ -203,12 +222,22 @@ export const claudeCodeTool: Tool = {
203
222
  queryOptions.resume = resumeSessionId;
204
223
  }
205
224
 
225
+ // Declared outside try so the catch block can emit a final tool_complete on error.
226
+ let lastSubToolName: string | null = null;
227
+
206
228
  try {
207
229
  const conversation = query({ prompt, options: queryOptions });
208
230
  let resultText = '';
209
231
  let sessionId = '';
210
232
  let hasError = false;
211
233
 
234
+ // Track tool_use_id → {name, inputSummary} for enriching progress events.
235
+ const toolUseIdInfo = new Map<string, { name: string; inputSummary: string }>();
236
+ // Track tool_use_ids that we've already emitted tool_start for (to avoid duplicates).
237
+ const emittedToolUseIds = new Set<string>();
238
+ // Track the currently active tool_use_id from tool_progress events.
239
+ let activeToolUseId: string | null = null;
240
+
212
241
  for await (const message of conversation) {
213
242
  switch (message.type) {
214
243
  case 'assistant': {
@@ -225,12 +254,103 @@ export const claudeCodeTool: Tool = {
225
254
  context.onOutput?.(block.text);
226
255
  resultText += block.text;
227
256
  }
257
+ if (block.type === 'tool_use') {
258
+ // Capture info keyed by tool_use_id for enriching tool_progress events.
259
+ const inputSummary = summarizeToolInput(block.name, block.input as Record<string, unknown>);
260
+ toolUseIdInfo.set(block.id, { name: block.name, inputSummary });
261
+
262
+ // Emit tool_start if we haven't already (tool_progress may have fired first).
263
+ // NOTE: Do NOT emit tool_complete for the previous tool here. An assistant
264
+ // message may contain multiple tool_use blocks (parallel tool use) and none
265
+ // of them have executed yet at this point. Completions are handled by
266
+ // tool_use_summary and tool_progress events.
267
+ if (!emittedToolUseIds.has(block.id)) {
268
+ context.onOutput?.(JSON.stringify({
269
+ subType: 'tool_start',
270
+ subToolName: block.name,
271
+ subToolInput: inputSummary,
272
+ subToolId: block.id,
273
+ }));
274
+ emittedToolUseIds.add(block.id);
275
+ lastSubToolName = block.name;
276
+ activeToolUseId = block.id;
277
+ }
278
+ }
228
279
  }
229
280
  }
230
281
  sessionId = message.session_id;
231
282
  break;
232
283
  }
284
+ case 'tool_progress': {
285
+ // The SDK fires tool_progress periodically DURING tool execution.
286
+ // This is our primary signal for live sub-tool progress.
287
+ const toolUseId = message.tool_use_id;
288
+ const toolName = message.tool_name;
289
+ sessionId = message.session_id;
290
+
291
+ // Record tool name if we don't have it yet (tool_progress fires before assistant sometimes).
292
+ if (!toolUseIdInfo.has(toolUseId)) {
293
+ toolUseIdInfo.set(toolUseId, { name: toolName, inputSummary: '' });
294
+ }
295
+
296
+ if (!emittedToolUseIds.has(toolUseId)) {
297
+ // New tool — mark previous as complete and emit tool_start.
298
+ if (lastSubToolName && activeToolUseId !== toolUseId) {
299
+ context.onOutput?.(JSON.stringify({
300
+ subType: 'tool_complete',
301
+ subToolName: lastSubToolName,
302
+ subToolId: activeToolUseId,
303
+ }));
304
+ }
305
+ const inputSummary = toolUseIdInfo.get(toolUseId)?.inputSummary ?? '';
306
+ context.onOutput?.(JSON.stringify({
307
+ subType: 'tool_start',
308
+ subToolName: toolName,
309
+ subToolInput: inputSummary,
310
+ subToolId: toolUseId,
311
+ }));
312
+ emittedToolUseIds.add(toolUseId);
313
+ lastSubToolName = toolName;
314
+ }
315
+ activeToolUseId = toolUseId;
316
+ break;
317
+ }
318
+ case 'tool_use_summary': {
319
+ // The SDK fires tool_use_summary after tool execution with a summary
320
+ // and the IDs of tools that were executed.
321
+ sessionId = message.session_id;
322
+ for (const completedId of message.preceding_tool_use_ids) {
323
+ const info = toolUseIdInfo.get(completedId);
324
+ const completedName: string | null = info?.name ?? lastSubToolName;
325
+ if (completedName && emittedToolUseIds.has(completedId)) {
326
+ context.onOutput?.(JSON.stringify({
327
+ subType: 'tool_complete',
328
+ subToolName: completedName,
329
+ subToolId: completedId,
330
+ }));
331
+ if (lastSubToolName === completedName) {
332
+ lastSubToolName = null;
333
+ }
334
+ }
335
+ // Prune completed entries to keep memory flat across long sessions.
336
+ toolUseIdInfo.delete(completedId);
337
+ emittedToolUseIds.delete(completedId);
338
+ }
339
+ activeToolUseId = null;
340
+ break;
341
+ }
233
342
  case 'result': {
343
+ // Mark the final sub-tool as complete (flag error if the session failed).
344
+ if (lastSubToolName) {
345
+ const isFailure = message.subtype !== 'success';
346
+ context.onOutput?.(JSON.stringify({
347
+ subType: 'tool_complete',
348
+ subToolName: lastSubToolName,
349
+ subToolId: activeToolUseId,
350
+ ...(isFailure && { subToolIsError: true }),
351
+ }));
352
+ lastSubToolName = null;
353
+ }
234
354
  sessionId = message.session_id;
235
355
  const resultMeta = {
236
356
  subtype: message.subtype,
@@ -281,6 +401,16 @@ export const claudeCodeTool: Tool = {
281
401
  isError: hasError,
282
402
  };
283
403
  } catch (err) {
404
+ // Mark the last sub-tool as failed so the UI shows an error icon.
405
+ if (lastSubToolName) {
406
+ context.onOutput?.(JSON.stringify({
407
+ subType: 'tool_complete',
408
+ subToolName: lastSubToolName,
409
+ subToolIsError: true,
410
+ }));
411
+ lastSubToolName = null;
412
+ }
413
+
284
414
  const errMessage = err instanceof Error ? err.message : String(err);
285
415
  const recentStderr = stderrLines.slice(-20);
286
416
  log.error({ err, stderrTail: recentStderr }, 'Claude Code execution failed');
@@ -183,6 +183,9 @@ export class CommitEnrichmentService {
183
183
  // has already settled with the timeout error, that rejection is orphaned.
184
184
  // The .catch() swallows it to prevent an unhandled promise rejection.
185
185
  const enrichmentPromise = this.doEnrichment(job, controller.signal);
186
+ enrichmentPromise.catch(() => {
187
+ // Intentionally swallowed — the timeout branch already handled the error
188
+ });
186
189
  await Promise.race([
187
190
  enrichmentPromise,
188
191
  new Promise<never>((_, reject) => {
@@ -192,9 +195,6 @@ export class CommitEnrichmentService {
192
195
  }, this.jobTimeoutMs);
193
196
  }),
194
197
  ]);
195
- enrichmentPromise.catch(() => {
196
- // Intentionally swallowed — the timeout branch already handled the error
197
- });
198
198
  this.succeededCount++;
199
199
  log.debug(
200
200
  { commitHash: job.commitHash, attempts: job.attempts },