@ranger-testing/ranger-cli 1.1.7 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +47 -45
  2. package/build/cli.js +649 -277
  3. package/build/cli.js.map +1 -1
  4. package/build/commands/addEnv.js +1 -1
  5. package/build/commands/addEnv.js.map +1 -1
  6. package/build/commands/authEncrypt.js +5 -10
  7. package/build/commands/authEncrypt.js.map +1 -1
  8. package/build/commands/clean.js +1 -1
  9. package/build/commands/clean.js.map +1 -1
  10. package/build/commands/config.js +9 -15
  11. package/build/commands/config.js.map +1 -1
  12. package/build/commands/env.js +10 -13
  13. package/build/commands/env.js.map +1 -1
  14. package/build/commands/feature.js +174 -123
  15. package/build/commands/feature.js.map +1 -1
  16. package/build/commands/hooks/autoPrompt.js +1 -1
  17. package/build/commands/hooks/disable.js +1 -1
  18. package/build/commands/hooks/enable.js +9 -4
  19. package/build/commands/hooks/enable.js.map +1 -1
  20. package/build/commands/hooks/exitPlanMode.js +8 -8
  21. package/build/commands/hooks/planReminder.js +7 -7
  22. package/build/commands/hooks/planStart.js +4 -4
  23. package/build/commands/hooks/postEdit.js +4 -4
  24. package/build/commands/hooks/postEdit.js.map +1 -1
  25. package/build/commands/hooks/preCompact.js +3 -3
  26. package/build/commands/hooks/preCompact.js.map +1 -1
  27. package/build/commands/hooks/sessionStart.js +19 -5
  28. package/build/commands/hooks/sessionStart.js.map +1 -1
  29. package/build/commands/hooks/stopHook.js +114 -20
  30. package/build/commands/hooks/stopHook.js.map +1 -1
  31. package/build/commands/index.js +1 -2
  32. package/build/commands/index.js.map +1 -1
  33. package/build/commands/login.js +2 -5
  34. package/build/commands/login.js.map +1 -1
  35. package/build/commands/setupCi.js +189 -0
  36. package/build/commands/setupCi.js.map +1 -0
  37. package/build/commands/skillup.js +16 -68
  38. package/build/commands/skillup.js.map +1 -1
  39. package/build/commands/start.js +1 -1
  40. package/build/commands/start.js.map +1 -1
  41. package/build/commands/status.js +14 -13
  42. package/build/commands/status.js.map +1 -1
  43. package/build/commands/update.js +52 -5
  44. package/build/commands/update.js.map +1 -1
  45. package/build/commands/updateEnv.js +1 -1
  46. package/build/commands/updateEnv.js.map +1 -1
  47. package/build/commands/useEnv.js +1 -1
  48. package/build/commands/useEnv.js.map +1 -1
  49. package/build/commands/utils/activeProfile.js +76 -0
  50. package/build/commands/utils/activeProfile.js.map +1 -0
  51. package/build/commands/utils/browserSessionsApi.js +1 -1
  52. package/build/commands/utils/browserSessionsApi.js.map +1 -1
  53. package/build/commands/utils/deviceAuth.js +53 -5
  54. package/build/commands/utils/deviceAuth.js.map +1 -1
  55. package/build/commands/utils/environment.js +11 -12
  56. package/build/commands/utils/environment.js.map +1 -1
  57. package/build/commands/utils/featureApi.js +55 -30
  58. package/build/commands/utils/featureApi.js.map +1 -1
  59. package/build/commands/utils/featureReportGenerator.js +6 -6
  60. package/build/commands/utils/featureReportGenerator.js.map +1 -1
  61. package/build/commands/utils/keychain.js +1 -1
  62. package/build/commands/utils/localAgentInstallationsApi.js +1 -1
  63. package/build/commands/utils/profileMessages.js +8 -0
  64. package/build/commands/utils/profileMessages.js.map +1 -0
  65. package/build/commands/utils/profileSetupBanner.js +167 -0
  66. package/build/commands/utils/profileSetupBanner.js.map +1 -0
  67. package/build/commands/utils/settings.js +20 -2
  68. package/build/commands/utils/settings.js.map +1 -1
  69. package/build/commands/utils/skills.js +1 -1
  70. package/build/commands/utils/telemetry.js +254 -0
  71. package/build/commands/utils/telemetry.js.map +1 -0
  72. package/build/commands/utils/userApi.js +4 -4
  73. package/build/commands/utils/userApi.js.map +1 -1
  74. package/build/commands/verifyFeature.js +816 -526
  75. package/build/commands/verifyFeature.js.map +1 -1
  76. package/build/commands/verifyInBrowser.js +1 -1
  77. package/build/commands/verifyInBrowser.js.map +1 -1
  78. package/build/skills/ranger/SKILL.md +65 -64
  79. package/build/skills/ranger/create.md +31 -31
  80. package/build/skills/ranger/feedback.md +25 -17
  81. package/build/skills/ranger/start.md +37 -37
  82. package/build/skills/ranger/verify.md +59 -55
  83. package/package.json +1 -1
  84. package/scripts/postinstall.js +1 -1
  85. package/build/commands/dataMcpServer.js +0 -1
  86. package/build/commands/dataMcpServer.js.map +0 -1
  87. package/build/commands/utils/cliSecret.js +0 -1
  88. package/build/commands/utils/cliSecret.js.map +0 -1
  89. package/build/skills/bug-bash.md +0 -329
  90. package/build/skills/e2e-test-recommender.md +0 -168
@@ -1,4 +1,5 @@
1
1
  import { query, } from '@anthropic-ai/claude-agent-sdk';
2
+ import { createTelemetryCollector, } from './utils/telemetry.js';
2
3
  import { join, dirname } from 'path';
3
4
  import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
4
5
  import { existsSync } from 'fs';
@@ -6,11 +7,16 @@ import { execSync } from 'child_process';
6
7
  import { tmpdir } from 'os';
7
8
  import inquirer from 'inquirer';
8
9
  import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, getEnvDir, } from './utils/settings.js';
9
- import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
10
+ import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getProxySessionToken, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
11
+ import { getAiProxyUrl } from './utils/environment.js';
10
12
  import { getToken } from './utils/keychain.js';
11
13
  import { getActiveFeatureId } from './feature.js';
12
- import { getFeature, updateFeature, updateChecklistItem, startSession, getActionItems, getItemFeedback, } from './utils/featureApi.js';
14
+ import { readActiveProfileName } from './utils/activeProfile.js';
15
+ import { getEnvNames } from './env.js';
16
+ import { formatProfileRequiredMessage } from './utils/profileMessages.js';
17
+ import { getFeature, updateFeature, updateChecklistItem, startSession, getActionItems, getItemFeedback, markCommentsAddressed, } from './utils/featureApi.js';
13
18
  import { getRangerDir } from './utils/rangerRoot.js';
19
+ const bold = (text) => `\x1b[1m${text}\x1b[0m`;
14
20
  /**
15
21
  * Get the current git branch
16
22
  */
@@ -73,7 +79,7 @@ function getMockEvaluation(outcome) {
73
79
  success: true,
74
80
  summary: '[DEBUG] Mock verification completed successfully.',
75
81
  evaluation: 'verified',
76
- evaluationReason: 'All checklist requirements were met.',
82
+ evaluationReason: 'All scenario requirements were met.',
77
83
  },
78
84
  partial: {
79
85
  success: false,
@@ -91,7 +97,7 @@ function getMockEvaluation(outcome) {
91
97
  incomplete: {
92
98
  success: false,
93
99
  summary: '[DEBUG] Mock incomplete verification.',
94
- evaluation: 'partial',
100
+ evaluation: 'incomplete',
95
101
  evaluationReason: 'Implementation is incomplete and needs additional work.',
96
102
  issues: [
97
103
  {
@@ -157,42 +163,52 @@ function getDebugPrompt() {
157
163
  Return your findings in the structured output format.`;
158
164
  }
159
165
  /**
160
- * Prompt user to select a checklist item
166
+ * Prompt user to select a scenario
161
167
  */
162
168
  async function selectChecklistItem(items) {
163
169
  if (items.length === 0) {
164
170
  return null;
165
171
  }
166
- const choices = items.map((item, i) => {
167
- const emoji = item.status === 'verified'
172
+ const choices = items.map((item) => {
173
+ const emoji = item.status === 'closed' && item.terminalReason === 'approved'
168
174
  ? '\u2705'
169
- : item.status === 'incomplete'
170
- ? '\ud83d\udfe0' // orange circle
171
- : item.status === 'blocked'
172
- ? '\ud83d\uded1'
173
- : item.status === 'closed'
174
- ? '\u26d4'
175
- : '\u2b1c';
175
+ : item.status === 'verified'
176
+ ? '\ud83d\udfe2' // green circle
177
+ : item.status === 'incomplete'
178
+ ? '\ud83d\udfe0' // orange circle
179
+ : item.status === 'blocked'
180
+ ? '\ud83d\uded1'
181
+ : item.status === 'closed'
182
+ ? '\u26d4'
183
+ : item.status === 'verification_in_progress'
184
+ ? '\u23f3'
185
+ : '\u2b1c';
176
186
  const commentBadge = item.unaddressedCommentCount > 0
177
187
  ? ` [${item.unaddressedCommentCount} comments]`
178
188
  : '';
189
+ const disabledReason = item.actionable
190
+ ? false
191
+ : item.status === 'closed' && item.terminalReason
192
+ ? item.terminalReason
193
+ : 'not actionable';
179
194
  return {
180
- name: `${i + 1}. ${emoji} ${item.description}${commentBadge}`,
195
+ name: `${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}`,
181
196
  value: item.id,
197
+ disabled: disabledReason,
182
198
  };
183
199
  });
184
200
  const { selected } = await inquirer.prompt([
185
201
  {
186
202
  type: 'list',
187
203
  name: 'selected',
188
- message: 'Which checklist item does this verify?',
204
+ message: 'Which scenario does this verify?',
189
205
  choices,
190
206
  },
191
207
  ]);
192
208
  return items.find((i) => i.id === selected) || null;
193
209
  }
194
210
  /**
195
- * Handle incomplete verification - check if all other items are terminal and prompt user
211
+ * Handle incomplete verification - check if all other scenarios are terminal and prompt user
196
212
  */
197
213
  async function handleIncompleteItem(featureId, incompleteItem, result) {
198
214
  // Get action items to check if there are other items to work on
@@ -217,46 +233,98 @@ async function handleIncompleteItem(featureId, incompleteItem, result) {
217
233
  }
218
234
  console.log(`\nNext steps:`);
219
235
  console.log(` 1. Fix the issues above in your code`);
220
- console.log(` 2. Run 'ranger verify-feature' again to re-verify`);
236
+ console.log(` 2. Run 'ranger go' again to re-verify`);
221
237
  if (allOthersTerminal && otherItems.length > 0) {
222
- console.log(`\nAll other checklist items are complete.`);
223
- console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
238
+ console.log(`\nAll other scenarios are complete.`);
239
+ console.log(`If you're done for now, you can stop and resume later with 'ranger resume'.`);
224
240
  }
225
241
  console.log(`${'='.repeat(60)}\n`);
226
242
  }
227
243
  /**
228
- * PostToolUse hook that logs browser tool calls to stdout.
244
+ * Create a PostToolUse hook that logs browser tool calls to stdout and tracks
245
+ * all tool calls via telemetry with per-call timing.
246
+ */
247
+ function createToolCallTrackingHook(telemetry) {
248
+ const toolCallCounts = new Map();
249
+ const hook = async (input) => {
250
+ if (input.hook_event_name !== 'PostToolUse')
251
+ return {};
252
+ const postInput = input;
253
+ const toolInput = postInput.tool_input;
254
+ const shortName = postInput.tool_name.replace('mcp__ranger-browser__', '');
255
+ // Track count
256
+ toolCallCounts.set(shortName, (toolCallCounts.get(shortName) || 0) + 1);
257
+ // Log tool call as telemetry event
258
+ telemetry.trackPhaseStart('tool_call', { toolName: shortName });
259
+ telemetry.trackPhaseEnd('tool_call', {
260
+ toolName: shortName,
261
+ toolInput: summarizeToolInput(shortName, toolInput),
262
+ });
263
+ // Console log
264
+ switch (postInput.tool_name) {
265
+ case 'mcp__ranger-browser__browser_navigate':
266
+ console.log(`[browser] Navigate → ${toolInput.url}`);
267
+ break;
268
+ case 'mcp__ranger-browser__browser_click':
269
+ console.log(`[browser] Click → "${toolInput.element}"`);
270
+ break;
271
+ case 'mcp__ranger-browser__browser_type':
272
+ console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
273
+ break;
274
+ case 'mcp__ranger-browser__browser_press_key':
275
+ console.log(`[browser] Press key → ${toolInput.key}`);
276
+ break;
277
+ case 'mcp__ranger-browser__browser_wait_for':
278
+ console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
279
+ break;
280
+ }
281
+ return {};
282
+ };
283
+ return { hook, toolCallCounts };
284
+ }
285
+ /**
286
+ * Create a PostToolUseFailure hook that tracks tool failures via telemetry.
229
287
  */
230
- const browserToolLogHook = async (input) => {
231
- if (input.hook_event_name !== 'PostToolUse')
288
+ function createToolFailureHook(telemetry) {
289
+ return async (input) => {
290
+ if (input.hook_event_name !== 'PostToolUseFailure')
291
+ return {};
292
+ const failInput = input;
293
+ const shortName = failInput.tool_name.replace('mcp__ranger-browser__', '');
294
+ await telemetry.trackPhaseError('tool_failure', failInput.error, {
295
+ toolName: shortName,
296
+ isInterrupt: failInput.is_interrupt,
297
+ });
232
298
  return {};
233
- const postInput = input;
234
- const toolInput = postInput.tool_input;
235
- switch (postInput.tool_name) {
236
- case 'mcp__ranger-browser__browser_navigate':
237
- console.log(`[browser] Navigate → ${toolInput.url}`);
238
- break;
239
- case 'mcp__ranger-browser__browser_click':
240
- console.log(`[browser] Click → "${toolInput.element}"`);
241
- break;
242
- case 'mcp__ranger-browser__browser_type':
243
- console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
244
- break;
245
- case 'mcp__ranger-browser__browser_press_key':
246
- console.log(`[browser] Press key → ${toolInput.key}`);
247
- break;
248
- case 'mcp__ranger-browser__browser_wait_for':
249
- console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
250
- break;
299
+ };
300
+ }
301
+ /**
302
+ * Summarize tool input for telemetry (avoid logging sensitive/large data).
303
+ */
304
+ function summarizeToolInput(toolName, input) {
305
+ switch (toolName) {
306
+ case 'browser_navigate':
307
+ return { url: input.url };
308
+ case 'browser_click':
309
+ return { element: input.element };
310
+ case 'browser_type':
311
+ return { element: input.element };
312
+ case 'browser_take_screenshot':
313
+ return { filename: input.filename };
314
+ case 'browser_press_key':
315
+ return { key: input.key };
316
+ case 'browser_wait_for':
317
+ return { time: input.time, text: input.text };
318
+ default:
319
+ return {};
251
320
  }
252
- return {};
253
- };
321
+ }
254
322
  /**
255
323
  * Create a PostToolUse hook that uploads screenshots immediately after they're taken.
256
324
  * Returns the hook callback and a set of filenames that were successfully uploaded,
257
325
  * so the post-hoc fallback can skip them.
258
326
  */
259
- function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
327
+ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir, telemetry) {
260
328
  const uploadedFiles = new Set();
261
329
  let position = 1;
262
330
  const hook = async (input) => {
@@ -271,11 +339,15 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
271
339
  const filename = toolInput?.filename;
272
340
  if (!filename)
273
341
  return {};
342
+ const isKeyFrame = filename.toLowerCase().startsWith('key_');
343
+ telemetry.trackPhaseStart('hook_screenshot_upload', {
344
+ filename,
345
+ isKeyFrame,
346
+ });
274
347
  try {
275
348
  const pngPath = join(traceDir, filename);
276
349
  const pngBuffer = await readFile(pngPath);
277
350
  const pngStat = await stat(pngPath);
278
- const isKeyFrame = filename.toLowerCase().startsWith('key_');
279
351
  const displayName = filename
280
352
  .replace(/\.png$/i, '')
281
353
  .replace(/^key_/i, '')
@@ -315,96 +387,142 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
315
387
  await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
316
388
  // Track as uploaded
317
389
  uploadedFiles.add(filename);
390
+ telemetry.trackPhaseEnd('hook_screenshot_upload', {
391
+ filename,
392
+ bytes: pngBuffer.length,
393
+ });
318
394
  }
319
395
  catch (err) {
320
- // swallow error for now
321
- // TODO: should log / report these
396
+ await telemetry.trackPhaseError('hook_screenshot_upload', err, {
397
+ filename,
398
+ });
322
399
  }
323
400
  return {};
324
401
  };
325
402
  return { hook, uploadedFiles };
326
403
  }
327
404
  /**
328
- * Verify a checklist item in the browser
405
+ * Phase 1: Setup through agent completion.
406
+ * Returns context for processVerificationResult.
329
407
  */
330
- export async function verifyFeature(options) {
408
+ async function runVerification(options, telemetry) {
331
409
  const isDebugMode = !!options.debugOutcome;
332
410
  if (isDebugMode) {
333
411
  console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
334
412
  }
335
- // 1. Check for active feature
413
+ // --- Phase: feature_load ---
414
+ telemetry.trackPhaseStart('feature_load');
336
415
  const featureId = await getActiveFeatureId();
337
416
  if (!featureId) {
338
- throw new Error('No active feature. Run: ranger feature resume <id> or ranger feature create');
417
+ throw new Error('No active feature review. Run: ranger resume <id> or ranger create');
339
418
  }
340
- // Load feature details
341
419
  const feature = await getFeature(featureId);
342
- // Update the feature's gitBranch to the current branch
420
+ telemetry.setContext({ featureId });
343
421
  const currentBranch = getGitBranch();
344
422
  if (currentBranch && currentBranch !== feature.gitBranch) {
345
423
  await updateFeature(featureId, { gitBranch: currentBranch });
346
424
  console.log(` Updated branch to: ${currentBranch}`);
347
425
  }
348
- console.log(`\nActive feature: ${feature.name} (${featureId})`);
349
- // Get action items - leaf items that can be verified (non-closed with no non-closed children)
426
+ console.log(`\nActive feature review: ${feature.name} (${featureId})`);
350
427
  const { items: actionItems } = await getActionItems(featureId);
351
- // 2. Determine which checklist item we're verifying
428
+ const actionItemsById = new Map(actionItems.map((item) => [item.id, item]));
429
+ const displayItems = feature.checklistItems.map((item, index) => {
430
+ const actionItem = actionItemsById.get(item.id);
431
+ return {
432
+ ...item,
433
+ unaddressedCommentCount: actionItem?.unaddressedCommentCount ?? 0,
434
+ displayIndex: index,
435
+ actionable: !!actionItem && item.status !== 'closed',
436
+ };
437
+ });
438
+ telemetry.trackPhaseEnd('feature_load', {
439
+ itemCount: actionItems.length,
440
+ });
441
+ // --- Phase: scenario_select ---
442
+ telemetry.trackPhaseStart('scenario_select');
352
443
  let checklistItem = null;
353
- let taskDescription = options.task;
354
- if (options.item !== undefined) {
444
+ let taskDescription = options.notes;
445
+ if (options.scenario !== undefined) {
355
446
  // Use specified item index (1-based)
356
- const itemIndex = options.item - 1; // 1-based to 0-based
357
- if (itemIndex < 0 || itemIndex >= actionItems.length) {
358
- throw new Error(`Invalid item index: ${options.item}. Feature has ${actionItems.length} actionable items.`);
447
+ const itemIndex = options.scenario - 1; // 1-based to 0-based
448
+ if (itemIndex < 0 || itemIndex >= displayItems.length) {
449
+ throw new Error(`Invalid scenario index: ${options.scenario}. Feature review has ${displayItems.length} scenarios.`);
359
450
  }
360
- checklistItem = actionItems[itemIndex];
451
+ const displayItem = displayItems[itemIndex];
452
+ if (!displayItem.actionable) {
453
+ const reason = displayItem.status === 'closed' && displayItem.terminalReason
454
+ ? displayItem.terminalReason
455
+ : 'not actionable';
456
+ throw new Error(`Scenario ${options.scenario} is ${reason} and cannot be verified. Choose a different scenario.`);
457
+ }
458
+ const actionItem = actionItemsById.get(displayItem.id);
459
+ if (!actionItem) {
460
+ throw new Error(`Scenario ${options.scenario} is not currently actionable. Try another scenario.`);
461
+ }
462
+ checklistItem = actionItem;
361
463
  if (!taskDescription) {
362
464
  taskDescription = checklistItem.description;
363
465
  }
364
466
  }
365
467
  else {
366
- // Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
367
468
  const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
368
469
  if (!isInteractive) {
369
- // Non-TTY mode: require --item flag, show available items
370
- console.log('\nNon-interactive mode detected. The --item flag is required.');
371
- console.log('\nAvailable checklist items to verify:');
372
- actionItems.forEach((item, i) => {
373
- const emoji = item.status === 'verified'
470
+ // Non-TTY mode: require --scenario flag, show available scenarios
471
+ console.log('\nNon-interactive mode detected. The --scenario flag is required.');
472
+ console.log('\nAvailable scenarios to verify:');
473
+ displayItems.forEach((item) => {
474
+ const emoji = item.status === 'closed' &&
475
+ item.terminalReason === 'approved'
374
476
  ? '\u2705'
375
- : item.status === 'incomplete'
376
- ? '\ud83d\udfe0' // orange circle
377
- : item.status === 'blocked'
378
- ? '\ud83d\uded1'
379
- : item.status === 'closed'
380
- ? '\u26d4'
381
- : '\u2b1c';
477
+ : item.status === 'verified'
478
+ ? '\ud83d\udfe2'
479
+ : item.status === 'incomplete'
480
+ ? '\ud83d\udfe0'
481
+ : item.status === 'blocked'
482
+ ? '\ud83d\uded1'
483
+ : item.status === 'closed'
484
+ ? '\u26d4'
485
+ : item.status === 'verification_in_progress'
486
+ ? '\u23f3'
487
+ : '\u2b1c';
382
488
  const commentBadge = item.unaddressedCommentCount > 0
383
489
  ? ` [${item.unaddressedCommentCount} comments]`
384
490
  : '';
385
- console.log(` ${i + 1}. ${emoji} ${item.description}${commentBadge}`);
491
+ const actionHint = item.actionable ? '' : ' [not actionable]';
492
+ console.log(` ${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}${actionHint}`);
386
493
  });
387
- console.log('\nUsage: ranger verify-feature --item <number>');
388
- console.log('Example: ranger verify-feature --item 1');
389
- throw new Error('The --item flag is required in non-interactive mode. See available items above.');
494
+ console.log('\nUsage: ranger go --scenario <number>');
495
+ console.log('Example: ranger go --scenario 1');
496
+ throw new Error('The --scenario flag is required in non-interactive mode. See available scenarios above.');
390
497
  }
391
- // Interactive selection
392
- checklistItem = await selectChecklistItem(actionItems);
393
- if (!taskDescription && checklistItem) {
394
- taskDescription = checklistItem.description;
498
+ const selectedItem = await selectChecklistItem(displayItems);
499
+ if (selectedItem) {
500
+ const actionItem = actionItemsById.get(selectedItem.id);
501
+ if (!actionItem) {
502
+ throw new Error('Selected scenario is not currently actionable. Choose another scenario.');
503
+ }
504
+ checklistItem = actionItem;
505
+ if (!taskDescription) {
506
+ taskDescription = checklistItem.description;
507
+ }
395
508
  }
396
509
  }
397
510
  if (!checklistItem) {
398
- throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
511
+ throw new Error('No scenario selected. Create scenarios when creating the feature review with -c or --scenario flags.');
399
512
  }
400
513
  if (checklistItem.status === 'closed') {
401
- throw new Error(`Cannot verify item "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
514
+ throw new Error(`Cannot verify scenario "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
402
515
  }
403
516
  if (!taskDescription) {
404
- throw new Error('No task description provided');
517
+ throw new Error('No notes provided');
405
518
  }
406
- console.log(`\nVerifying: ${checklistItem.description}`);
407
- console.log(`Task: ${taskDescription}`);
519
+ telemetry.setContext({ checklistItemId: checklistItem.id });
520
+ telemetry.trackPhaseEnd('scenario_select', {
521
+ selectionMethod: options.scenario !== undefined ? 'flag' : 'interactive',
522
+ itemStatus: checklistItem.status,
523
+ });
524
+ console.log(`\nVerifying scenario: ${checklistItem.description}`);
525
+ console.log(`Notes: ${taskDescription}`);
408
526
  // Fetch reviewer feedback if item has parent or unaddressed comments
409
527
  let itemFeedback = null;
410
528
  if (checklistItem.parentItemId ||
@@ -415,8 +533,9 @@ export async function verifyFeature(options) {
415
533
  console.log(`Reviewer feedback: ${itemFeedback.unaddressedComments.length} comment(s) to verify`);
416
534
  }
417
535
  }
418
- catch {
419
- // Non-fatal - continue without feedback
536
+ catch (err) {
537
+ // Non-fatal - continue without feedback, but log it
538
+ await telemetry.trackPhaseError('feedback_fetch', err);
420
539
  }
421
540
  }
422
541
  // Start the session if it's in ready status
@@ -427,41 +546,38 @@ export async function verifyFeature(options) {
427
546
  await startSession(featureId, feature.currentSessionId);
428
547
  }
429
548
  catch (error) {
430
- // Ignore if session is already started (race condition)
431
549
  const message = error instanceof Error ? error.message : String(error);
432
550
  if (!message.includes('already')) {
433
551
  throw error;
434
552
  }
435
553
  }
436
554
  }
437
- // Update checklist item status to verification_in_progress
555
+ // Update scenario status to verification_in_progress
438
556
  await updateChecklistItem(featureId, checklistItem.id, {
439
557
  status: 'verification_in_progress',
440
558
  });
441
- // 3. Determine which environment to use (same pattern as verifyInBrowser)
442
- let activeEnv;
443
- if (options.env) {
444
- activeEnv = options.env;
559
+ // --- Phase: profile_resolution ---
560
+ telemetry.trackPhaseStart('profile_resolution');
561
+ let activeProfile = null;
562
+ if (options.profile) {
563
+ activeProfile = options.profile;
445
564
  }
446
565
  else {
447
- const activeEnvPath = join(getRangerDir(), 'active-env.txt');
448
- if (!existsSync(activeEnvPath)) {
449
- throw new Error('No active environment. Run: ranger use <env-name>');
450
- }
451
- activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
566
+ activeProfile = await readActiveProfileName();
567
+ }
568
+ if (!activeProfile) {
569
+ throw new Error('No active profile. Run: ranger profile use <profile-name>');
452
570
  }
453
- const envDir = getEnvDir(activeEnv);
571
+ const envDir = getEnvDir(activeProfile);
454
572
  if (!existsSync(envDir)) {
455
- throw new Error(`Environment "${activeEnv}" not found. Run: ranger add env ${activeEnv}`);
573
+ throw new Error(`Profile "${activeProfile}" not found. Run: ranger profile add ${activeProfile}`);
456
574
  }
457
- const settings = await loadSettings(activeEnv);
575
+ const settings = await loadSettings(activeProfile);
458
576
  const resolvedSettings = resolveEnvVars(settings);
459
- // Get base URL from settings
460
577
  let url = resolvedSettings.baseUrl;
461
578
  if (!url) {
462
- throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
579
+ throw new Error(`No baseUrl configured for profile "${activeProfile}". Run: ranger profile config set ${activeProfile} baseUrl <url>`);
463
580
  }
464
- // Append startPath if provided
465
581
  if (options.startPath) {
466
582
  const base = url.endsWith('/') ? url.slice(0, -1) : url;
467
583
  const path = options.startPath.startsWith('/')
@@ -469,13 +585,17 @@ export async function verifyFeature(options) {
469
585
  : '/' + options.startPath;
470
586
  url = base + path;
471
587
  }
472
- // 4. Create browser session
588
+ telemetry.trackPhaseEnd('profile_resolution', {
589
+ profileName: activeProfile,
590
+ });
591
+ // --- Phase: browser_session_create ---
592
+ telemetry.trackPhaseStart('browser_session_create');
473
593
  const token = await getToken();
474
594
  if (!token) {
475
- throw new Error('No API token configured. Run: ranger start <token>');
595
+ throw new Error('No API token configured. Run: ranger setup [token]');
476
596
  }
477
597
  const browserSession = await createBrowserSession({
478
- environmentName: activeEnv,
598
+ environmentName: activeProfile,
479
599
  settings: resolvedSettings,
480
600
  task: taskDescription,
481
601
  url,
@@ -483,116 +603,90 @@ export async function verifyFeature(options) {
483
603
  checklistItemId: checklistItem.id,
484
604
  });
485
605
  console.log(`Browser session created: ${browserSession.id}`);
486
- // Link the browser session to the checklist item immediately so steps
606
+ telemetry.setContext({ browserSessionId: browserSession.id });
607
+ // Link the browser session to the scenario immediately so steps
487
608
  // are visible in the dashboard while verification is in progress
488
609
  await updateChecklistItem(featureId, checklistItem.id, {
489
610
  browserSessionId: browserSession.id,
490
611
  });
612
+ telemetry.trackPhaseEnd('browser_session_create');
613
+ // --- Phase: playwright_config ---
614
+ telemetry.trackPhaseStart('playwright_config');
491
615
  let configResult;
492
- const startTime = Date.now();
493
- // Handle process interruption (Ctrl+C or coding agent killing the process)
494
- let interrupted = false;
495
- const handleInterrupt = async () => {
496
- if (interrupted)
497
- return;
498
- interrupted = true;
499
- console.log('\nVerification interrupted. Cleaning up...');
616
+ let sessionToken;
617
+ try {
618
+ sessionToken = await getProxySessionToken();
619
+ }
620
+ catch (error) {
621
+ const message = error instanceof Error ? error.message : String(error);
622
+ const errorMsg = `Failed to fetch proxy session token: ${message}`;
500
623
  try {
501
624
  await updateBrowserSession(browserSession.id, {
502
- status: 'interrupted',
503
- durationMs: Date.now() - startTime,
504
- });
505
- }
506
- catch {
507
- // Best effort
508
- }
509
- try {
510
- await updateChecklistItem(featureId, checklistItem.id, {
511
- status: 'pending',
625
+ status: 'failed',
626
+ durationMs: 0,
627
+ errorMessage: errorMsg,
512
628
  });
513
629
  }
514
- catch {
515
- // Best effort
630
+ catch (updateErr) {
631
+ await telemetry.trackPhaseError('session_error_update', updateErr);
516
632
  }
517
- if (configResult) {
518
- try {
519
- await cleanupTempFiles(configResult);
520
- }
521
- catch {
522
- // Best effort
523
- }
524
- }
525
- console.log('Checklist item reset to pending. Partial steps are preserved.');
526
- process.exit(0);
633
+ throw new Error(errorMsg);
634
+ }
635
+ configResult = await buildPlaywrightConfig(resolvedSettings, activeProfile, browserSession?.id);
636
+ telemetry.trackPhaseEnd('playwright_config');
637
+ const startTime = Date.now();
638
+ const rangerBrowserMcp = {
639
+ command: 'npx',
640
+ args: [
641
+ '@ranger-testing/playwright',
642
+ 'run-mcp-server',
643
+ '--config',
644
+ configResult.configPath,
645
+ ],
527
646
  };
528
- process.on('SIGINT', handleInterrupt);
529
- process.on('SIGTERM', handleInterrupt);
530
- let anthropicApiKey;
531
- let conversationDir;
647
+ // Build verifier prompt
532
648
  let verifierPrompt;
533
- let checklistItemResolved = false;
534
- try {
535
- // Fetch Anthropic API key
536
- try {
537
- anthropicApiKey = await getAnthropicApiKey();
538
- }
539
- catch (error) {
540
- const message = error instanceof Error ? error.message : String(error);
541
- const errorMsg = `Failed to fetch Anthropic API key: ${message}`;
542
- try {
543
- await updateBrowserSession(browserSession.id, {
544
- status: 'failed',
545
- durationMs: Date.now() - startTime,
546
- errorMessage: errorMsg,
547
- });
548
- }
549
- catch {
550
- // Ignore
551
- }
552
- throw new Error(errorMsg);
553
- }
554
- configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
555
- const rangerBrowserMcp = {
556
- command: 'npx',
557
- args: [
558
- '@ranger-testing/playwright',
559
- 'run-mcp-server',
560
- '--config',
561
- configResult.configPath,
562
- ],
563
- };
564
- // 5. UI Verifier + Evaluation Agent prompt
565
- if (isDebugMode) {
566
- verifierPrompt = getDebugPrompt();
649
+ if (isDebugMode) {
650
+ let debugFeedbackSection = '';
651
+ if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
652
+ const commentLines = itemFeedback.unaddressedComments
653
+ .map((c) => `- [${c.id}] "${c.content}"`)
654
+ .join('\n');
655
+ const instruction = options.debugAddressComments
656
+ ? 'For debug purposes, mark ALL of the following comments as addressed by including every ID in addressedCommentIds.'
657
+ : 'For debug purposes, do NOT mark any comments as addressed. Return an empty addressedCommentIds array.';
658
+ debugFeedbackSection = `\n\n## Debug: Reviewer Comments\n${instruction}\n\n${commentLines}`;
567
659
  }
568
- else {
569
- const notesSection = checklistItem.notes
570
- ? `\n\n## Additional Notes\n${checklistItem.notes}`
571
- : '';
572
- // Build reviewer feedback section if available
573
- let feedbackSection = '';
574
- if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
575
- const commentLines = itemFeedback.unaddressedComments
576
- .map((c) => {
577
- const date = new Date(c.createdAt).toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
578
- const author = c.authorName || c.authorEmail || 'Reviewer';
579
- return `- **${author}** (${date}): "${c.content}"`;
580
- })
581
- .join('\n');
582
- feedbackSection = `\n\n## Reviewer Feedback to Address
660
+ verifierPrompt = getDebugPrompt() + debugFeedbackSection;
661
+ }
662
+ else {
663
+ const notesSection = checklistItem.notes
664
+ ? `\n\n## Additional Notes\n${checklistItem.notes}`
665
+ : '';
666
+ let feedbackSection = '';
667
+ if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
668
+ const commentLines = itemFeedback.unaddressedComments
669
+ .map((c) => {
670
+ const date = new Date(c.createdAt).toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
671
+ const author = c.authorName || c.authorEmail || 'Reviewer';
672
+ return `- [${c.id}] **${author}** (${date}): "${c.content}"`;
673
+ })
674
+ .join('\n');
675
+ feedbackSection = `\n\n## Reviewer Feedback to Address
583
676
  The following reviewer comments were left on the previous version of this item.
584
- Verify that each concern has been addressed in the current implementation:
677
+ Verify that each concern has been addressed in the current implementation.
678
+ For each comment you believe has been addressed, include its ID (the bracketed value) in the addressedCommentIds array in your output.
585
679
 
586
680
  ${commentLines}`;
587
- }
588
- let canonicalFlowSection = '';
589
- if (itemFeedback?.canonicalFlow) {
590
- canonicalFlowSection = `\n\n## Expected Flow (from previous verification)
681
+ }
682
+ let canonicalFlowSection = '';
683
+ if (itemFeedback?.canonicalFlow) {
684
+ canonicalFlowSection = `\n\n## Expected Flow (from previous verification)
591
685
  ${itemFeedback.canonicalFlow}`;
592
- }
593
- verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
686
+ }
687
+ verifierPrompt = `You are a Feature Review Verifier. Your job is to verify a scenario by executing a UI flow and evaluating whether it adequately completes the scenario.
594
688
 
595
- ## Checklist Item to Verify
689
+ ## Scenario to Verify
596
690
  ${checklistItem.description}${notesSection}${feedbackSection}${canonicalFlowSection}
597
691
 
598
692
  ## Task to Execute
@@ -605,7 +699,7 @@ Your base URL is: ${url}
605
699
  - DO NOT navigate to any different domain, host, or port under any circumstances
606
700
  - IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
607
701
  - If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
608
- - The base URL above is the ONLY authorized environment for this verification
702
+ - The base URL above is the ONLY authorized profile for this verification
609
703
 
610
704
  ## Instructions
611
705
  1. Navigate to the URL above using browser_navigate
@@ -614,7 +708,7 @@ Your base URL is: ${url}
614
708
  4. Execute the task step-by-step using browser tools
615
709
  5. **Take screenshots at key moments** (see Screenshot Guidelines below)
616
710
  6. Document any issues found (bugs, errors, unexpected behavior)
617
- 7. After completing the verification, evaluate whether the result adequately verifies the checklist item
711
+ 7. After completing the verification, evaluate whether the result adequately verifies the scenario
618
712
 
619
713
  ## Screenshot Guidelines - IMPORTANT
620
714
  Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
@@ -630,7 +724,7 @@ Take screenshots throughout the verification flow so a human can review it for c
630
724
  **Screenshot naming:**
631
725
  - Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
632
726
  - Number prefixes (01_, 02_, etc.) help maintain chronological order
633
- - For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
727
+ - For KEY MOMENTS that prove the scenario is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
634
728
  - The "key_" prefix marks screenshots as high-priority evidence for human reviewers
635
729
 
636
730
  **Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
@@ -657,266 +751,340 @@ After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP erro
657
751
  This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
658
752
 
659
753
  ## Evaluation Criteria
660
- - VERIFIED: The task completed successfully and the checklist item requirements are fully met
661
- - PARTIAL: The task partially completed but some aspects of the checklist item are not verified
754
+ - VERIFIED: The task completed successfully and the scenario requirements are fully met
755
+ - PARTIAL: The task partially completed but some aspects of the scenario are not verified
662
756
  - BLOCKED: A blocking issue (bug, error, missing feature) prevents completion
663
757
  - FAILED: The task could not be completed due to errors
664
758
 
665
759
  Return your findings in the structured output format with your evaluation.`;
666
- }
667
- const outputSchema = {
668
- type: 'object',
669
- properties: {
670
- success: { type: 'boolean' },
671
- summary: { type: 'string' },
672
- evaluation: {
673
- type: 'string',
674
- enum: ['verified', 'partial', 'blocked', 'failed'],
675
- },
676
- evaluationReason: { type: 'string' },
677
- issues: {
678
- type: 'array',
679
- items: {
680
- type: 'object',
681
- properties: {
682
- severity: {
683
- type: 'string',
684
- enum: ['BLOCKER', 'MAJOR', 'MINOR'],
685
- },
686
- type: {
687
- type: 'string',
688
- enum: [
689
- 'HTTP_404',
690
- 'HTTP_500',
691
- 'HTTP_400',
692
- 'NAVIGATION_ERROR',
693
- 'OTHER',
694
- ],
695
- },
696
- description: { type: 'string' },
697
- screenshot: { type: 'string' },
760
+ }
761
+ const outputSchema = {
762
+ type: 'object',
763
+ properties: {
764
+ success: { type: 'boolean' },
765
+ summary: { type: 'string' },
766
+ evaluation: {
767
+ type: 'string',
768
+ enum: ['verified', 'partial', 'blocked', 'failed'],
769
+ },
770
+ evaluationReason: { type: 'string' },
771
+ issues: {
772
+ type: 'array',
773
+ items: {
774
+ type: 'object',
775
+ properties: {
776
+ severity: {
777
+ type: 'string',
778
+ enum: ['BLOCKER', 'MAJOR', 'MINOR'],
698
779
  },
699
- required: ['severity', 'description'],
780
+ type: {
781
+ type: 'string',
782
+ enum: [
783
+ 'HTTP_404',
784
+ 'HTTP_500',
785
+ 'HTTP_400',
786
+ 'NAVIGATION_ERROR',
787
+ 'OTHER',
788
+ ],
789
+ },
790
+ description: { type: 'string' },
791
+ screenshot: { type: 'string' },
700
792
  },
793
+ required: ['severity', 'description'],
701
794
  },
702
795
  },
703
- required: ['success', 'summary', 'evaluation', 'evaluationReason'],
704
- };
705
- // 6. Execute agent
706
- const traceDir = getTraceDirectory(browserSession.id);
707
- const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir);
708
- const result = query({
709
- prompt: verifierPrompt,
710
- options: {
711
- cwd: process.cwd(),
712
- model: 'claude-opus-4-6',
713
- mcpServers: {
714
- 'ranger-browser': rangerBrowserMcp,
715
- },
716
- tools: ['mcp__ranger-browser__*'],
717
- permissionMode: 'acceptEdits',
718
- allowedTools: [
719
- 'mcp__ranger-browser__*',
720
- 'Read',
721
- 'Glob',
722
- 'Grep',
796
+ addressedCommentIds: {
797
+ type: 'array',
798
+ description: 'IDs of reviewer comments that have been addressed in the current implementation',
799
+ items: { type: 'string' },
800
+ },
801
+ },
802
+ required: ['success', 'summary', 'evaluation', 'evaluationReason'],
803
+ };
804
+ // --- Phase: agent_execution ---
805
+ telemetry.trackPhaseStart('agent_execution');
806
+ const traceDir = getTraceDirectory(browserSession.id);
807
+ const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir, telemetry);
808
+ const toolCallHook = createToolCallTrackingHook(telemetry);
809
+ const toolFailureHook = createToolFailureHook(telemetry);
810
+ const result = query({
811
+ prompt: verifierPrompt,
812
+ options: {
813
+ cwd: process.cwd(),
814
+ model: 'claude-opus-4-6',
815
+ mcpServers: {
816
+ 'ranger-browser': rangerBrowserMcp,
817
+ },
818
+ tools: ['mcp__ranger-browser__*'],
819
+ permissionMode: 'acceptEdits',
820
+ allowedTools: ['mcp__ranger-browser__*', 'Read', 'Glob', 'Grep'],
821
+ outputFormat: {
822
+ type: 'json_schema',
823
+ schema: outputSchema,
824
+ },
825
+ hooks: {
826
+ PostToolUse: [
827
+ {
828
+ hooks: [toolCallHook.hook, screenshotHook.hook],
829
+ },
830
+ ],
831
+ PostToolUseFailure: [
832
+ {
833
+ hooks: [toolFailureHook],
834
+ },
723
835
  ],
724
- outputFormat: {
725
- type: 'json_schema',
726
- schema: outputSchema,
727
- },
728
- hooks: {
729
- PostToolUse: [
730
- {
731
- hooks: [browserToolLogHook, screenshotHook.hook],
732
- },
733
- ],
734
- },
735
- env: {
736
- ...process.env,
737
- ANTHROPIC_API_KEY: anthropicApiKey,
738
- },
739
- persistSession: false,
740
836
  },
741
- });
742
- // 7. Collect messages
743
- let finalResult = null;
744
- let agentError = null;
745
- // Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
746
- let lastStructuredOutputInput = null;
747
- const conversationFilePath = getConversationFilePath(browserSession.id);
748
- conversationDir = dirname(conversationFilePath);
749
- await mkdir(conversationDir, { recursive: true });
750
- const TIMEOUT_MS = 59 * 60 * 1000;
751
- const timeoutPromise = new Promise((_, reject) => {
752
- setTimeout(() => {
753
- reject(new Error('Agent execution timed out after 59 minutes'));
754
- }, TIMEOUT_MS);
755
- });
756
- try {
757
- await Promise.race([
758
- (async () => {
759
- for await (const message of result) {
760
- try {
761
- const jsonLine = JSON.stringify(message) + '\n';
762
- await appendFile(conversationFilePath, jsonLine, 'utf-8');
763
- }
764
- catch {
765
- // Ignore
766
- }
767
- const msg = message;
768
- // Capture StructuredOutput tool call input as fallback
769
- // This handles SDK bug where structured_output is not populated in result
770
- if (msg.type === 'assistant' && msg.message?.content) {
771
- for (const block of msg.message.content) {
772
- if (block.type === 'tool_use' &&
773
- block.name === 'StructuredOutput' &&
774
- block.input) {
775
- lastStructuredOutputInput =
776
- block.input;
777
- }
837
+ env: {
838
+ ...process.env,
839
+ ANTHROPIC_API_KEY: sessionToken,
840
+ ANTHROPIC_BASE_URL: getAiProxyUrl(),
841
+ },
842
+ persistSession: false,
843
+ },
844
+ });
845
+ // Collect messages
846
+ let finalResult = null;
847
+ let agentError = null;
848
+ let lastStructuredOutputInput = null;
849
+ let resultMeta = {};
850
+ const conversationFilePath = getConversationFilePath(browserSession.id);
851
+ const conversationDir = dirname(conversationFilePath);
852
+ await mkdir(conversationDir, { recursive: true });
853
+ const TIMEOUT_MS = 59 * 60 * 1000;
854
+ const timeoutPromise = new Promise((_, reject) => {
855
+ setTimeout(() => {
856
+ reject(new Error('Agent execution timed out after 59 minutes'));
857
+ }, TIMEOUT_MS);
858
+ });
859
+ try {
860
+ await Promise.race([
861
+ (async () => {
862
+ for await (const message of result) {
863
+ try {
864
+ const jsonLine = JSON.stringify(message) + '\n';
865
+ await appendFile(conversationFilePath, jsonLine, 'utf-8');
866
+ }
867
+ catch {
868
+ // Ignore
869
+ }
870
+ const msg = message;
871
+ // Capture StructuredOutput tool call input as fallback
872
+ if (msg.type === 'assistant' && msg.message?.content) {
873
+ for (const block of msg.message.content) {
874
+ if (block.type === 'tool_use' &&
875
+ block.name === 'StructuredOutput' &&
876
+ block.input) {
877
+ lastStructuredOutputInput =
878
+ block.input;
778
879
  }
779
880
  }
780
- if (msg.error) {
781
- let errorText = msg.error;
782
- if (msg.message?.content &&
783
- Array.isArray(msg.message.content)) {
784
- const texts = msg.message.content
785
- .filter((c) => c.type === 'text')
786
- .map((c) => c.text || '')
787
- .filter(Boolean);
788
- if (texts.length > 0) {
789
- errorText = texts.join(' ');
790
- }
881
+ }
882
+ if (msg.error) {
883
+ let errorText = msg.error;
884
+ if (msg.message?.content &&
885
+ Array.isArray(msg.message.content)) {
886
+ const texts = msg.message.content
887
+ .filter((c) => c.type === 'text')
888
+ .map((c) => c.text || '')
889
+ .filter(Boolean);
890
+ if (texts.length > 0) {
891
+ errorText = texts.join(' ');
791
892
  }
792
- agentError = errorText;
793
893
  }
794
- if (message.type === 'result') {
795
- if (message.subtype === 'success' &&
796
- message.structured_output) {
797
- finalResult =
798
- message.structured_output;
894
+ agentError = errorText;
895
+ }
896
+ if (msg.type === 'result') {
897
+ // Capture SDK result metadata
898
+ resultMeta = {
899
+ numTurns: msg.num_turns,
900
+ totalCostUsd: msg.total_cost_usd,
901
+ durationApiMs: msg.duration_api_ms,
902
+ sdkDurationMs: msg.duration_ms,
903
+ inputTokens: msg.usage?.input_tokens,
904
+ outputTokens: msg.usage?.output_tokens,
905
+ cacheReadTokens: msg.usage?.cache_read_input_tokens,
906
+ cacheCreationTokens: msg.usage?.cache_creation_input_tokens,
907
+ };
908
+ if (msg.subtype === 'success' &&
909
+ message.structured_output) {
910
+ finalResult = message.structured_output;
911
+ }
912
+ else if (msg.subtype !== 'success') {
913
+ if (lastStructuredOutputInput &&
914
+ msg.errors?.length === 0) {
915
+ finalResult = lastStructuredOutputInput;
916
+ agentError = null;
799
917
  }
800
- else if (message.subtype !== 'success') {
801
- // SDK bug workaround: If we got error_during_execution but have
802
- // a StructuredOutput tool call, use that instead
803
- if (lastStructuredOutputInput &&
804
- message.errors?.length === 0) {
805
- finalResult = lastStructuredOutputInput;
806
- // Clear the error since we actually succeeded
807
- agentError = null;
808
- }
809
- else if (!agentError) {
810
- agentError =
811
- message.errors?.join(', ') ||
812
- 'Unknown error';
813
- }
918
+ else if (!agentError) {
919
+ agentError =
920
+ msg.errors?.join(', ') || 'Unknown error';
814
921
  }
815
922
  }
816
923
  }
817
- })(),
818
- timeoutPromise,
819
- ]);
820
- }
821
- catch (error) {
822
- agentError = error instanceof Error ? error.message : String(error);
823
- }
824
- const durationMs = Date.now() - startTime;
825
- // 8. Upload trace, videos, screenshots with metadata, and update session
826
- let traceDownloadUrl;
827
- try {
828
- const traceDir = getTraceDirectory(browserSession.id);
829
- if (existsSync(traceDir)) {
830
- const files = await readdir(traceDir);
831
- if (files.length > 0) {
832
- // Upload trace zip
924
+ }
925
+ })(),
926
+ timeoutPromise,
927
+ ]);
928
+ }
929
+ catch (error) {
930
+ agentError = error instanceof Error ? error.message : String(error);
931
+ }
932
+ const durationMs = Date.now() - startTime;
933
+ telemetry.trackPhaseEnd('agent_execution', {
934
+ ...resultMeta,
935
+ toolCallCounts: Object.fromEntries(toolCallHook.toolCallCounts),
936
+ hasResult: !!finalResult,
937
+ hasError: !!agentError,
938
+ });
939
+ return {
940
+ featureId,
941
+ checklistItem,
942
+ browserSession,
943
+ finalResult,
944
+ agentError,
945
+ lastStructuredOutputInput,
946
+ screenshotHook,
947
+ toolCallCounts: toolCallHook.toolCallCounts,
948
+ configResult,
949
+ startTime,
950
+ durationMs,
951
+ conversationFilePath,
952
+ conversationDir,
953
+ isDebugMode,
954
+ debugOutcome: options.debugOutcome,
955
+ debugAddressComments: options.debugAddressComments,
956
+ resultMeta,
957
+ telemetry,
958
+ feedbackCommentIds: itemFeedback
959
+ ? itemFeedback.unaddressedComments.map((c) => c.id)
960
+ : [],
961
+ };
962
+ }
963
+ /**
964
+ * Phase 2: Upload artifacts, evaluate result, update scenario.
965
+ */
966
+ async function processVerificationResult(ctx) {
967
+ const { featureId, checklistItem, browserSession, screenshotHook, durationMs, conversationFilePath, isDebugMode, debugOutcome, debugAddressComments, telemetry, feedbackCommentIds, } = ctx;
968
+ const { finalResult, agentError } = ctx;
969
+ let traceDownloadUrl;
970
+ // --- Upload trace ---
971
+ try {
972
+ const traceDir = getTraceDirectory(browserSession.id);
973
+ if (existsSync(traceDir)) {
974
+ const files = await readdir(traceDir);
975
+ if (files.length > 0) {
976
+ telemetry.trackPhaseStart('upload_trace');
977
+ try {
833
978
  const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
834
979
  const traceBuffer = await zipDirectory(traceDir);
835
980
  await uploadTrace(traceUrls.uploadUrl, traceBuffer);
836
981
  traceDownloadUrl = traceUrls.downloadUrl;
837
- // Upload videos from videos/ subdirectory
838
- const videos = await loadSessionVideos(traceDir);
839
- for (const video of videos) {
840
- try {
841
- const videoBuffer = await readFile(video.path);
842
- const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
843
- await uploadVideo(videoUrls.uploadUrl, videoBuffer);
844
- }
845
- catch {
846
- // Ignore individual video upload errors
847
- }
982
+ telemetry.trackPhaseEnd('upload_trace', {
983
+ bytes: traceBuffer.length,
984
+ });
985
+ }
986
+ catch (err) {
987
+ await telemetry.trackPhaseError('upload_trace', err);
988
+ }
989
+ // --- Upload videos ---
990
+ const videos = await loadSessionVideos(traceDir);
991
+ for (const video of videos) {
992
+ telemetry.trackPhaseStart('upload_video', {
993
+ filename: video.filename,
994
+ });
995
+ try {
996
+ const videoBuffer = await readFile(video.path);
997
+ const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
998
+ await uploadVideo(videoUrls.uploadUrl, videoBuffer);
999
+ telemetry.trackPhaseEnd('upload_video', {
1000
+ filename: video.filename,
1001
+ bytes: videoBuffer.length,
1002
+ });
848
1003
  }
849
- // Create verification steps and upload screenshots as step assets
850
- // Filter out screenshots already uploaded by the PostToolUse hook
851
- const pngFiles = files
852
- .filter((f) => f.toLowerCase().endsWith('.png'))
853
- .filter((f) => !screenshotHook.uploadedFiles.has(f))
854
- .sort(); // Sort to maintain order by filename (01_, 02_, etc.)
855
- // Start position after any screenshots already uploaded by the hook
856
- const positionOffset = screenshotHook.uploadedFiles.size + 1;
857
- for (let i = 0; i < pngFiles.length; i++) {
858
- const pngFile = pngFiles[i];
859
- try {
860
- const pngPath = join(traceDir, pngFile);
861
- const pngBuffer = await readFile(pngPath);
862
- const pngStat = await stat(pngPath);
863
- // Detect "key_" prefix for high-priority screenshots
864
- const isKeyFrame = pngFile
865
- .toLowerCase()
866
- .startsWith('key_');
867
- const displayName = pngFile
868
- .replace(/\.png$/i, '')
869
- .replace(/^key_/i, '')
870
- .replace(/^\d+_/, '')
871
- .replace(/-/g, ' ');
872
- // Create a verification step for this screenshot
873
- const { step } = await createVerificationStep(browserSession.id, {
874
- checklistItemId: checklistItem.id,
875
- position: positionOffset + i,
876
- stepType: 'screenshot',
877
- stepName: displayName,
878
- description: isKeyFrame
879
- ? 'Key moment captured during verification'
880
- : 'Screenshot captured during verification',
881
- isKeyStep: isKeyFrame,
882
- status: 'success',
883
- metadata: {
884
- filename: pngFile,
885
- timestamp: pngStat.mtime.toISOString(),
886
- },
887
- });
888
- // Create step asset with upload URL
889
- const assetResponse = await createStepAsset(browserSession.id, step.id, {
1004
+ catch (err) {
1005
+ await telemetry.trackPhaseError('upload_video', err, {
1006
+ filename: video.filename,
1007
+ });
1008
+ }
1009
+ }
1010
+ // --- Upload remaining screenshots ---
1011
+ const pngFiles = files
1012
+ .filter((f) => f.toLowerCase().endsWith('.png'))
1013
+ .filter((f) => !screenshotHook.uploadedFiles.has(f))
1014
+ .sort();
1015
+ const positionOffset = screenshotHook.uploadedFiles.size + 1;
1016
+ for (let i = 0; i < pngFiles.length; i++) {
1017
+ const pngFile = pngFiles[i];
1018
+ const isKeyFrame = pngFile.toLowerCase().startsWith('key_');
1019
+ telemetry.trackPhaseStart('upload_screenshot', {
1020
+ filename: pngFile,
1021
+ isKeyFrame,
1022
+ });
1023
+ try {
1024
+ const pngPath = join(traceDir, pngFile);
1025
+ const pngBuffer = await readFile(pngPath);
1026
+ const pngStat = await stat(pngPath);
1027
+ const displayName = pngFile
1028
+ .replace(/\.png$/i, '')
1029
+ .replace(/^key_/i, '')
1030
+ .replace(/^\d+_/, '')
1031
+ .replace(/-/g, ' ');
1032
+ const { step } = await createVerificationStep(browserSession.id, {
1033
+ checklistItemId: checklistItem.id,
1034
+ position: positionOffset + i,
1035
+ stepType: 'screenshot',
1036
+ stepName: displayName,
1037
+ description: isKeyFrame
1038
+ ? 'Key moment captured during verification'
1039
+ : 'Screenshot captured during verification',
1040
+ isKeyStep: isKeyFrame,
1041
+ status: 'success',
1042
+ metadata: {
890
1043
  filename: pngFile,
891
- assetType: 'screenshot',
892
- timing: 'after',
893
- position: 0,
894
- capturedAt: pngStat.mtime.toISOString(),
895
- metadata: {
896
- name: displayName,
897
- highPriority: isKeyFrame,
898
- },
899
- });
900
- // Upload the screenshot to the signed URL
901
- await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
902
- }
903
- catch {
904
- // Ignore individual screenshot upload errors
905
- }
1044
+ timestamp: pngStat.mtime.toISOString(),
1045
+ },
1046
+ });
1047
+ const assetResponse = await createStepAsset(browserSession.id, step.id, {
1048
+ filename: pngFile,
1049
+ assetType: 'screenshot',
1050
+ timing: 'after',
1051
+ position: 0,
1052
+ capturedAt: pngStat.mtime.toISOString(),
1053
+ metadata: {
1054
+ name: displayName,
1055
+ highPriority: isKeyFrame,
1056
+ },
1057
+ });
1058
+ await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
1059
+ telemetry.trackPhaseEnd('upload_screenshot', {
1060
+ filename: pngFile,
1061
+ bytes: pngBuffer.length,
1062
+ });
1063
+ }
1064
+ catch (err) {
1065
+ await telemetry.trackPhaseError('upload_screenshot', err, { filename: pngFile });
906
1066
  }
907
1067
  }
908
1068
  }
909
- if (existsSync(conversationFilePath)) {
910
- try {
911
- const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
912
- const conversationBuffer = await readFile(conversationFilePath);
913
- await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
914
- }
915
- catch {
916
- // Ignore
917
- }
1069
+ }
1070
+ // --- Upload conversation ---
1071
+ if (existsSync(conversationFilePath)) {
1072
+ telemetry.trackPhaseStart('upload_conversation');
1073
+ try {
1074
+ const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
1075
+ const conversationBuffer = await readFile(conversationFilePath);
1076
+ await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
1077
+ telemetry.trackPhaseEnd('upload_conversation', {
1078
+ bytes: conversationBuffer.length,
1079
+ });
918
1080
  }
919
- // Cast to help TypeScript understand the type after complex control flow
1081
+ catch (err) {
1082
+ await telemetry.trackPhaseError('upload_conversation', err);
1083
+ }
1084
+ }
1085
+ // --- Update browser session ---
1086
+ telemetry.trackPhaseStart('update_session');
1087
+ try {
920
1088
  const typedResult = finalResult;
921
1089
  const updateData = {
922
1090
  status: (agentError ? 'failed' : 'completed'),
@@ -933,121 +1101,243 @@ Return your findings in the structured output format with your evaluation.`;
933
1101
  typedResult.durationMs = durationMs;
934
1102
  typedResult.checklistItemId = checklistItem.id;
935
1103
  }
1104
+ telemetry.trackPhaseEnd('update_session');
936
1105
  }
937
- catch {
938
- // Ignore upload errors
1106
+ catch (err) {
1107
+ await telemetry.trackPhaseError('update_session', err);
939
1108
  }
940
- // 9. Determine the result to use for evaluation
941
- // In debug mode, use mock evaluation; otherwise use agent result
942
- let resultForEval;
943
- if (isDebugMode && options.debugOutcome) {
944
- const mockEval = getMockEvaluation(options.debugOutcome);
945
- resultForEval = {
946
- ...mockEval,
947
- sessionId: browserSession.id,
948
- sessionDir: getTraceDirectory(browserSession.id),
949
- durationMs,
950
- traceViewerUrl: traceDownloadUrl
951
- ? buildTraceViewerUrl(traceDownloadUrl)
952
- : undefined,
953
- checklistItemId: checklistItem.id,
954
- };
955
- console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
1109
+ }
1110
+ catch {
1111
+ // Ignore upload errors
1112
+ }
1113
+ // --- Phase: evaluation ---
1114
+ telemetry.trackPhaseStart('evaluation');
1115
+ let resultForEval;
1116
+ if (isDebugMode && debugOutcome) {
1117
+ const mockEval = getMockEvaluation(debugOutcome);
1118
+ const typedResult = finalResult;
1119
+ resultForEval = {
1120
+ ...mockEval,
1121
+ sessionId: browserSession.id,
1122
+ sessionDir: getTraceDirectory(browserSession.id),
1123
+ durationMs,
1124
+ traceViewerUrl: traceDownloadUrl
1125
+ ? buildTraceViewerUrl(traceDownloadUrl)
1126
+ : undefined,
1127
+ checklistItemId: checklistItem.id,
1128
+ addressedCommentIds: typedResult?.addressedCommentIds ?? [],
1129
+ };
1130
+ console.log(`\n[DEBUG MODE] Using mock evaluation: ${debugOutcome}`);
1131
+ }
1132
+ else {
1133
+ const typedResult = finalResult;
1134
+ if (agentError && !typedResult) {
1135
+ throw new Error(`Verification failed: ${agentError}`);
956
1136
  }
957
- else {
958
- const typedResult = finalResult;
959
- if (agentError && !typedResult) {
960
- throw new Error(`Verification failed: ${agentError}`);
961
- }
962
- if (!typedResult) {
963
- throw new Error('No result received from agent');
1137
+ if (!typedResult) {
1138
+ throw new Error('No result received from agent');
1139
+ }
1140
+ resultForEval = typedResult;
1141
+ }
1142
+ telemetry.trackPhaseEnd('evaluation', {
1143
+ evaluation: resultForEval.evaluation,
1144
+ issueCount: resultForEval.issues?.length ?? 0,
1145
+ });
1146
+ // --- Phase: scenario_update ---
1147
+ telemetry.trackPhaseStart('scenario_update');
1148
+ const evaluation = resultForEval.evaluation;
1149
+ if (evaluation === 'verified') {
1150
+ await updateChecklistItem(featureId, checklistItem.id, {
1151
+ status: 'verified',
1152
+ browserSessionId: browserSession.id,
1153
+ });
1154
+ console.log(`\n\u2705 Scenario verified!`);
1155
+ }
1156
+ else if (evaluation === 'blocked') {
1157
+ await updateChecklistItem(featureId, checklistItem.id, {
1158
+ status: 'blocked',
1159
+ browserSessionId: browserSession.id,
1160
+ blockedReason: resultForEval.evaluationReason,
1161
+ });
1162
+ // Enhanced output for Claude Code
1163
+ console.log(`\n${'='.repeat(60)}`);
1164
+ console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
1165
+ console.log(`${'='.repeat(60)}`);
1166
+ console.log(`\nIssue: ${resultForEval.evaluationReason}`);
1167
+ if (resultForEval.issues?.length) {
1168
+ console.log(`\nDetails:`);
1169
+ for (const issue of resultForEval.issues) {
1170
+ const typeStr = issue.type ? ` (${issue.type})` : '';
1171
+ console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
964
1172
  }
965
- resultForEval = typedResult;
966
1173
  }
967
- // 10. Update checklist item based on evaluation
968
- const evaluation = resultForEval.evaluation;
969
- if (evaluation === 'verified') {
970
- await updateChecklistItem(featureId, checklistItem.id, {
971
- status: 'verified',
972
- browserSessionId: browserSession.id,
973
- });
974
- console.log(`\n\u2705 Checklist item verified!`);
1174
+ if (resultForEval.traceViewerUrl) {
1175
+ console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
975
1176
  }
976
- else if (evaluation === 'blocked') {
977
- await updateChecklistItem(featureId, checklistItem.id, {
978
- status: 'blocked',
979
- browserSessionId: browserSession.id,
980
- blockedReason: resultForEval.evaluationReason,
1177
+ console.log(`\nSuggested action: Debug this issue in your code, then run go again.`);
1178
+ console.log(`${'='.repeat(60)}\n`);
1179
+ }
1180
+ else if (evaluation === 'partial' ||
1181
+ evaluation === 'failed' ||
1182
+ evaluation === 'incomplete') {
1183
+ // Mark as incomplete - verification happened but requirements not fully met
1184
+ await updateChecklistItem(featureId, checklistItem.id, {
1185
+ status: 'incomplete',
1186
+ browserSessionId: browserSession.id,
1187
+ incompleteReason: resultForEval.evaluationReason,
1188
+ });
1189
+ // Check if other items are terminal and prompt user
1190
+ await handleIncompleteItem(featureId, checklistItem, resultForEval);
1191
+ }
1192
+ telemetry.trackPhaseEnd('scenario_update', { newStatus: evaluation });
1193
+ // --- Phase: comment_addressing ---
1194
+ telemetry.trackPhaseStart('comment_addressing', {
1195
+ totalFeedbackComments: feedbackCommentIds.length,
1196
+ agentAddressedCount: resultForEval.addressedCommentIds?.length ?? 0,
1197
+ });
1198
+ const addressedIds = resultForEval.addressedCommentIds?.filter((id) => feedbackCommentIds.includes(id));
1199
+ if (addressedIds && addressedIds.length > 0) {
1200
+ try {
1201
+ await markCommentsAddressed(featureId, checklistItem.id, addressedIds);
1202
+ console.log(`Marked ${addressedIds.length} comment(s) as addressed`);
1203
+ telemetry.trackPhaseEnd('comment_addressing', {
1204
+ addressedCount: addressedIds.length,
981
1205
  });
982
- // Enhanced output for Claude Code
983
- console.log(`\n${'='.repeat(60)}`);
984
- console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
985
- console.log(`${'='.repeat(60)}`);
986
- console.log(`\nIssue: ${resultForEval.evaluationReason}`);
987
- if (resultForEval.issues?.length) {
988
- console.log(`\nDetails:`);
989
- for (const issue of resultForEval.issues) {
990
- const typeStr = issue.type ? ` (${issue.type})` : '';
991
- console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
992
- }
1206
+ }
1207
+ catch (err) {
1208
+ await telemetry.trackPhaseError('comment_addressing', err);
1209
+ }
1210
+ }
1211
+ else {
1212
+ telemetry.trackPhaseEnd('comment_addressing', {
1213
+ addressedCount: 0,
1214
+ });
1215
+ }
1216
+ return resultForEval;
1217
+ }
1218
+ /**
1219
+ * Verify a scenario in the browser.
1220
+ * Orchestrates runVerification -> processVerificationResult with telemetry.
1221
+ */
1222
+ export async function verifyFeature(options) {
1223
+ const telemetry = createTelemetryCollector('go');
1224
+ await telemetry.trackCommandStart({
1225
+ hasProfile: !!options.profile,
1226
+ hasScenario: options.scenario !== undefined,
1227
+ hasNotes: !!options.notes,
1228
+ isDebugMode: !!options.debugOutcome,
1229
+ });
1230
+ const { envNames } = await getEnvNames();
1231
+ if (envNames.length === 0) {
1232
+ throw new Error(formatProfileRequiredMessage((text) => bold(text)));
1233
+ }
1234
+ let ctx;
1235
+ let interrupted = false;
1236
+ let checklistItemResolved = false;
1237
+ const handleInterrupt = async () => {
1238
+ if (interrupted)
1239
+ return;
1240
+ interrupted = true;
1241
+ console.log('\nVerification interrupted. Cleaning up...');
1242
+ await telemetry.trackCommandEnd('interrupted', {
1243
+ durationMs: ctx ? Date.now() - ctx.startTime : 0,
1244
+ });
1245
+ if (ctx) {
1246
+ try {
1247
+ await updateBrowserSession(ctx.browserSession.id, {
1248
+ status: 'interrupted',
1249
+ durationMs: Date.now() - ctx.startTime,
1250
+ });
993
1251
  }
994
- if (resultForEval.traceViewerUrl) {
995
- console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
1252
+ catch {
1253
+ // Best effort
1254
+ }
1255
+ try {
1256
+ await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
1257
+ status: 'pending',
1258
+ });
1259
+ }
1260
+ catch {
1261
+ // Best effort
1262
+ }
1263
+ if (ctx.configResult) {
1264
+ try {
1265
+ await cleanupTempFiles(ctx.configResult);
1266
+ }
1267
+ catch {
1268
+ // Best effort
1269
+ }
996
1270
  }
997
- console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
998
- console.log(`${'='.repeat(60)}\n`);
999
- }
1000
- else if (evaluation === 'partial' || evaluation === 'failed') {
1001
- // Mark as incomplete - verification happened but requirements not fully met
1002
- await updateChecklistItem(featureId, checklistItem.id, {
1003
- status: 'incomplete',
1004
- browserSessionId: browserSession.id,
1005
- incompleteReason: resultForEval.evaluationReason,
1006
- });
1007
- // Check if other items are terminal and prompt user
1008
- await handleIncompleteItem(featureId, checklistItem, resultForEval);
1009
1271
  }
1272
+ console.log('Scenario reset to pending. Partial steps are preserved.');
1273
+ process.exit(0);
1274
+ };
1275
+ process.on('SIGINT', handleInterrupt);
1276
+ process.on('SIGTERM', handleInterrupt);
1277
+ try {
1278
+ // Phase 1: Setup + agent execution
1279
+ ctx = await runVerification(options, telemetry);
1280
+ // Boundary flush — all agent execution telemetry is now safe
1281
+ await telemetry.flush();
1282
+ // Phase 2: Uploads + evaluation + status update
1283
+ const resultForEval = await processVerificationResult(ctx);
1010
1284
  checklistItemResolved = true;
1285
+ await telemetry.trackCommandEnd('success', {
1286
+ evaluation: resultForEval.evaluation,
1287
+ durationMs: ctx.durationMs,
1288
+ ...ctx.resultMeta,
1289
+ });
1011
1290
  return resultForEval;
1012
1291
  }
1292
+ catch (error) {
1293
+ await telemetry.trackCommandError(error);
1294
+ throw error;
1295
+ }
1013
1296
  finally {
1014
- // Remove interrupt handlers to avoid double-firing after normal completion
1015
1297
  process.removeListener('SIGINT', handleInterrupt);
1016
1298
  process.removeListener('SIGTERM', handleInterrupt);
1017
- // If the checklist item was never resolved (agent error, throw, etc.),
1299
+ // If the scenario was never resolved (agent error, throw, etc.),
1018
1300
  // reset it to pending so it doesn't stay stuck in verification_in_progress.
1019
- if (!interrupted && !checklistItemResolved) {
1301
+ if (ctx && !interrupted && !checklistItemResolved) {
1020
1302
  try {
1021
- await updateChecklistItem(featureId, checklistItem.id, {
1303
+ await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
1022
1304
  status: 'pending',
1023
1305
  });
1024
- console.log('Checklist item reset to pending after unexpected error.');
1306
+ console.log('Scenario reset to pending after unexpected error.');
1025
1307
  }
1026
- catch {
1027
- // Best effort
1308
+ catch (resetErr) {
1309
+ await telemetry.trackPhaseError('scenario_reset', resetErr);
1028
1310
  }
1029
1311
  }
1030
- // Cleanup
1031
- if (configResult) {
1032
- await cleanupTempFiles(configResult);
1033
- }
1034
- try {
1035
- const traceDir = getTraceDirectory(browserSession.id);
1036
- if (existsSync(traceDir)) {
1037
- await rm(traceDir, { recursive: true, force: true });
1312
+ // --- Phase: cleanup ---
1313
+ if (ctx) {
1314
+ telemetry.trackPhaseStart('cleanup');
1315
+ if (ctx.configResult) {
1316
+ await cleanupTempFiles(ctx.configResult);
1038
1317
  }
1039
- }
1040
- catch {
1041
- // Ignore
1042
- }
1043
- try {
1044
- if (conversationDir && existsSync(conversationDir)) {
1045
- await rm(conversationDir, { recursive: true, force: true });
1318
+ try {
1319
+ const traceDir = getTraceDirectory(ctx.browserSession.id);
1320
+ if (existsSync(traceDir)) {
1321
+ await rm(traceDir, { recursive: true, force: true });
1322
+ }
1046
1323
  }
1324
+ catch {
1325
+ // Ignore
1326
+ }
1327
+ try {
1328
+ if (ctx.conversationDir && existsSync(ctx.conversationDir)) {
1329
+ await rm(ctx.conversationDir, {
1330
+ recursive: true,
1331
+ force: true,
1332
+ });
1333
+ }
1334
+ }
1335
+ catch {
1336
+ // Ignore
1337
+ }
1338
+ telemetry.trackPhaseEnd('cleanup');
1047
1339
  }
1048
- catch {
1049
- // Ignore
1050
- }
1340
+ await telemetry.flush();
1051
1341
  }
1052
1342
  }
1053
1343
  //# sourceMappingURL=verifyFeature.js.map