@ranger-testing/ranger-cli 1.1.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/README.md +47 -45
  2. package/build/cli.js +671 -291
  3. package/build/cli.js.map +1 -1
  4. package/build/commands/addEnv.js +1 -1
  5. package/build/commands/addEnv.js.map +1 -1
  6. package/build/commands/authEncrypt.js +5 -10
  7. package/build/commands/authEncrypt.js.map +1 -1
  8. package/build/commands/clean.js +1 -1
  9. package/build/commands/clean.js.map +1 -1
  10. package/build/commands/config.js +9 -15
  11. package/build/commands/config.js.map +1 -1
  12. package/build/commands/env.js +10 -13
  13. package/build/commands/env.js.map +1 -1
  14. package/build/commands/feature.js +138 -67
  15. package/build/commands/feature.js.map +1 -1
  16. package/build/commands/hook.js +9 -4
  17. package/build/commands/hook.js.map +1 -1
  18. package/build/commands/hooks/autoPrompt.js +32 -0
  19. package/build/commands/hooks/autoPrompt.js.map +1 -0
  20. package/build/commands/hooks/disable.js +8 -5
  21. package/build/commands/hooks/disable.js.map +1 -1
  22. package/build/commands/hooks/enable.js +16 -9
  23. package/build/commands/hooks/enable.js.map +1 -1
  24. package/build/commands/hooks/exitPlanMode.js +10 -10
  25. package/build/commands/hooks/exitPlanMode.js.map +1 -1
  26. package/build/commands/hooks/index.js +1 -0
  27. package/build/commands/hooks/index.js.map +1 -1
  28. package/build/commands/hooks/output.js +20 -2
  29. package/build/commands/hooks/output.js.map +1 -1
  30. package/build/commands/hooks/planReminder.js +9 -9
  31. package/build/commands/hooks/planReminder.js.map +1 -1
  32. package/build/commands/hooks/planStart.js +6 -6
  33. package/build/commands/hooks/planStart.js.map +1 -1
  34. package/build/commands/hooks/postEdit.js +6 -6
  35. package/build/commands/hooks/postEdit.js.map +1 -1
  36. package/build/commands/hooks/preCompact.js +5 -5
  37. package/build/commands/hooks/preCompact.js.map +1 -1
  38. package/build/commands/hooks/sessionEnd.js +8 -4
  39. package/build/commands/hooks/sessionEnd.js.map +1 -1
  40. package/build/commands/hooks/sessionStart.js +41 -25
  41. package/build/commands/hooks/sessionStart.js.map +1 -1
  42. package/build/commands/hooks/stopHook.js +30 -6
  43. package/build/commands/hooks/stopHook.js.map +1 -1
  44. package/build/commands/index.js +1 -2
  45. package/build/commands/index.js.map +1 -1
  46. package/build/commands/login.js +2 -5
  47. package/build/commands/login.js.map +1 -1
  48. package/build/commands/setupCi.js +189 -0
  49. package/build/commands/setupCi.js.map +1 -0
  50. package/build/commands/skillup.js +16 -68
  51. package/build/commands/skillup.js.map +1 -1
  52. package/build/commands/start.js +1 -1
  53. package/build/commands/start.js.map +1 -1
  54. package/build/commands/status.js +14 -13
  55. package/build/commands/status.js.map +1 -1
  56. package/build/commands/update.js +34 -5
  57. package/build/commands/update.js.map +1 -1
  58. package/build/commands/updateEnv.js +1 -1
  59. package/build/commands/updateEnv.js.map +1 -1
  60. package/build/commands/useEnv.js +1 -1
  61. package/build/commands/useEnv.js.map +1 -1
  62. package/build/commands/utils/activeProfile.js +76 -0
  63. package/build/commands/utils/activeProfile.js.map +1 -0
  64. package/build/commands/utils/browserSessionsApi.js +1 -1
  65. package/build/commands/utils/browserSessionsApi.js.map +1 -1
  66. package/build/commands/utils/desirePathLog.js +39 -34
  67. package/build/commands/utils/desirePathLog.js.map +1 -1
  68. package/build/commands/utils/deviceAuth.js +53 -5
  69. package/build/commands/utils/deviceAuth.js.map +1 -1
  70. package/build/commands/utils/environment.js +11 -12
  71. package/build/commands/utils/environment.js.map +1 -1
  72. package/build/commands/utils/featureApi.js +49 -46
  73. package/build/commands/utils/featureApi.js.map +1 -1
  74. package/build/commands/utils/featureReportGenerator.js +6 -6
  75. package/build/commands/utils/featureReportGenerator.js.map +1 -1
  76. package/build/commands/utils/keychain.js +1 -1
  77. package/build/commands/utils/localAgentInstallationsApi.js +1 -1
  78. package/build/commands/utils/profileMessages.js +8 -0
  79. package/build/commands/utils/profileMessages.js.map +1 -0
  80. package/build/commands/utils/profileSetupBanner.js +167 -0
  81. package/build/commands/utils/profileSetupBanner.js.map +1 -0
  82. package/build/commands/utils/retry.js +25 -0
  83. package/build/commands/utils/retry.js.map +1 -0
  84. package/build/commands/utils/sessionCache.js +17 -0
  85. package/build/commands/utils/sessionCache.js.map +1 -1
  86. package/build/commands/utils/settings.js +23 -2
  87. package/build/commands/utils/settings.js.map +1 -1
  88. package/build/commands/utils/skills.js +1 -1
  89. package/build/commands/utils/telemetry.js +254 -0
  90. package/build/commands/utils/telemetry.js.map +1 -0
  91. package/build/commands/utils/userApi.js +4 -4
  92. package/build/commands/utils/userApi.js.map +1 -1
  93. package/build/commands/verifyFeature.js +678 -407
  94. package/build/commands/verifyFeature.js.map +1 -1
  95. package/build/commands/verifyInBrowser.js +1 -1
  96. package/build/commands/verifyInBrowser.js.map +1 -1
  97. package/build/skills/ranger/SKILL.md +65 -64
  98. package/build/skills/ranger/create.md +31 -31
  99. package/build/skills/ranger/feedback.md +25 -17
  100. package/build/skills/ranger/start.md +37 -37
  101. package/build/skills/ranger/verify.md +59 -55
  102. package/package.json +1 -1
  103. package/scripts/postinstall.js +1 -1
  104. package/build/commands/dataMcpServer.js +0 -1
  105. package/build/commands/dataMcpServer.js.map +0 -1
  106. package/build/commands/utils/cliSecret.js +0 -1
  107. package/build/commands/utils/cliSecret.js.map +0 -1
  108. package/build/skills/bug-bash.md +0 -329
  109. package/build/skills/e2e-test-recommender.md +0 -168
@@ -1,4 +1,5 @@
1
1
  import { query, } from '@anthropic-ai/claude-agent-sdk';
2
+ import { createTelemetryCollector, } from './utils/telemetry.js';
2
3
  import { join, dirname } from 'path';
3
4
  import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
4
5
  import { existsSync } from 'fs';
@@ -6,11 +7,16 @@ import { execSync } from 'child_process';
6
7
  import { tmpdir } from 'os';
7
8
  import inquirer from 'inquirer';
8
9
  import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, getEnvDir, } from './utils/settings.js';
9
- import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
10
+ import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getProxySessionToken, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
11
+ import { getAiProxyUrl } from './utils/environment.js';
10
12
  import { getToken } from './utils/keychain.js';
11
13
  import { getActiveFeatureId } from './feature.js';
14
+ import { readActiveProfileName } from './utils/activeProfile.js';
15
+ import { getEnvNames } from './env.js';
16
+ import { formatProfileRequiredMessage } from './utils/profileMessages.js';
12
17
  import { getFeature, updateFeature, updateChecklistItem, startSession, getActionItems, getItemFeedback, } from './utils/featureApi.js';
13
18
  import { getRangerDir } from './utils/rangerRoot.js';
19
+ const bold = (text) => `\x1b[1m${text}\x1b[0m`;
14
20
  /**
15
21
  * Get the current git branch
16
22
  */
@@ -73,7 +79,7 @@ function getMockEvaluation(outcome) {
73
79
  success: true,
74
80
  summary: '[DEBUG] Mock verification completed successfully.',
75
81
  evaluation: 'verified',
76
- evaluationReason: 'All checklist requirements were met.',
82
+ evaluationReason: 'All scenario requirements were met.',
77
83
  },
78
84
  partial: {
79
85
  success: false,
@@ -91,7 +97,7 @@ function getMockEvaluation(outcome) {
91
97
  incomplete: {
92
98
  success: false,
93
99
  summary: '[DEBUG] Mock incomplete verification.',
94
- evaluation: 'partial',
100
+ evaluation: 'incomplete',
95
101
  evaluationReason: 'Implementation is incomplete and needs additional work.',
96
102
  issues: [
97
103
  {
@@ -157,42 +163,52 @@ function getDebugPrompt() {
157
163
  Return your findings in the structured output format.`;
158
164
  }
159
165
  /**
160
- * Prompt user to select a checklist item
166
+ * Prompt user to select a scenario
161
167
  */
162
168
  async function selectChecklistItem(items) {
163
169
  if (items.length === 0) {
164
170
  return null;
165
171
  }
166
- const choices = items.map((item, i) => {
167
- const emoji = item.status === 'verified'
172
+ const choices = items.map((item) => {
173
+ const emoji = item.status === 'closed' && item.terminalReason === 'approved'
168
174
  ? '\u2705'
169
- : item.status === 'incomplete'
170
- ? '\ud83d\udfe0' // orange circle
171
- : item.status === 'blocked'
172
- ? '\ud83d\uded1'
173
- : item.status === 'closed'
174
- ? '\u26d4'
175
- : '\u2b1c';
175
+ : item.status === 'verified'
176
+ ? '\ud83d\udfe2' // green circle
177
+ : item.status === 'incomplete'
178
+ ? '\ud83d\udfe0' // orange circle
179
+ : item.status === 'blocked'
180
+ ? '\ud83d\uded1'
181
+ : item.status === 'closed'
182
+ ? '\u26d4'
183
+ : item.status === 'verification_in_progress'
184
+ ? '\u23f3'
185
+ : '\u2b1c';
176
186
  const commentBadge = item.unaddressedCommentCount > 0
177
187
  ? ` [${item.unaddressedCommentCount} comments]`
178
188
  : '';
189
+ const disabledReason = item.actionable
190
+ ? false
191
+ : item.status === 'closed' && item.terminalReason
192
+ ? item.terminalReason
193
+ : 'not actionable';
179
194
  return {
180
- name: `${i + 1}. ${emoji} ${item.description}${commentBadge}`,
195
+ name: `${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}`,
181
196
  value: item.id,
197
+ disabled: disabledReason,
182
198
  };
183
199
  });
184
200
  const { selected } = await inquirer.prompt([
185
201
  {
186
202
  type: 'list',
187
203
  name: 'selected',
188
- message: 'Which checklist item does this verify?',
204
+ message: 'Which scenario does this verify?',
189
205
  choices,
190
206
  },
191
207
  ]);
192
208
  return items.find((i) => i.id === selected) || null;
193
209
  }
194
210
  /**
195
- * Handle incomplete verification - check if all other items are terminal and prompt user
211
+ * Handle incomplete verification - check if all other scenarios are terminal and prompt user
196
212
  */
197
213
  async function handleIncompleteItem(featureId, incompleteItem, result) {
198
214
  // Get action items to check if there are other items to work on
@@ -217,46 +233,98 @@ async function handleIncompleteItem(featureId, incompleteItem, result) {
217
233
  }
218
234
  console.log(`\nNext steps:`);
219
235
  console.log(` 1. Fix the issues above in your code`);
220
- console.log(` 2. Run 'ranger verify-feature' again to re-verify`);
236
+ console.log(` 2. Run 'ranger go' again to re-verify`);
221
237
  if (allOthersTerminal && otherItems.length > 0) {
222
- console.log(`\nAll other checklist items are complete.`);
223
- console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
238
+ console.log(`\nAll other scenarios are complete.`);
239
+ console.log(`If you're done for now, you can stop and resume later with 'ranger resume'.`);
224
240
  }
225
241
  console.log(`${'='.repeat(60)}\n`);
226
242
  }
227
243
  /**
228
- * PostToolUse hook that logs browser tool calls to stdout.
244
+ * Create a PostToolUse hook that logs browser tool calls to stdout and tracks
245
+ * all tool calls via telemetry with per-call timing.
246
+ */
247
+ function createToolCallTrackingHook(telemetry) {
248
+ const toolCallCounts = new Map();
249
+ const hook = async (input) => {
250
+ if (input.hook_event_name !== 'PostToolUse')
251
+ return {};
252
+ const postInput = input;
253
+ const toolInput = postInput.tool_input;
254
+ const shortName = postInput.tool_name.replace('mcp__ranger-browser__', '');
255
+ // Track count
256
+ toolCallCounts.set(shortName, (toolCallCounts.get(shortName) || 0) + 1);
257
+ // Log tool call as telemetry event
258
+ telemetry.trackPhaseStart('tool_call', { toolName: shortName });
259
+ telemetry.trackPhaseEnd('tool_call', {
260
+ toolName: shortName,
261
+ toolInput: summarizeToolInput(shortName, toolInput),
262
+ });
263
+ // Console log
264
+ switch (postInput.tool_name) {
265
+ case 'mcp__ranger-browser__browser_navigate':
266
+ console.log(`[browser] Navigate → ${toolInput.url}`);
267
+ break;
268
+ case 'mcp__ranger-browser__browser_click':
269
+ console.log(`[browser] Click → "${toolInput.element}"`);
270
+ break;
271
+ case 'mcp__ranger-browser__browser_type':
272
+ console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
273
+ break;
274
+ case 'mcp__ranger-browser__browser_press_key':
275
+ console.log(`[browser] Press key → ${toolInput.key}`);
276
+ break;
277
+ case 'mcp__ranger-browser__browser_wait_for':
278
+ console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
279
+ break;
280
+ }
281
+ return {};
282
+ };
283
+ return { hook, toolCallCounts };
284
+ }
285
+ /**
286
+ * Create a PostToolUseFailure hook that tracks tool failures via telemetry.
229
287
  */
230
- const browserToolLogHook = async (input) => {
231
- if (input.hook_event_name !== 'PostToolUse')
288
+ function createToolFailureHook(telemetry) {
289
+ return async (input) => {
290
+ if (input.hook_event_name !== 'PostToolUseFailure')
291
+ return {};
292
+ const failInput = input;
293
+ const shortName = failInput.tool_name.replace('mcp__ranger-browser__', '');
294
+ await telemetry.trackPhaseError('tool_failure', failInput.error, {
295
+ toolName: shortName,
296
+ isInterrupt: failInput.is_interrupt,
297
+ });
232
298
  return {};
233
- const postInput = input;
234
- const toolInput = postInput.tool_input;
235
- switch (postInput.tool_name) {
236
- case 'mcp__ranger-browser__browser_navigate':
237
- console.log(`[browser] Navigate → ${toolInput.url}`);
238
- break;
239
- case 'mcp__ranger-browser__browser_click':
240
- console.log(`[browser] Click → "${toolInput.element}"`);
241
- break;
242
- case 'mcp__ranger-browser__browser_type':
243
- console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
244
- break;
245
- case 'mcp__ranger-browser__browser_press_key':
246
- console.log(`[browser] Press key → ${toolInput.key}`);
247
- break;
248
- case 'mcp__ranger-browser__browser_wait_for':
249
- console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
250
- break;
299
+ };
300
+ }
301
+ /**
302
+ * Summarize tool input for telemetry (avoid logging sensitive/large data).
303
+ */
304
+ function summarizeToolInput(toolName, input) {
305
+ switch (toolName) {
306
+ case 'browser_navigate':
307
+ return { url: input.url };
308
+ case 'browser_click':
309
+ return { element: input.element };
310
+ case 'browser_type':
311
+ return { element: input.element };
312
+ case 'browser_take_screenshot':
313
+ return { filename: input.filename };
314
+ case 'browser_press_key':
315
+ return { key: input.key };
316
+ case 'browser_wait_for':
317
+ return { time: input.time, text: input.text };
318
+ default:
319
+ return {};
251
320
  }
252
- return {};
253
- };
321
+ }
254
322
  /**
255
323
  * Create a PostToolUse hook that uploads screenshots immediately after they're taken.
256
324
  * Returns the hook callback and a set of filenames that were successfully uploaded,
257
325
  * so the post-hoc fallback can skip them.
258
326
  */
259
- function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
327
+ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir, telemetry) {
260
328
  const uploadedFiles = new Set();
261
329
  let position = 1;
262
330
  const hook = async (input) => {
@@ -271,11 +339,15 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
271
339
  const filename = toolInput?.filename;
272
340
  if (!filename)
273
341
  return {};
342
+ const isKeyFrame = filename.toLowerCase().startsWith('key_');
343
+ telemetry.trackPhaseStart('hook_screenshot_upload', {
344
+ filename,
345
+ isKeyFrame,
346
+ });
274
347
  try {
275
348
  const pngPath = join(traceDir, filename);
276
349
  const pngBuffer = await readFile(pngPath);
277
350
  const pngStat = await stat(pngPath);
278
- const isKeyFrame = filename.toLowerCase().startsWith('key_');
279
351
  const displayName = filename
280
352
  .replace(/\.png$/i, '')
281
353
  .replace(/^key_/i, '')
@@ -315,96 +387,142 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
315
387
  await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
316
388
  // Track as uploaded
317
389
  uploadedFiles.add(filename);
390
+ telemetry.trackPhaseEnd('hook_screenshot_upload', {
391
+ filename,
392
+ bytes: pngBuffer.length,
393
+ });
318
394
  }
319
395
  catch (err) {
320
- // swallow error for now
321
- // TODO: should log / report these
396
+ await telemetry.trackPhaseError('hook_screenshot_upload', err, {
397
+ filename,
398
+ });
322
399
  }
323
400
  return {};
324
401
  };
325
402
  return { hook, uploadedFiles };
326
403
  }
327
404
  /**
328
- * Verify a checklist item in the browser
405
+ * Phase 1: Setup through agent completion.
406
+ * Returns context for processVerificationResult.
329
407
  */
330
- export async function verifyFeature(options) {
408
+ async function runVerification(options, telemetry) {
331
409
  const isDebugMode = !!options.debugOutcome;
332
410
  if (isDebugMode) {
333
411
  console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
334
412
  }
335
- // 1. Check for active feature
413
+ // --- Phase: feature_load ---
414
+ telemetry.trackPhaseStart('feature_load');
336
415
  const featureId = await getActiveFeatureId();
337
416
  if (!featureId) {
338
- throw new Error('No active feature. Run: ranger feature resume <id> or ranger feature create');
417
+ throw new Error('No active feature review. Run: ranger resume <id> or ranger create');
339
418
  }
340
- // Load feature details
341
419
  const feature = await getFeature(featureId);
342
- // Update the feature's gitBranch to the current branch
420
+ telemetry.setContext({ featureId });
343
421
  const currentBranch = getGitBranch();
344
422
  if (currentBranch && currentBranch !== feature.gitBranch) {
345
423
  await updateFeature(featureId, { gitBranch: currentBranch });
346
424
  console.log(` Updated branch to: ${currentBranch}`);
347
425
  }
348
- console.log(`\nActive feature: ${feature.name} (${featureId})`);
349
- // Get action items - leaf items that can be verified (non-closed with no non-closed children)
426
+ console.log(`\nActive feature review: ${feature.name} (${featureId})`);
350
427
  const { items: actionItems } = await getActionItems(featureId);
351
- // 2. Determine which checklist item we're verifying
428
+ const actionItemsById = new Map(actionItems.map((item) => [item.id, item]));
429
+ const displayItems = feature.checklistItems.map((item, index) => {
430
+ const actionItem = actionItemsById.get(item.id);
431
+ return {
432
+ ...item,
433
+ unaddressedCommentCount: actionItem?.unaddressedCommentCount ?? 0,
434
+ displayIndex: index,
435
+ actionable: !!actionItem && item.status !== 'closed',
436
+ };
437
+ });
438
+ telemetry.trackPhaseEnd('feature_load', {
439
+ itemCount: actionItems.length,
440
+ });
441
+ // --- Phase: scenario_select ---
442
+ telemetry.trackPhaseStart('scenario_select');
352
443
  let checklistItem = null;
353
- let taskDescription = options.task;
354
- if (options.item !== undefined) {
444
+ let taskDescription = options.notes;
445
+ if (options.scenario !== undefined) {
355
446
  // Use specified item index (1-based)
356
- const itemIndex = options.item - 1; // 1-based to 0-based
357
- if (itemIndex < 0 || itemIndex >= actionItems.length) {
358
- throw new Error(`Invalid item index: ${options.item}. Feature has ${actionItems.length} actionable items.`);
447
+ const itemIndex = options.scenario - 1; // 1-based to 0-based
448
+ if (itemIndex < 0 || itemIndex >= displayItems.length) {
449
+ throw new Error(`Invalid scenario index: ${options.scenario}. Feature review has ${displayItems.length} scenarios.`);
450
+ }
451
+ const displayItem = displayItems[itemIndex];
452
+ if (!displayItem.actionable) {
453
+ const reason = displayItem.status === 'closed' && displayItem.terminalReason
454
+ ? displayItem.terminalReason
455
+ : 'not actionable';
456
+ throw new Error(`Scenario ${options.scenario} is ${reason} and cannot be verified. Choose a different scenario.`);
359
457
  }
360
- checklistItem = actionItems[itemIndex];
458
+ const actionItem = actionItemsById.get(displayItem.id);
459
+ if (!actionItem) {
460
+ throw new Error(`Scenario ${options.scenario} is not currently actionable. Try another scenario.`);
461
+ }
462
+ checklistItem = actionItem;
361
463
  if (!taskDescription) {
362
464
  taskDescription = checklistItem.description;
363
465
  }
364
466
  }
365
467
  else {
366
- // Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
367
468
  const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
368
469
  if (!isInteractive) {
369
- // Non-TTY mode: require --item flag, show available items
370
- console.log('\nNon-interactive mode detected. The --item flag is required.');
371
- console.log('\nAvailable checklist items to verify:');
372
- actionItems.forEach((item, i) => {
373
- const emoji = item.status === 'verified'
470
+ // Non-TTY mode: require --scenario flag, show available scenarios
471
+ console.log('\nNon-interactive mode detected. The --scenario flag is required.');
472
+ console.log('\nAvailable scenarios to verify:');
473
+ displayItems.forEach((item) => {
474
+ const emoji = item.status === 'closed' &&
475
+ item.terminalReason === 'approved'
374
476
  ? '\u2705'
375
- : item.status === 'incomplete'
376
- ? '\ud83d\udfe0' // orange circle
377
- : item.status === 'blocked'
378
- ? '\ud83d\uded1'
379
- : item.status === 'closed'
380
- ? '\u26d4'
381
- : '\u2b1c';
477
+ : item.status === 'verified'
478
+ ? '\ud83d\udfe2'
479
+ : item.status === 'incomplete'
480
+ ? '\ud83d\udfe0'
481
+ : item.status === 'blocked'
482
+ ? '\ud83d\uded1'
483
+ : item.status === 'closed'
484
+ ? '\u26d4'
485
+ : item.status === 'verification_in_progress'
486
+ ? '\u23f3'
487
+ : '\u2b1c';
382
488
  const commentBadge = item.unaddressedCommentCount > 0
383
489
  ? ` [${item.unaddressedCommentCount} comments]`
384
490
  : '';
385
- console.log(` ${i + 1}. ${emoji} ${item.description}${commentBadge}`);
491
+ const actionHint = item.actionable ? '' : ' [not actionable]';
492
+ console.log(` ${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}${actionHint}`);
386
493
  });
387
- console.log('\nUsage: ranger verify-feature --item <number>');
388
- console.log('Example: ranger verify-feature --item 1');
389
- throw new Error('The --item flag is required in non-interactive mode. See available items above.');
494
+ console.log('\nUsage: ranger go --scenario <number>');
495
+ console.log('Example: ranger go --scenario 1');
496
+ throw new Error('The --scenario flag is required in non-interactive mode. See available scenarios above.');
390
497
  }
391
- // Interactive selection
392
- checklistItem = await selectChecklistItem(actionItems);
393
- if (!taskDescription && checklistItem) {
394
- taskDescription = checklistItem.description;
498
+ const selectedItem = await selectChecklistItem(displayItems);
499
+ if (selectedItem) {
500
+ const actionItem = actionItemsById.get(selectedItem.id);
501
+ if (!actionItem) {
502
+ throw new Error('Selected scenario is not currently actionable. Choose another scenario.');
503
+ }
504
+ checklistItem = actionItem;
505
+ if (!taskDescription) {
506
+ taskDescription = checklistItem.description;
507
+ }
395
508
  }
396
509
  }
397
510
  if (!checklistItem) {
398
- throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
511
+ throw new Error('No scenario selected. Create scenarios when creating the feature review with -c or --scenario flags.');
399
512
  }
400
513
  if (checklistItem.status === 'closed') {
401
- throw new Error(`Cannot verify item "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
514
+ throw new Error(`Cannot verify scenario "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
402
515
  }
403
516
  if (!taskDescription) {
404
- throw new Error('No task description provided');
517
+ throw new Error('No notes provided');
405
518
  }
406
- console.log(`\nVerifying: ${checklistItem.description}`);
407
- console.log(`Task: ${taskDescription}`);
519
+ telemetry.setContext({ checklistItemId: checklistItem.id });
520
+ telemetry.trackPhaseEnd('scenario_select', {
521
+ selectionMethod: options.scenario !== undefined ? 'flag' : 'interactive',
522
+ itemStatus: checklistItem.status,
523
+ });
524
+ console.log(`\nVerifying scenario: ${checklistItem.description}`);
525
+ console.log(`Notes: ${taskDescription}`);
408
526
  // Fetch reviewer feedback if item has parent or unaddressed comments
409
527
  let itemFeedback = null;
410
528
  if (checklistItem.parentItemId ||
@@ -415,8 +533,9 @@ export async function verifyFeature(options) {
415
533
  console.log(`Reviewer feedback: ${itemFeedback.unaddressedComments.length} comment(s) to verify`);
416
534
  }
417
535
  }
418
- catch {
419
- // Non-fatal - continue without feedback
536
+ catch (err) {
537
+ // Non-fatal - continue without feedback, but log it
538
+ await telemetry.trackPhaseError('feedback_fetch', err);
420
539
  }
421
540
  }
422
541
  // Start the session if it's in ready status
@@ -427,41 +546,38 @@ export async function verifyFeature(options) {
427
546
  await startSession(featureId, feature.currentSessionId);
428
547
  }
429
548
  catch (error) {
430
- // Ignore if session is already started (race condition)
431
549
  const message = error instanceof Error ? error.message : String(error);
432
550
  if (!message.includes('already')) {
433
551
  throw error;
434
552
  }
435
553
  }
436
554
  }
437
- // Update checklist item status to verification_in_progress
555
+ // Update scenario status to verification_in_progress
438
556
  await updateChecklistItem(featureId, checklistItem.id, {
439
557
  status: 'verification_in_progress',
440
558
  });
441
- // 3. Determine which environment to use (same pattern as verifyInBrowser)
442
- let activeEnv;
443
- if (options.env) {
444
- activeEnv = options.env;
559
+ // --- Phase: profile_resolution ---
560
+ telemetry.trackPhaseStart('profile_resolution');
561
+ let activeProfile = null;
562
+ if (options.profile) {
563
+ activeProfile = options.profile;
445
564
  }
446
565
  else {
447
- const activeEnvPath = join(getRangerDir(), 'active-env.txt');
448
- if (!existsSync(activeEnvPath)) {
449
- throw new Error('No active environment. Run: ranger use <env-name>');
450
- }
451
- activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
566
+ activeProfile = await readActiveProfileName();
567
+ }
568
+ if (!activeProfile) {
569
+ throw new Error('No active profile. Run: ranger profile use <profile-name>');
452
570
  }
453
- const envDir = getEnvDir(activeEnv);
571
+ const envDir = getEnvDir(activeProfile);
454
572
  if (!existsSync(envDir)) {
455
- throw new Error(`Environment "${activeEnv}" not found. Run: ranger add env ${activeEnv}`);
573
+ throw new Error(`Profile "${activeProfile}" not found. Run: ranger profile add ${activeProfile}`);
456
574
  }
457
- const settings = await loadSettings(activeEnv);
575
+ const settings = await loadSettings(activeProfile);
458
576
  const resolvedSettings = resolveEnvVars(settings);
459
- // Get base URL from settings
460
577
  let url = resolvedSettings.baseUrl;
461
578
  if (!url) {
462
- throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
579
+ throw new Error(`No baseUrl configured for profile "${activeProfile}". Run: ranger profile config set ${activeProfile} baseUrl <url>`);
463
580
  }
464
- // Append startPath if provided
465
581
  if (options.startPath) {
466
582
  const base = url.endsWith('/') ? url.slice(0, -1) : url;
467
583
  const path = options.startPath.startsWith('/')
@@ -469,13 +585,17 @@ export async function verifyFeature(options) {
469
585
  : '/' + options.startPath;
470
586
  url = base + path;
471
587
  }
472
- // 4. Create browser session
588
+ telemetry.trackPhaseEnd('profile_resolution', {
589
+ profileName: activeProfile,
590
+ });
591
+ // --- Phase: browser_session_create ---
592
+ telemetry.trackPhaseStart('browser_session_create');
473
593
  const token = await getToken();
474
594
  if (!token) {
475
- throw new Error('No API token configured. Run: ranger start <token>');
595
+ throw new Error('No API token configured. Run: ranger setup [token]');
476
596
  }
477
597
  const browserSession = await createBrowserSession({
478
- environmentName: activeEnv,
598
+ environmentName: activeProfile,
479
599
  settings: resolvedSettings,
480
600
  task: taskDescription,
481
601
  url,
@@ -483,78 +603,48 @@ export async function verifyFeature(options) {
483
603
  checklistItemId: checklistItem.id,
484
604
  });
485
605
  console.log(`Browser session created: ${browserSession.id}`);
486
- // Link the browser session to the checklist item immediately so steps
606
+ telemetry.setContext({ browserSessionId: browserSession.id });
607
+ // Link the browser session to the scenario immediately so steps
487
608
  // are visible in the dashboard while verification is in progress
488
609
  await updateChecklistItem(featureId, checklistItem.id, {
489
610
  browserSessionId: browserSession.id,
490
611
  });
491
- const configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
492
- const rangerBrowserMcp = {
493
- command: 'npx',
494
- args: [
495
- '@ranger-testing/playwright',
496
- 'run-mcp-server',
497
- '--config',
498
- configResult.configPath,
499
- ],
500
- };
501
- const startTime = Date.now();
502
- // Handle process interruption (Ctrl+C or coding agent killing the process)
503
- let interrupted = false;
504
- const handleInterrupt = async () => {
505
- if (interrupted)
506
- return;
507
- interrupted = true;
508
- console.log('\nVerification interrupted. Cleaning up...');
509
- try {
510
- await updateBrowserSession(browserSession.id, {
511
- status: 'interrupted',
512
- durationMs: Date.now() - startTime,
513
- });
514
- }
515
- catch {
516
- // Best effort
517
- }
518
- try {
519
- await updateChecklistItem(featureId, checklistItem.id, {
520
- status: 'pending',
521
- });
522
- }
523
- catch {
524
- // Best effort
525
- }
526
- try {
527
- await cleanupTempFiles(configResult);
528
- }
529
- catch {
530
- // Best effort
531
- }
532
- console.log('Checklist item reset to pending. Partial steps are preserved.');
533
- process.exit(0);
534
- };
535
- process.on('SIGINT', handleInterrupt);
536
- process.on('SIGTERM', handleInterrupt);
537
- // Fetch Anthropic API key
538
- let anthropicApiKey;
612
+ telemetry.trackPhaseEnd('browser_session_create');
613
+ // --- Phase: playwright_config ---
614
+ telemetry.trackPhaseStart('playwright_config');
615
+ let configResult;
616
+ let sessionToken;
539
617
  try {
540
- anthropicApiKey = await getAnthropicApiKey();
618
+ sessionToken = await getProxySessionToken();
541
619
  }
542
620
  catch (error) {
543
621
  const message = error instanceof Error ? error.message : String(error);
544
- const errorMsg = `Failed to fetch Anthropic API key: ${message}`;
622
+ const errorMsg = `Failed to fetch proxy session token: ${message}`;
545
623
  try {
546
624
  await updateBrowserSession(browserSession.id, {
547
625
  status: 'failed',
548
- durationMs: Date.now() - startTime,
626
+ durationMs: 0,
549
627
  errorMessage: errorMsg,
550
628
  });
551
629
  }
552
- catch {
553
- // Ignore
630
+ catch (updateErr) {
631
+ await telemetry.trackPhaseError('session_error_update', updateErr);
554
632
  }
555
633
  throw new Error(errorMsg);
556
634
  }
557
- // 5. UI Verifier + Evaluation Agent prompt
635
+ configResult = await buildPlaywrightConfig(resolvedSettings, activeProfile, browserSession?.id);
636
+ telemetry.trackPhaseEnd('playwright_config');
637
+ const startTime = Date.now();
638
+ const rangerBrowserMcp = {
639
+ command: 'npx',
640
+ args: [
641
+ '@ranger-testing/playwright',
642
+ 'run-mcp-server',
643
+ '--config',
644
+ configResult.configPath,
645
+ ],
646
+ };
647
+ // Build verifier prompt
558
648
  let verifierPrompt;
559
649
  if (isDebugMode) {
560
650
  verifierPrompt = getDebugPrompt();
@@ -563,7 +653,6 @@ export async function verifyFeature(options) {
563
653
  const notesSection = checklistItem.notes
564
654
  ? `\n\n## Additional Notes\n${checklistItem.notes}`
565
655
  : '';
566
- // Build reviewer feedback section if available
567
656
  let feedbackSection = '';
568
657
  if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
569
658
  const commentLines = itemFeedback.unaddressedComments
@@ -584,9 +673,9 @@ ${commentLines}`;
584
673
  canonicalFlowSection = `\n\n## Expected Flow (from previous verification)
585
674
  ${itemFeedback.canonicalFlow}`;
586
675
  }
587
- verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
676
+ verifierPrompt = `You are a Feature Review Verifier. Your job is to verify a scenario by executing a UI flow and evaluating whether it adequately completes the scenario.
588
677
 
589
- ## Checklist Item to Verify
678
+ ## Scenario to Verify
590
679
  ${checklistItem.description}${notesSection}${feedbackSection}${canonicalFlowSection}
591
680
 
592
681
  ## Task to Execute
@@ -599,7 +688,7 @@ Your base URL is: ${url}
599
688
  - DO NOT navigate to any different domain, host, or port under any circumstances
600
689
  - IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
601
690
  - If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
602
- - The base URL above is the ONLY authorized environment for this verification
691
+ - The base URL above is the ONLY authorized profile for this verification
603
692
 
604
693
  ## Instructions
605
694
  1. Navigate to the URL above using browser_navigate
@@ -608,7 +697,7 @@ Your base URL is: ${url}
608
697
  4. Execute the task step-by-step using browser tools
609
698
  5. **Take screenshots at key moments** (see Screenshot Guidelines below)
610
699
  6. Document any issues found (bugs, errors, unexpected behavior)
611
- 7. After completing the verification, evaluate whether the result adequately verifies the checklist item
700
+ 7. After completing the verification, evaluate whether the result adequately verifies the scenario
612
701
 
613
702
  ## Screenshot Guidelines - IMPORTANT
614
703
  Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
@@ -624,7 +713,7 @@ Take screenshots throughout the verification flow so a human can review it for c
624
713
  **Screenshot naming:**
625
714
  - Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
626
715
  - Number prefixes (01_, 02_, etc.) help maintain chronological order
627
- - For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
716
+ - For KEY MOMENTS that prove the scenario is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
628
717
  - The "key_" prefix marks screenshots as high-priority evidence for human reviewers
629
718
 
630
719
  **Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
@@ -651,8 +740,8 @@ After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP erro
651
740
  This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
652
741
 
653
742
  ## Evaluation Criteria
654
- - VERIFIED: The task completed successfully and the checklist item requirements are fully met
655
- - PARTIAL: The task partially completed but some aspects of the checklist item are not verified
743
+ - VERIFIED: The task completed successfully and the scenario requirements are fully met
744
+ - PARTIAL: The task partially completed but some aspects of the scenario are not verified
656
745
  - BLOCKED: A blocking issue (bug, error, missing feature) prevents completion
657
746
  - FAILED: The task could not be completed due to errors
658
747
 
@@ -696,9 +785,12 @@ Return your findings in the structured output format with your evaluation.`;
696
785
  },
697
786
  required: ['success', 'summary', 'evaluation', 'evaluationReason'],
698
787
  };
699
- // 6. Execute agent
788
+ // --- Phase: agent_execution ---
789
+ telemetry.trackPhaseStart('agent_execution');
700
790
  const traceDir = getTraceDirectory(browserSession.id);
701
- const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir);
791
+ const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir, telemetry);
792
+ const toolCallHook = createToolCallTrackingHook(telemetry);
793
+ const toolFailureHook = createToolFailureHook(telemetry);
702
794
  const result = query({
703
795
  prompt: verifierPrompt,
704
796
  options: {
@@ -708,8 +800,8 @@ Return your findings in the structured output format with your evaluation.`;
708
800
  'ranger-browser': rangerBrowserMcp,
709
801
  },
710
802
  tools: ['mcp__ranger-browser__*'],
711
- permissionMode: 'bypassPermissions',
712
- allowDangerouslySkipPermissions: true,
803
+ permissionMode: 'acceptEdits',
804
+ allowedTools: ['mcp__ranger-browser__*', 'Read', 'Glob', 'Grep'],
713
805
  outputFormat: {
714
806
  type: 'json_schema',
715
807
  schema: outputSchema,
@@ -717,22 +809,28 @@ Return your findings in the structured output format with your evaluation.`;
717
809
  hooks: {
718
810
  PostToolUse: [
719
811
  {
720
- hooks: [browserToolLogHook, screenshotHook.hook],
812
+ hooks: [toolCallHook.hook, screenshotHook.hook],
813
+ },
814
+ ],
815
+ PostToolUseFailure: [
816
+ {
817
+ hooks: [toolFailureHook],
721
818
  },
722
819
  ],
723
820
  },
724
821
  env: {
725
822
  ...process.env,
726
- ANTHROPIC_API_KEY: anthropicApiKey,
823
+ ANTHROPIC_API_KEY: sessionToken,
824
+ ANTHROPIC_BASE_URL: getAiProxyUrl(),
727
825
  },
728
826
  persistSession: false,
729
827
  },
730
828
  });
731
- // 7. Collect messages
829
+ // Collect messages
732
830
  let finalResult = null;
733
831
  let agentError = null;
734
- // Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
735
832
  let lastStructuredOutputInput = null;
833
+ let resultMeta = {};
736
834
  const conversationFilePath = getConversationFilePath(browserSession.id);
737
835
  const conversationDir = dirname(conversationFilePath);
738
836
  await mkdir(conversationDir, { recursive: true });
@@ -743,170 +841,230 @@ Return your findings in the structured output format with your evaluation.`;
743
841
  }, TIMEOUT_MS);
744
842
  });
745
843
  try {
746
- try {
747
- await Promise.race([
748
- (async () => {
749
- for await (const message of result) {
750
- try {
751
- const jsonLine = JSON.stringify(message) + '\n';
752
- await appendFile(conversationFilePath, jsonLine, 'utf-8');
753
- }
754
- catch {
755
- // Ignore
756
- }
757
- const msg = message;
758
- // Capture StructuredOutput tool call input as fallback
759
- // This handles SDK bug where structured_output is not populated in result
760
- if (msg.type === 'assistant' && msg.message?.content) {
761
- for (const block of msg.message.content) {
762
- if (block.type === 'tool_use' &&
763
- block.name === 'StructuredOutput' &&
764
- block.input) {
765
- lastStructuredOutputInput =
766
- block.input;
767
- }
844
+ await Promise.race([
845
+ (async () => {
846
+ for await (const message of result) {
847
+ try {
848
+ const jsonLine = JSON.stringify(message) + '\n';
849
+ await appendFile(conversationFilePath, jsonLine, 'utf-8');
850
+ }
851
+ catch {
852
+ // Ignore
853
+ }
854
+ const msg = message;
855
+ // Capture StructuredOutput tool call input as fallback
856
+ if (msg.type === 'assistant' && msg.message?.content) {
857
+ for (const block of msg.message.content) {
858
+ if (block.type === 'tool_use' &&
859
+ block.name === 'StructuredOutput' &&
860
+ block.input) {
861
+ lastStructuredOutputInput =
862
+ block.input;
768
863
  }
769
864
  }
770
- if (msg.error) {
771
- let errorText = msg.error;
772
- if (msg.message?.content &&
773
- Array.isArray(msg.message.content)) {
774
- const texts = msg.message.content
775
- .filter((c) => c.type === 'text')
776
- .map((c) => c.text || '')
777
- .filter(Boolean);
778
- if (texts.length > 0) {
779
- errorText = texts.join(' ');
780
- }
865
+ }
866
+ if (msg.error) {
867
+ let errorText = msg.error;
868
+ if (msg.message?.content &&
869
+ Array.isArray(msg.message.content)) {
870
+ const texts = msg.message.content
871
+ .filter((c) => c.type === 'text')
872
+ .map((c) => c.text || '')
873
+ .filter(Boolean);
874
+ if (texts.length > 0) {
875
+ errorText = texts.join(' ');
781
876
  }
782
- agentError = errorText;
783
877
  }
784
- if (message.type === 'result') {
785
- if (message.subtype === 'success' &&
786
- message.structured_output) {
787
- finalResult =
788
- message.structured_output;
878
+ agentError = errorText;
879
+ }
880
+ if (msg.type === 'result') {
881
+ // Capture SDK result metadata
882
+ resultMeta = {
883
+ numTurns: msg.num_turns,
884
+ totalCostUsd: msg.total_cost_usd,
885
+ durationApiMs: msg.duration_api_ms,
886
+ sdkDurationMs: msg.duration_ms,
887
+ inputTokens: msg.usage?.input_tokens,
888
+ outputTokens: msg.usage?.output_tokens,
889
+ cacheReadTokens: msg.usage?.cache_read_input_tokens,
890
+ cacheCreationTokens: msg.usage?.cache_creation_input_tokens,
891
+ };
892
+ if (msg.subtype === 'success' &&
893
+ message.structured_output) {
894
+ finalResult = message.structured_output;
895
+ }
896
+ else if (msg.subtype !== 'success') {
897
+ if (lastStructuredOutputInput &&
898
+ msg.errors?.length === 0) {
899
+ finalResult = lastStructuredOutputInput;
900
+ agentError = null;
789
901
  }
790
- else if (message.subtype !== 'success') {
791
- // SDK bug workaround: If we got error_during_execution but have
792
- // a StructuredOutput tool call, use that instead
793
- if (lastStructuredOutputInput &&
794
- message.errors?.length === 0) {
795
- finalResult = lastStructuredOutputInput;
796
- // Clear the error since we actually succeeded
797
- agentError = null;
798
- }
799
- else if (!agentError) {
800
- agentError =
801
- message.errors?.join(', ') ||
802
- 'Unknown error';
803
- }
902
+ else if (!agentError) {
903
+ agentError =
904
+ msg.errors?.join(', ') || 'Unknown error';
804
905
  }
805
906
  }
806
907
  }
807
- })(),
808
- timeoutPromise,
809
- ]);
810
- }
811
- catch (error) {
812
- agentError = error instanceof Error ? error.message : String(error);
813
- }
814
- const durationMs = Date.now() - startTime;
815
- // 8. Upload trace, videos, screenshots with metadata, and update session
816
- let traceDownloadUrl;
817
- try {
818
- const traceDir = getTraceDirectory(browserSession.id);
819
- if (existsSync(traceDir)) {
820
- const files = await readdir(traceDir);
821
- if (files.length > 0) {
822
- // Upload trace zip
908
+ }
909
+ })(),
910
+ timeoutPromise,
911
+ ]);
912
+ }
913
+ catch (error) {
914
+ agentError = error instanceof Error ? error.message : String(error);
915
+ }
916
+ const durationMs = Date.now() - startTime;
917
+ telemetry.trackPhaseEnd('agent_execution', {
918
+ ...resultMeta,
919
+ toolCallCounts: Object.fromEntries(toolCallHook.toolCallCounts),
920
+ hasResult: !!finalResult,
921
+ hasError: !!agentError,
922
+ });
923
+ return {
924
+ featureId,
925
+ checklistItem,
926
+ browserSession,
927
+ finalResult,
928
+ agentError,
929
+ lastStructuredOutputInput,
930
+ screenshotHook,
931
+ toolCallCounts: toolCallHook.toolCallCounts,
932
+ configResult,
933
+ startTime,
934
+ durationMs,
935
+ conversationFilePath,
936
+ conversationDir,
937
+ isDebugMode,
938
+ debugOutcome: options.debugOutcome,
939
+ resultMeta,
940
+ telemetry,
941
+ };
942
+ }
943
+ /**
944
+ * Phase 2: Upload artifacts, evaluate result, update scenario.
945
+ */
946
+ async function processVerificationResult(ctx) {
947
+ const { featureId, checklistItem, browserSession, screenshotHook, durationMs, conversationFilePath, isDebugMode, debugOutcome, telemetry, } = ctx;
948
+ const { finalResult, agentError } = ctx;
949
+ let traceDownloadUrl;
950
+ // --- Upload trace ---
951
+ try {
952
+ const traceDir = getTraceDirectory(browserSession.id);
953
+ if (existsSync(traceDir)) {
954
+ const files = await readdir(traceDir);
955
+ if (files.length > 0) {
956
+ telemetry.trackPhaseStart('upload_trace');
957
+ try {
823
958
  const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
824
959
  const traceBuffer = await zipDirectory(traceDir);
825
960
  await uploadTrace(traceUrls.uploadUrl, traceBuffer);
826
961
  traceDownloadUrl = traceUrls.downloadUrl;
827
- // Upload videos from videos/ subdirectory
828
- const videos = await loadSessionVideos(traceDir);
829
- for (const video of videos) {
830
- try {
831
- const videoBuffer = await readFile(video.path);
832
- const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
833
- await uploadVideo(videoUrls.uploadUrl, videoBuffer);
834
- }
835
- catch {
836
- // Ignore individual video upload errors
837
- }
962
+ telemetry.trackPhaseEnd('upload_trace', {
963
+ bytes: traceBuffer.length,
964
+ });
965
+ }
966
+ catch (err) {
967
+ await telemetry.trackPhaseError('upload_trace', err);
968
+ }
969
+ // --- Upload videos ---
970
+ const videos = await loadSessionVideos(traceDir);
971
+ for (const video of videos) {
972
+ telemetry.trackPhaseStart('upload_video', {
973
+ filename: video.filename,
974
+ });
975
+ try {
976
+ const videoBuffer = await readFile(video.path);
977
+ const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
978
+ await uploadVideo(videoUrls.uploadUrl, videoBuffer);
979
+ telemetry.trackPhaseEnd('upload_video', {
980
+ filename: video.filename,
981
+ bytes: videoBuffer.length,
982
+ });
983
+ }
984
+ catch (err) {
985
+ await telemetry.trackPhaseError('upload_video', err, {
986
+ filename: video.filename,
987
+ });
838
988
  }
839
- // Create verification steps and upload screenshots as step assets
840
- // Filter out screenshots already uploaded by the PostToolUse hook
841
- const pngFiles = files
842
- .filter((f) => f.toLowerCase().endsWith('.png'))
843
- .filter((f) => !screenshotHook.uploadedFiles.has(f))
844
- .sort(); // Sort to maintain order by filename (01_, 02_, etc.)
845
- // Start position after any screenshots already uploaded by the hook
846
- const positionOffset = screenshotHook.uploadedFiles.size + 1;
847
- for (let i = 0; i < pngFiles.length; i++) {
848
- const pngFile = pngFiles[i];
849
- try {
850
- const pngPath = join(traceDir, pngFile);
851
- const pngBuffer = await readFile(pngPath);
852
- const pngStat = await stat(pngPath);
853
- // Detect "key_" prefix for high-priority screenshots
854
- const isKeyFrame = pngFile
855
- .toLowerCase()
856
- .startsWith('key_');
857
- const displayName = pngFile
858
- .replace(/\.png$/i, '')
859
- .replace(/^key_/i, '')
860
- .replace(/^\d+_/, '')
861
- .replace(/-/g, ' ');
862
- // Create a verification step for this screenshot
863
- const { step } = await createVerificationStep(browserSession.id, {
864
- checklistItemId: checklistItem.id,
865
- position: positionOffset + i,
866
- stepType: 'screenshot',
867
- stepName: displayName,
868
- description: isKeyFrame
869
- ? 'Key moment captured during verification'
870
- : 'Screenshot captured during verification',
871
- isKeyStep: isKeyFrame,
872
- status: 'success',
873
- metadata: {
874
- filename: pngFile,
875
- timestamp: pngStat.mtime.toISOString(),
876
- },
877
- });
878
- // Create step asset with upload URL
879
- const assetResponse = await createStepAsset(browserSession.id, step.id, {
989
+ }
990
+ // --- Upload remaining screenshots ---
991
+ const pngFiles = files
992
+ .filter((f) => f.toLowerCase().endsWith('.png'))
993
+ .filter((f) => !screenshotHook.uploadedFiles.has(f))
994
+ .sort();
995
+ const positionOffset = screenshotHook.uploadedFiles.size + 1;
996
+ for (let i = 0; i < pngFiles.length; i++) {
997
+ const pngFile = pngFiles[i];
998
+ const isKeyFrame = pngFile.toLowerCase().startsWith('key_');
999
+ telemetry.trackPhaseStart('upload_screenshot', {
1000
+ filename: pngFile,
1001
+ isKeyFrame,
1002
+ });
1003
+ try {
1004
+ const pngPath = join(traceDir, pngFile);
1005
+ const pngBuffer = await readFile(pngPath);
1006
+ const pngStat = await stat(pngPath);
1007
+ const displayName = pngFile
1008
+ .replace(/\.png$/i, '')
1009
+ .replace(/^key_/i, '')
1010
+ .replace(/^\d+_/, '')
1011
+ .replace(/-/g, ' ');
1012
+ const { step } = await createVerificationStep(browserSession.id, {
1013
+ checklistItemId: checklistItem.id,
1014
+ position: positionOffset + i,
1015
+ stepType: 'screenshot',
1016
+ stepName: displayName,
1017
+ description: isKeyFrame
1018
+ ? 'Key moment captured during verification'
1019
+ : 'Screenshot captured during verification',
1020
+ isKeyStep: isKeyFrame,
1021
+ status: 'success',
1022
+ metadata: {
880
1023
  filename: pngFile,
881
- assetType: 'screenshot',
882
- timing: 'after',
883
- position: 0,
884
- capturedAt: pngStat.mtime.toISOString(),
885
- metadata: {
886
- name: displayName,
887
- highPriority: isKeyFrame,
888
- },
889
- });
890
- // Upload the screenshot to the signed URL
891
- await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
892
- }
893
- catch {
894
- // Ignore individual screenshot upload errors
895
- }
1024
+ timestamp: pngStat.mtime.toISOString(),
1025
+ },
1026
+ });
1027
+ const assetResponse = await createStepAsset(browserSession.id, step.id, {
1028
+ filename: pngFile,
1029
+ assetType: 'screenshot',
1030
+ timing: 'after',
1031
+ position: 0,
1032
+ capturedAt: pngStat.mtime.toISOString(),
1033
+ metadata: {
1034
+ name: displayName,
1035
+ highPriority: isKeyFrame,
1036
+ },
1037
+ });
1038
+ await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
1039
+ telemetry.trackPhaseEnd('upload_screenshot', {
1040
+ filename: pngFile,
1041
+ bytes: pngBuffer.length,
1042
+ });
1043
+ }
1044
+ catch (err) {
1045
+ await telemetry.trackPhaseError('upload_screenshot', err, { filename: pngFile });
896
1046
  }
897
1047
  }
898
1048
  }
899
- if (existsSync(conversationFilePath)) {
900
- try {
901
- const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
902
- const conversationBuffer = await readFile(conversationFilePath);
903
- await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
904
- }
905
- catch {
906
- // Ignore
907
- }
1049
+ }
1050
+ // --- Upload conversation ---
1051
+ if (existsSync(conversationFilePath)) {
1052
+ telemetry.trackPhaseStart('upload_conversation');
1053
+ try {
1054
+ const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
1055
+ const conversationBuffer = await readFile(conversationFilePath);
1056
+ await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
1057
+ telemetry.trackPhaseEnd('upload_conversation', {
1058
+ bytes: conversationBuffer.length,
1059
+ });
1060
+ }
1061
+ catch (err) {
1062
+ await telemetry.trackPhaseError('upload_conversation', err);
908
1063
  }
909
- // Cast to help TypeScript understand the type after complex control flow
1064
+ }
1065
+ // --- Update browser session ---
1066
+ telemetry.trackPhaseStart('update_session');
1067
+ try {
910
1068
  const typedResult = finalResult;
911
1069
  const updateData = {
912
1070
  status: (agentError ? 'failed' : 'completed'),
@@ -923,105 +1081,218 @@ Return your findings in the structured output format with your evaluation.`;
923
1081
  typedResult.durationMs = durationMs;
924
1082
  typedResult.checklistItemId = checklistItem.id;
925
1083
  }
1084
+ telemetry.trackPhaseEnd('update_session');
926
1085
  }
927
- catch {
928
- // Ignore upload errors
1086
+ catch (err) {
1087
+ await telemetry.trackPhaseError('update_session', err);
929
1088
  }
930
- // 9. Determine the result to use for evaluation
931
- // In debug mode, use mock evaluation; otherwise use agent result
932
- let resultForEval;
933
- if (isDebugMode && options.debugOutcome) {
934
- const mockEval = getMockEvaluation(options.debugOutcome);
935
- resultForEval = {
936
- ...mockEval,
937
- sessionId: browserSession.id,
938
- sessionDir: getTraceDirectory(browserSession.id),
939
- durationMs,
940
- traceViewerUrl: traceDownloadUrl
941
- ? buildTraceViewerUrl(traceDownloadUrl)
942
- : undefined,
943
- checklistItemId: checklistItem.id,
944
- };
945
- console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
1089
+ }
1090
+ catch {
1091
+ // Ignore upload errors
1092
+ }
1093
+ // --- Phase: evaluation ---
1094
+ telemetry.trackPhaseStart('evaluation');
1095
+ let resultForEval;
1096
+ if (isDebugMode && debugOutcome) {
1097
+ const mockEval = getMockEvaluation(debugOutcome);
1098
+ resultForEval = {
1099
+ ...mockEval,
1100
+ sessionId: browserSession.id,
1101
+ sessionDir: getTraceDirectory(browserSession.id),
1102
+ durationMs,
1103
+ traceViewerUrl: traceDownloadUrl
1104
+ ? buildTraceViewerUrl(traceDownloadUrl)
1105
+ : undefined,
1106
+ checklistItemId: checklistItem.id,
1107
+ };
1108
+ console.log(`\n[DEBUG MODE] Using mock evaluation: ${debugOutcome}`);
1109
+ }
1110
+ else {
1111
+ const typedResult = finalResult;
1112
+ if (agentError && !typedResult) {
1113
+ throw new Error(`Verification failed: ${agentError}`);
946
1114
  }
947
- else {
948
- const typedResult = finalResult;
949
- if (agentError && !typedResult) {
950
- throw new Error(`Verification failed: ${agentError}`);
951
- }
952
- if (!typedResult) {
953
- throw new Error('No result received from agent');
1115
+ if (!typedResult) {
1116
+ throw new Error('No result received from agent');
1117
+ }
1118
+ resultForEval = typedResult;
1119
+ }
1120
+ telemetry.trackPhaseEnd('evaluation', {
1121
+ evaluation: resultForEval.evaluation,
1122
+ issueCount: resultForEval.issues?.length ?? 0,
1123
+ });
1124
+ // --- Phase: scenario_update ---
1125
+ telemetry.trackPhaseStart('scenario_update');
1126
+ const evaluation = resultForEval.evaluation;
1127
+ if (evaluation === 'verified') {
1128
+ await updateChecklistItem(featureId, checklistItem.id, {
1129
+ status: 'verified',
1130
+ browserSessionId: browserSession.id,
1131
+ });
1132
+ console.log(`\n\u2705 Scenario verified!`);
1133
+ }
1134
+ else if (evaluation === 'blocked') {
1135
+ await updateChecklistItem(featureId, checklistItem.id, {
1136
+ status: 'blocked',
1137
+ browserSessionId: browserSession.id,
1138
+ blockedReason: resultForEval.evaluationReason,
1139
+ });
1140
+ // Enhanced output for Claude Code
1141
+ console.log(`\n${'='.repeat(60)}`);
1142
+ console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
1143
+ console.log(`${'='.repeat(60)}`);
1144
+ console.log(`\nIssue: ${resultForEval.evaluationReason}`);
1145
+ if (resultForEval.issues?.length) {
1146
+ console.log(`\nDetails:`);
1147
+ for (const issue of resultForEval.issues) {
1148
+ const typeStr = issue.type ? ` (${issue.type})` : '';
1149
+ console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
954
1150
  }
955
- resultForEval = typedResult;
956
1151
  }
957
- // 10. Update checklist item based on evaluation
958
- const evaluation = resultForEval.evaluation;
959
- if (evaluation === 'verified') {
960
- await updateChecklistItem(featureId, checklistItem.id, {
961
- status: 'verified',
962
- browserSessionId: browserSession.id,
963
- });
964
- console.log(`\n\u2705 Checklist item verified!`);
1152
+ if (resultForEval.traceViewerUrl) {
1153
+ console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
965
1154
  }
966
- else if (evaluation === 'blocked') {
967
- await updateChecklistItem(featureId, checklistItem.id, {
968
- status: 'blocked',
969
- browserSessionId: browserSession.id,
970
- blockedReason: resultForEval.evaluationReason,
971
- });
972
- // Enhanced output for Claude Code
973
- console.log(`\n${'='.repeat(60)}`);
974
- console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
975
- console.log(`${'='.repeat(60)}`);
976
- console.log(`\nIssue: ${resultForEval.evaluationReason}`);
977
- if (resultForEval.issues?.length) {
978
- console.log(`\nDetails:`);
979
- for (const issue of resultForEval.issues) {
980
- const typeStr = issue.type ? ` (${issue.type})` : '';
981
- console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
982
- }
1155
+ console.log(`\nSuggested action: Debug this issue in your code, then run go again.`);
1156
+ console.log(`${'='.repeat(60)}\n`);
1157
+ }
1158
+ else if (evaluation === 'partial' ||
1159
+ evaluation === 'failed' ||
1160
+ evaluation === 'incomplete') {
1161
+ // Mark as incomplete - verification happened but requirements not fully met
1162
+ await updateChecklistItem(featureId, checklistItem.id, {
1163
+ status: 'incomplete',
1164
+ browserSessionId: browserSession.id,
1165
+ incompleteReason: resultForEval.evaluationReason,
1166
+ });
1167
+ // Check if other items are terminal and prompt user
1168
+ await handleIncompleteItem(featureId, checklistItem, resultForEval);
1169
+ }
1170
+ telemetry.trackPhaseEnd('scenario_update', { newStatus: evaluation });
1171
+ return resultForEval;
1172
+ }
1173
+ /**
1174
+ * Verify a scenario in the browser.
1175
+ * Orchestrates runVerification -> processVerificationResult with telemetry.
1176
+ */
1177
+ export async function verifyFeature(options) {
1178
+ const telemetry = createTelemetryCollector('go');
1179
+ await telemetry.trackCommandStart({
1180
+ hasProfile: !!options.profile,
1181
+ hasScenario: options.scenario !== undefined,
1182
+ hasNotes: !!options.notes,
1183
+ isDebugMode: !!options.debugOutcome,
1184
+ });
1185
+ const { envNames } = await getEnvNames();
1186
+ if (envNames.length === 0) {
1187
+ throw new Error(formatProfileRequiredMessage((text) => bold(text)));
1188
+ }
1189
+ let ctx;
1190
+ let interrupted = false;
1191
+ let checklistItemResolved = false;
1192
+ const handleInterrupt = async () => {
1193
+ if (interrupted)
1194
+ return;
1195
+ interrupted = true;
1196
+ console.log('\nVerification interrupted. Cleaning up...');
1197
+ await telemetry.trackCommandEnd('interrupted', {
1198
+ durationMs: ctx ? Date.now() - ctx.startTime : 0,
1199
+ });
1200
+ if (ctx) {
1201
+ try {
1202
+ await updateBrowserSession(ctx.browserSession.id, {
1203
+ status: 'interrupted',
1204
+ durationMs: Date.now() - ctx.startTime,
1205
+ });
983
1206
  }
984
- if (resultForEval.traceViewerUrl) {
985
- console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
1207
+ catch {
1208
+ // Best effort
1209
+ }
1210
+ try {
1211
+ await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
1212
+ status: 'pending',
1213
+ });
1214
+ }
1215
+ catch {
1216
+ // Best effort
1217
+ }
1218
+ if (ctx.configResult) {
1219
+ try {
1220
+ await cleanupTempFiles(ctx.configResult);
1221
+ }
1222
+ catch {
1223
+ // Best effort
1224
+ }
986
1225
  }
987
- console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
988
- console.log(`${'='.repeat(60)}\n`);
989
- }
990
- else if (evaluation === 'partial' || evaluation === 'failed') {
991
- // Mark as incomplete - verification happened but requirements not fully met
992
- await updateChecklistItem(featureId, checklistItem.id, {
993
- status: 'incomplete',
994
- browserSessionId: browserSession.id,
995
- incompleteReason: resultForEval.evaluationReason,
996
- });
997
- // Check if other items are terminal and prompt user
998
- await handleIncompleteItem(featureId, checklistItem, resultForEval);
999
1226
  }
1227
+ console.log('Scenario reset to pending. Partial steps are preserved.');
1228
+ process.exit(0);
1229
+ };
1230
+ process.on('SIGINT', handleInterrupt);
1231
+ process.on('SIGTERM', handleInterrupt);
1232
+ try {
1233
+ // Phase 1: Setup + agent execution
1234
+ ctx = await runVerification(options, telemetry);
1235
+ // Boundary flush — all agent execution telemetry is now safe
1236
+ await telemetry.flush();
1237
+ // Phase 2: Uploads + evaluation + status update
1238
+ const resultForEval = await processVerificationResult(ctx);
1239
+ checklistItemResolved = true;
1240
+ await telemetry.trackCommandEnd('success', {
1241
+ evaluation: resultForEval.evaluation,
1242
+ durationMs: ctx.durationMs,
1243
+ ...ctx.resultMeta,
1244
+ });
1000
1245
  return resultForEval;
1001
1246
  }
1247
+ catch (error) {
1248
+ await telemetry.trackCommandError(error);
1249
+ throw error;
1250
+ }
1002
1251
  finally {
1003
- // Remove interrupt handlers to avoid double-firing after normal completion
1004
1252
  process.removeListener('SIGINT', handleInterrupt);
1005
1253
  process.removeListener('SIGTERM', handleInterrupt);
1006
- // Cleanup
1007
- await cleanupTempFiles(configResult);
1008
- try {
1009
- const traceDir = getTraceDirectory(browserSession.id);
1010
- if (existsSync(traceDir)) {
1011
- await rm(traceDir, { recursive: true, force: true });
1254
+ // If the scenario was never resolved (agent error, throw, etc.),
1255
+ // reset it to pending so it doesn't stay stuck in verification_in_progress.
1256
+ if (ctx && !interrupted && !checklistItemResolved) {
1257
+ try {
1258
+ await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
1259
+ status: 'pending',
1260
+ });
1261
+ console.log('Scenario reset to pending after unexpected error.');
1012
1262
  }
1013
- }
1014
- catch {
1015
- // Ignore
1016
- }
1017
- try {
1018
- if (existsSync(conversationDir)) {
1019
- await rm(conversationDir, { recursive: true, force: true });
1263
+ catch (resetErr) {
1264
+ await telemetry.trackPhaseError('scenario_reset', resetErr);
1020
1265
  }
1021
1266
  }
1022
- catch {
1023
- // Ignore
1267
+ // --- Phase: cleanup ---
1268
+ if (ctx) {
1269
+ telemetry.trackPhaseStart('cleanup');
1270
+ if (ctx.configResult) {
1271
+ await cleanupTempFiles(ctx.configResult);
1272
+ }
1273
+ try {
1274
+ const traceDir = getTraceDirectory(ctx.browserSession.id);
1275
+ if (existsSync(traceDir)) {
1276
+ await rm(traceDir, { recursive: true, force: true });
1277
+ }
1278
+ }
1279
+ catch {
1280
+ // Ignore
1281
+ }
1282
+ try {
1283
+ if (ctx.conversationDir && existsSync(ctx.conversationDir)) {
1284
+ await rm(ctx.conversationDir, {
1285
+ recursive: true,
1286
+ force: true,
1287
+ });
1288
+ }
1289
+ }
1290
+ catch {
1291
+ // Ignore
1292
+ }
1293
+ telemetry.trackPhaseEnd('cleanup');
1024
1294
  }
1295
+ await telemetry.flush();
1025
1296
  }
1026
1297
  }
1027
1298
  //# sourceMappingURL=verifyFeature.js.map