@ranger-testing/ranger-cli 1.0.13 → 1.1.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +72 -183
  2. package/build/cli.js +219 -278
  3. package/build/cli.js.map +1 -1
  4. package/build/commands/addEnv.js +1 -1
  5. package/build/commands/addEnv.js.map +1 -1
  6. package/build/commands/authEncrypt.js +7 -6
  7. package/build/commands/authEncrypt.js.map +1 -1
  8. package/build/commands/clean.js +1 -1
  9. package/build/commands/clean.js.map +1 -1
  10. package/build/commands/config.js +5 -4
  11. package/build/commands/config.js.map +1 -1
  12. package/build/commands/dataMcpServer.js +1 -1
  13. package/build/commands/dataMcpServer.js.map +1 -1
  14. package/build/commands/env.js +17 -10
  15. package/build/commands/env.js.map +1 -1
  16. package/build/commands/feature.js +277 -285
  17. package/build/commands/feature.js.map +1 -1
  18. package/build/commands/hook.js +27 -0
  19. package/build/commands/hook.js.map +1 -0
  20. package/build/commands/hooks/disable.js +25 -0
  21. package/build/commands/hooks/disable.js.map +1 -0
  22. package/build/commands/hooks/enable.js +44 -0
  23. package/build/commands/hooks/enable.js.map +1 -0
  24. package/build/commands/hooks/exitPlanMode.js +35 -0
  25. package/build/commands/hooks/exitPlanMode.js.map +1 -0
  26. package/build/commands/hooks/index.js +10 -0
  27. package/build/commands/hooks/index.js.map +1 -0
  28. package/build/commands/hooks/output.js +53 -0
  29. package/build/commands/hooks/output.js.map +1 -0
  30. package/build/commands/hooks/planReminder.js +46 -0
  31. package/build/commands/hooks/planReminder.js.map +1 -0
  32. package/build/commands/hooks/planStart.js +30 -0
  33. package/build/commands/hooks/planStart.js.map +1 -0
  34. package/build/commands/hooks/postEdit.js +41 -0
  35. package/build/commands/hooks/postEdit.js.map +1 -0
  36. package/build/commands/hooks/preCompact.js +30 -0
  37. package/build/commands/hooks/preCompact.js.map +1 -0
  38. package/build/commands/hooks/sessionStart.js +35 -0
  39. package/build/commands/hooks/sessionStart.js.map +1 -0
  40. package/build/commands/hooks/stopHook.js +54 -0
  41. package/build/commands/hooks/stopHook.js.map +1 -0
  42. package/build/commands/index.js +1 -0
  43. package/build/commands/index.js.map +1 -1
  44. package/build/commands/skillup.js +41 -77
  45. package/build/commands/skillup.js.map +1 -1
  46. package/build/commands/start.js +1 -1
  47. package/build/commands/start.js.map +1 -1
  48. package/build/commands/status.js +47 -65
  49. package/build/commands/status.js.map +1 -1
  50. package/build/commands/update.js +32 -40
  51. package/build/commands/update.js.map +1 -1
  52. package/build/commands/updateEnv.js +1 -1
  53. package/build/commands/updateEnv.js.map +1 -1
  54. package/build/commands/useEnv.js +1 -1
  55. package/build/commands/useEnv.js.map +1 -1
  56. package/build/commands/utils/browserSessionsApi.js +1 -1
  57. package/build/commands/utils/browserSessionsApi.js.map +1 -1
  58. package/build/commands/utils/claudePlugin.js +85 -0
  59. package/build/commands/utils/claudePlugin.js.map +1 -0
  60. package/build/commands/utils/cliSecret.js +1 -1
  61. package/build/commands/utils/environment.js +0 -6
  62. package/build/commands/utils/environment.js.map +1 -1
  63. package/build/commands/utils/featureApi.js +82 -15
  64. package/build/commands/utils/featureApi.js.map +1 -1
  65. package/build/commands/utils/featureReportGenerator.js +37 -3
  66. package/build/commands/utils/featureReportGenerator.js.map +1 -1
  67. package/build/commands/utils/git.js +44 -0
  68. package/build/commands/utils/git.js.map +1 -0
  69. package/build/commands/utils/keychain.js +1 -1
  70. package/build/commands/utils/keychain.js.map +1 -1
  71. package/build/commands/utils/localAgentInstallationsApi.js +1 -1
  72. package/build/commands/utils/rangerRoot.js +30 -0
  73. package/build/commands/utils/rangerRoot.js.map +1 -0
  74. package/build/commands/utils/sessionCache.js +133 -0
  75. package/build/commands/utils/sessionCache.js.map +1 -0
  76. package/build/commands/utils/settings.js +7 -5
  77. package/build/commands/utils/settings.js.map +1 -1
  78. package/build/commands/utils/skillContent.js +28 -0
  79. package/build/commands/utils/skillContent.js.map +1 -0
  80. package/build/commands/utils/skills.js +1 -1
  81. package/build/commands/utils/skills.js.map +1 -1
  82. package/build/commands/utils/userApi.js +32 -0
  83. package/build/commands/utils/userApi.js.map +1 -0
  84. package/build/commands/verifyFeature.js +450 -105
  85. package/build/commands/verifyFeature.js.map +1 -1
  86. package/build/commands/verifyInBrowser.js +1 -1
  87. package/build/commands/verifyInBrowser.js.map +1 -1
  88. package/build/skills/bug-bash.md +31 -10
  89. package/build/skills/ranger/SKILL.md +164 -0
  90. package/build/skills/ranger/create.md +151 -0
  91. package/build/skills/ranger/start.md +122 -0
  92. package/build/skills/{feature-tracker → ranger}/verify.md +43 -17
  93. package/package.json +5 -3
  94. package/scripts/postinstall.js +18 -0
  95. package/build/commands/utils/mcpConfig.js +0 -1
  96. package/build/commands/utils/mcpConfig.js.map +0 -1
  97. package/build/skills/feature-tracker/SKILL.md +0 -185
  98. package/build/skills/feature-tracker/create.md +0 -105
  99. package/build/skills/feature-tracker/manage.md +0 -145
  100. package/build/skills/feature-tracker/report.md +0 -159
  101. package/build/skills/feature-tracker/start.md +0 -93
@@ -1,15 +1,30 @@
1
1
  import { query } from '@anthropic-ai/claude-agent-sdk';
2
2
  import { join, dirname } from 'path';
3
- import { readFile, readdir, appendFile, mkdir, rm } from 'fs/promises';
3
+ import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
4
4
  import { existsSync } from 'fs';
5
5
  import { execSync } from 'child_process';
6
6
  import { tmpdir } from 'os';
7
7
  import inquirer from 'inquirer';
8
8
  import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, } from './utils/settings.js';
9
- import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
9
+ import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
10
10
  import { getToken } from './utils/keychain.js';
11
11
  import { getActiveFeatureId } from './feature.js';
12
- import { getFeature, addChecklistItem, updateChecklistItem, } from './utils/featureApi.js';
12
+ import { getFeature, updateFeature, updateChecklistItem, startSession, } from './utils/featureApi.js';
13
+ import { getRangerDir } from './utils/rangerRoot.js';
14
+ /**
15
+ * Get the current git branch
16
+ */
17
+ function getGitBranch() {
18
+ try {
19
+ return execSync('git rev-parse --abbrev-ref HEAD', {
20
+ encoding: 'utf-8',
21
+ stdio: ['pipe', 'pipe', 'pipe'],
22
+ }).trim();
23
+ }
24
+ catch {
25
+ return undefined;
26
+ }
27
+ }
13
28
  /**
14
29
  * Zip a directory and return the buffer
15
30
  */
@@ -26,7 +41,7 @@ async function zipDirectory(dirPath) {
26
41
  * Find the trace directory for a session
27
42
  */
28
43
  function getTraceDirectory(sessionId) {
29
- return join(process.cwd(), '.ranger', 'sessions', sessionId);
44
+ return join(getRangerDir(), 'sessions', sessionId);
30
45
  }
31
46
  /**
32
47
  * Get the conversation file path for a session
@@ -34,24 +49,132 @@ function getTraceDirectory(sessionId) {
34
49
  function getConversationFilePath(sessionId) {
35
50
  return join(tmpdir(), 'ranger-browser-sessions', sessionId, 'conversation.jsonl');
36
51
  }
52
+ /**
53
+ * Load videos from a session's videos directory
54
+ */
55
+ async function loadSessionVideos(sessionDir) {
56
+ const videosDir = join(sessionDir, 'videos');
57
+ if (!existsSync(videosDir)) {
58
+ return [];
59
+ }
60
+ const files = await readdir(videosDir);
61
+ const videoFiles = files.filter((f) => f.toLowerCase().endsWith('.webm'));
62
+ return videoFiles.map((filename) => ({
63
+ filename,
64
+ path: join(videosDir, filename),
65
+ }));
66
+ }
67
+ /**
68
+ * Get mock evaluation data for debug mode
69
+ */
70
+ function getMockEvaluation(outcome) {
71
+ const mockEvaluations = {
72
+ verified: {
73
+ success: true,
74
+ summary: '[DEBUG] Mock verification completed successfully.',
75
+ evaluation: 'verified',
76
+ evaluationReason: 'All checklist requirements were met.',
77
+ },
78
+ partial: {
79
+ success: false,
80
+ summary: '[DEBUG] Mock partial verification.',
81
+ evaluation: 'partial',
82
+ evaluationReason: 'Some requirements were not fully verified.',
83
+ issues: [
84
+ {
85
+ severity: 'MINOR',
86
+ type: 'OTHER',
87
+ description: 'Secondary feature not fully implemented',
88
+ },
89
+ ],
90
+ },
91
+ incomplete: {
92
+ success: false,
93
+ summary: '[DEBUG] Mock incomplete verification.',
94
+ evaluation: 'partial',
95
+ evaluationReason: 'Implementation is incomplete and needs additional work.',
96
+ issues: [
97
+ {
98
+ severity: 'MAJOR',
99
+ type: 'OTHER',
100
+ description: 'Feature is partially implemented but missing key functionality',
101
+ },
102
+ {
103
+ severity: 'MINOR',
104
+ type: 'OTHER',
105
+ description: 'UI elements present but not fully functional',
106
+ },
107
+ ],
108
+ },
109
+ blocked: {
110
+ success: false,
111
+ summary: '[DEBUG] Mock blocked verification.',
112
+ evaluation: 'blocked',
113
+ evaluationReason: 'HTTP 404 - Page not found.',
114
+ issues: [
115
+ {
116
+ severity: 'BLOCKER',
117
+ type: 'HTTP_404',
118
+ description: 'Target page returns 404 Not Found',
119
+ },
120
+ {
121
+ severity: 'MAJOR',
122
+ type: 'NAVIGATION_ERROR',
123
+ description: 'Unable to proceed due to missing page',
124
+ },
125
+ ],
126
+ },
127
+ failed: {
128
+ success: false,
129
+ summary: '[DEBUG] Mock failed verification.',
130
+ evaluation: 'failed',
131
+ evaluationReason: 'Browser automation failed with timeout error.',
132
+ issues: [
133
+ {
134
+ severity: 'BLOCKER',
135
+ type: 'OTHER',
136
+ description: 'Timeout waiting for element',
137
+ },
138
+ ],
139
+ },
140
+ };
141
+ return mockEvaluations[outcome];
142
+ }
143
+ /**
144
+ * Get the debug mode prompt for minimal browser interaction
145
+ */
146
+ function getDebugPrompt() {
147
+ return `You are testing browser automation. Your task is simple:
148
+
149
+ 1. Navigate to https://www.mozilla.org using browser_navigate
150
+ 2. Take a snapshot with browser_snapshot to see the page
151
+ 3. Take a screenshot named "01_mozilla-homepage.png" using browser_take_screenshot
152
+ 4. Return immediately with the structured output
153
+
154
+ Return your findings in the structured output format.`;
155
+ }
37
156
  /**
38
157
  * Prompt user to select a checklist item
39
158
  */
40
159
  async function selectChecklistItem(items) {
160
+ if (items.length === 0) {
161
+ return null;
162
+ }
41
163
  const choices = items.map((item, i) => {
42
164
  const emoji = item.status === 'verified'
43
165
  ? '\u2705'
44
- : item.status === 'blocked'
45
- ? '\ud83d\uded1'
46
- : item.status === 'canceled'
47
- ? '\u26d4'
48
- : '\u2b1c';
166
+ : item.status === 'incomplete'
167
+ ? '\ud83d\udfe0' // orange circle
168
+ : item.status === 'blocked'
169
+ ? '\ud83d\uded1'
170
+ : item.status === 'canceled'
171
+ ? '\u26d4'
172
+ : '\u2b1c';
49
173
  return {
50
174
  name: `${i + 1}. ${emoji} ${item.description}`,
51
175
  value: item.id,
52
176
  };
53
177
  });
54
- choices.push({ name: '+ Add new item', value: '__new__' });
55
178
  const { selected } = await inquirer.prompt([
56
179
  {
57
180
  type: 'list',
@@ -60,97 +183,161 @@ async function selectChecklistItem(items) {
60
183
  choices,
61
184
  },
62
185
  ]);
63
- if (selected === '__new__') {
64
- return { item: null, addNew: true };
186
+ return items.find((i) => i.id === selected) || null;
187
+ }
188
+ /**
189
+ * Handle incomplete verification - check if all other items are terminal and prompt user
190
+ */
191
+ async function handleIncompleteItem(featureId, incompleteItem, result) {
192
+ const feature = await getFeature(featureId);
193
+ const sessionItems = feature.checklistItems.filter((i) => i.sessionId === feature.currentSessionId);
194
+ const otherItems = sessionItems.filter((i) => i.id !== incompleteItem.id);
195
+ const allOthersTerminal = otherItems.every((i) => i.status === 'verified' ||
196
+ i.status === 'blocked' ||
197
+ i.status === 'canceled' ||
198
+ i.status === 'incomplete');
199
+ console.log(`\n${'='.repeat(60)}`);
200
+ console.log(`INCOMPLETE - Verification found issues`);
201
+ console.log(`${'='.repeat(60)}`);
202
+ // Display structured list of issues
203
+ if (result.issues && result.issues.length > 0) {
204
+ console.log(`\nIssues found:`);
205
+ for (const issue of result.issues) {
206
+ console.log(` • ${issue.description}`);
207
+ }
65
208
  }
66
- return {
67
- item: items.find((i) => i.id === selected) || null,
68
- addNew: false,
69
- };
209
+ else if (result.evaluationReason) {
210
+ console.log(`\nReason: ${result.evaluationReason}`);
211
+ }
212
+ console.log(`\nNext steps:`);
213
+ console.log(` 1. Fix the issues above in your code`);
214
+ console.log(` 2. Run 'ranger verify-feature' again to re-verify`);
215
+ if (allOthersTerminal && otherItems.length > 0) {
216
+ console.log(`\nAll other checklist items are complete.`);
217
+ console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
218
+ }
219
+ console.log(`${'='.repeat(60)}\n`);
70
220
  }
71
221
  /**
72
222
  * Verify a checklist item in the browser
73
223
  */
74
- export async function verifyFeature(url, options) {
224
+ export async function verifyFeature(options) {
225
+ const isDebugMode = !!options.debugOutcome;
226
+ if (isDebugMode) {
227
+ console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
228
+ }
75
229
  // 1. Check for active feature
76
230
  const featureId = await getActiveFeatureId();
77
231
  if (!featureId) {
78
- throw new Error('No active feature. Run: ranger feature use <id> or ranger feature create');
232
+ throw new Error('No active feature. Run: ranger feature resume <id> or ranger feature create');
79
233
  }
80
234
  // Load feature details
81
235
  const feature = await getFeature(featureId);
236
+ // Update the feature's gitBranch to the current branch
237
+ const currentBranch = getGitBranch();
238
+ if (currentBranch && currentBranch !== feature.gitBranch) {
239
+ await updateFeature(featureId, { gitBranch: currentBranch });
240
+ console.log(` Updated branch to: ${currentBranch}`);
241
+ }
82
242
  console.log(`\nActive feature: ${feature.name} (${featureId})`);
243
+ // Filter to only items in the current session
244
+ const currentSessionId = feature.currentSessionId;
245
+ const currentSessionItems = currentSessionId
246
+ ? feature.checklistItems.filter((item) => item.sessionId === currentSessionId)
247
+ : feature.checklistItems;
83
248
  // 2. Determine which checklist item we're verifying
84
249
  let checklistItem = null;
85
250
  let taskDescription = options.task;
86
- if (options.newItem) {
87
- // Create a new item with the provided description
88
- checklistItem = await addChecklistItem(featureId, {
89
- description: options.newItem,
90
- });
91
- console.log(`Created new checklist item: ${checklistItem.description}`);
92
- if (!taskDescription) {
93
- taskDescription = options.newItem;
94
- }
95
- }
96
- else if (options.item !== undefined) {
97
- // Use specified item index
251
+ if (options.item !== undefined) {
252
+ // Use specified item index (1-based, relative to current session items)
98
253
  const itemIndex = options.item - 1; // 1-based to 0-based
99
- if (itemIndex < 0 || itemIndex >= feature.checklistItems.length) {
100
- throw new Error(`Invalid item index: ${options.item}. Feature has ${feature.checklistItems.length} items.`);
254
+ if (itemIndex < 0 || itemIndex >= currentSessionItems.length) {
255
+ throw new Error(`Invalid item index: ${options.item}. Current session has ${currentSessionItems.length} items.`);
101
256
  }
102
- checklistItem = feature.checklistItems[itemIndex];
257
+ checklistItem = currentSessionItems[itemIndex];
103
258
  if (!taskDescription) {
104
259
  taskDescription = checklistItem.description;
105
260
  }
106
261
  }
107
262
  else {
108
- // Interactive selection
109
- const { item, addNew } = await selectChecklistItem(feature.checklistItems);
110
- if (addNew) {
111
- const { description } = await inquirer.prompt([
112
- {
113
- type: 'input',
114
- name: 'description',
115
- message: 'Enter new item description:',
116
- validate: (input) => input.trim() ? true : 'Description is required',
117
- },
118
- ]);
119
- checklistItem = await addChecklistItem(featureId, {
120
- description: description.trim(),
263
+ // Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
264
+ const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
265
+ if (!isInteractive) {
266
+ // Non-TTY mode: require --item flag, show available items
267
+ console.log('\nNon-interactive mode detected. The --item flag is required.');
268
+ console.log('\nAvailable checklist items for current session:');
269
+ currentSessionItems.forEach((item, i) => {
270
+ const emoji = item.status === 'verified'
271
+ ? '\u2705'
272
+ : item.status === 'incomplete'
273
+ ? '\ud83d\udfe0' // orange circle
274
+ : item.status === 'blocked'
275
+ ? '\ud83d\uded1'
276
+ : item.status === 'canceled'
277
+ ? '\u26d4'
278
+ : '\u2b1c';
279
+ console.log(` ${i + 1}. ${emoji} ${item.description}`);
121
280
  });
122
- console.log(`Created new checklist item: ${checklistItem.description}`);
123
- if (!taskDescription) {
124
- taskDescription = checklistItem.description;
125
- }
281
+ console.log('\nUsage: ranger verify-feature --item <number>');
282
+ console.log('Example: ranger verify-feature --item 1');
283
+ throw new Error('The --item flag is required in non-interactive mode. See available items above.');
126
284
  }
127
- else {
128
- checklistItem = item;
129
- if (!taskDescription && checklistItem) {
130
- taskDescription = checklistItem.description;
131
- }
285
+ // Interactive selection (show only current session items)
286
+ checklistItem = await selectChecklistItem(currentSessionItems);
287
+ if (!taskDescription && checklistItem) {
288
+ taskDescription = checklistItem.description;
132
289
  }
133
290
  }
134
291
  if (!checklistItem) {
135
- throw new Error('No checklist item selected');
292
+ throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
136
293
  }
137
294
  if (!taskDescription) {
138
295
  throw new Error('No task description provided');
139
296
  }
140
297
  console.log(`\nVerifying: ${checklistItem.description}`);
141
298
  console.log(`Task: ${taskDescription}`);
142
- // 3. Load active environment
143
- const activeEnvPath = join(process.cwd(), '.ranger', 'active-env.txt');
144
- if (!existsSync(activeEnvPath)) {
145
- throw new Error('No active environment. Run: ranger use <env-name>');
299
+ // Start the session if it's in ready status
300
+ if (feature.currentSession &&
301
+ feature.currentSession.status === 'ready' &&
302
+ feature.currentSessionId) {
303
+ try {
304
+ await startSession(featureId, feature.currentSessionId);
305
+ }
306
+ catch (error) {
307
+ // Ignore if session is already started (race condition)
308
+ const message = error instanceof Error ? error.message : String(error);
309
+ if (!message.includes('already')) {
310
+ throw error;
311
+ }
312
+ }
146
313
  }
147
- const activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
148
- const envDir = join(process.cwd(), '.ranger', activeEnv);
314
+ // Update checklist item status to verification_in_progress
315
+ await updateChecklistItem(featureId, checklistItem.id, {
316
+ status: 'verification_in_progress',
317
+ });
318
+ // 3. Determine which environment to use (same pattern as verifyInBrowser)
319
+ let activeEnv;
320
+ if (options.env) {
321
+ activeEnv = options.env;
322
+ }
323
+ else {
324
+ const activeEnvPath = join(getRangerDir(), 'active-env.txt');
325
+ if (!existsSync(activeEnvPath)) {
326
+ throw new Error('No active environment. Run: ranger use <env-name>');
327
+ }
328
+ activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
329
+ }
330
+ const envDir = join(getRangerDir(), activeEnv);
149
331
  if (!existsSync(envDir)) {
150
332
  throw new Error(`Environment not found at ${envDir}. Run: ranger add env ${activeEnv}`);
151
333
  }
152
334
  const settings = await loadSettings(activeEnv);
153
335
  const resolvedSettings = resolveEnvVars(settings);
336
+ // Get base URL from settings
337
+ const url = resolvedSettings.baseUrl;
338
+ if (!url) {
339
+ throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
340
+ }
154
341
  // 4. Create browser session
155
342
  const token = await getToken();
156
343
  if (!token) {
@@ -161,6 +348,8 @@ export async function verifyFeature(url, options) {
161
348
  settings: resolvedSettings,
162
349
  task: taskDescription,
163
350
  url,
351
+ featureId,
352
+ checklistItemId: checklistItem.id,
164
353
  });
165
354
  console.log(`Browser session created: ${browserSession.id}`);
166
355
  const configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
@@ -195,23 +384,79 @@ export async function verifyFeature(url, options) {
195
384
  throw new Error(errorMsg);
196
385
  }
197
386
  // 5. UI Verifier + Evaluation Agent prompt
198
- const verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
387
+ let verifierPrompt;
388
+ if (isDebugMode) {
389
+ verifierPrompt = getDebugPrompt();
390
+ }
391
+ else {
392
+ const notesSection = checklistItem.notes
393
+ ? `\n\n## Additional Notes\n${checklistItem.notes}`
394
+ : '';
395
+ verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
199
396
 
200
397
  ## Checklist Item to Verify
201
- ${checklistItem.description}
398
+ ${checklistItem.description}${notesSection}
202
399
 
203
400
  ## Task to Execute
204
401
  ${taskDescription}
205
402
 
206
- ## URL
207
- ${url}
403
+ CRITICAL URL REQUIREMENT:
404
+ Your base URL is: ${url}
405
+ - You may ONLY navigate to paths under this base URL (same protocol, host, and port)
406
+ - For example, if the base URL is "http://localhost:3000", you can navigate to "http://localhost:3000/home", "http://localhost:3000/settings", etc.
407
+ - DO NOT navigate to any different domain, host, or port under any circumstances
408
+ - IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
409
+ - If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
410
+ - The base URL above is the ONLY authorized environment for this verification
208
411
 
209
412
  ## Instructions
210
- 1. Navigate to the URL using browser_navigate
413
+ 1. Navigate to the URL above using browser_navigate
211
414
  2. Take a snapshot with browser_snapshot to see the page
212
- 3. Execute the task step-by-step using browser tools
213
- 4. Document any issues found (bugs, errors, unexpected behavior)
214
- 5. After completing the verification, evaluate whether the result adequately verifies the checklist item
415
+ 3. **IMMEDIATELY check for blocking HTTP errors before proceeding**
416
+ 4. Execute the task step-by-step using browser tools
417
+ 5. **Take screenshots at key moments** (see Screenshot Guidelines below)
418
+ 6. Document any issues found (bugs, errors, unexpected behavior)
419
+ 7. After completing the verification, evaluate whether the result adequately verifies the checklist item
420
+
421
+ ## Screenshot Guidelines - IMPORTANT
422
+ Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
423
+
424
+ **When to take screenshots (use browser_take_screenshot):**
425
+ - After initial page load (capture starting state)
426
+ - Before and after clicking buttons or submitting forms
427
+ - When important UI elements appear (modals, notifications, loading states)
428
+ - After navigation to new pages
429
+ - When verifying specific elements exist
430
+ - At the final state showing the completed action
431
+
432
+ **Screenshot naming:**
433
+ - Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
434
+ - Number prefixes (01_, 02_, etc.) help maintain chronological order
435
+ - For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
436
+ - The "key_" prefix marks screenshots as high-priority evidence for human reviewers
437
+
438
+ **Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
439
+
440
+ ## Critical: Early Error Detection
441
+ After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP errors:
442
+
443
+ **Blocking errors to detect:**
444
+ - HTTP 404: "404", "Not Found", "Page not found", "does not exist"
445
+ - HTTP 500: "500", "Internal Server Error", "Server Error", "Something went wrong"
446
+ - HTTP 400: "400", "Bad Request", "Invalid request"
447
+
448
+ **Also check for:**
449
+ - Framework error pages (Next.js error boundary, React error page, "Application error")
450
+ - Completely blank/empty pages with no content
451
+ - "Cannot GET /path" messages
452
+
453
+ **If ANY blocking error is detected:**
454
+ 1. DO NOT continue with the task
455
+ 2. Return IMMEDIATELY with evaluation: "blocked"
456
+ 3. Set evaluationReason to describe the specific error (e.g., "HTTP 404 - Page not found at /dashboard")
457
+ 4. Include the error in issues array with severity: "BLOCKER" and appropriate type (HTTP_404, HTTP_500, HTTP_400, or NAVIGATION_ERROR)
458
+
459
+ This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
215
460
 
216
461
  ## Evaluation Criteria
217
462
  - VERIFIED: The task completed successfully and the checklist item requirements are fully met
@@ -220,6 +465,7 @@ ${url}
220
465
  - FAILED: The task could not be completed due to errors
221
466
 
222
467
  Return your findings in the structured output format with your evaluation.`;
468
+ }
223
469
  const outputSchema = {
224
470
  type: 'object',
225
471
  properties: {
@@ -239,6 +485,16 @@ Return your findings in the structured output format with your evaluation.`;
239
485
  type: 'string',
240
486
  enum: ['BLOCKER', 'MAJOR', 'MINOR'],
241
487
  },
488
+ type: {
489
+ type: 'string',
490
+ enum: [
491
+ 'HTTP_404',
492
+ 'HTTP_500',
493
+ 'HTTP_400',
494
+ 'NAVIGATION_ERROR',
495
+ 'OTHER',
496
+ ],
497
+ },
242
498
  description: { type: 'string' },
243
499
  screenshot: { type: 'string' },
244
500
  },
@@ -264,7 +520,6 @@ Return your findings in the structured output format with your evaluation.`;
264
520
  type: 'json_schema',
265
521
  schema: outputSchema,
266
522
  },
267
- maxTurns: 25,
268
523
  env: {
269
524
  ...process.env,
270
525
  ANTHROPIC_API_KEY: anthropicApiKey,
@@ -275,6 +530,8 @@ Return your findings in the structured output format with your evaluation.`;
275
530
  // 7. Collect messages
276
531
  let finalResult = null;
277
532
  let agentError = null;
533
+ // Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
534
+ let lastStructuredOutputInput = null;
278
535
  const conversationFilePath = getConversationFilePath(browserSession.id);
279
536
  const conversationDir = dirname(conversationFilePath);
280
537
  await mkdir(conversationDir, { recursive: true });
@@ -297,13 +554,25 @@ Return your findings in the structured output format with your evaluation.`;
297
554
  // Ignore
298
555
  }
299
556
  const msg = message;
557
+ // Capture StructuredOutput tool call input as fallback
558
+ // This handles SDK bug where structured_output is not populated in result
559
+ if (msg.type === 'assistant' && msg.message?.content) {
560
+ for (const block of msg.message.content) {
561
+ if (block.type === 'tool_use' &&
562
+ block.name === 'StructuredOutput' &&
563
+ block.input) {
564
+ lastStructuredOutputInput =
565
+ block.input;
566
+ }
567
+ }
568
+ }
300
569
  if (msg.error) {
301
570
  let errorText = msg.error;
302
571
  if (msg.message?.content &&
303
572
  Array.isArray(msg.message.content)) {
304
573
  const texts = msg.message.content
305
574
  .filter((c) => c.type === 'text')
306
- .map((c) => c.text)
575
+ .map((c) => c.text || '')
307
576
  .filter(Boolean);
308
577
  if (texts.length > 0) {
309
578
  errorText = texts.join(' ');
@@ -318,7 +587,15 @@ Return your findings in the structured output format with your evaluation.`;
318
587
  message.structured_output;
319
588
  }
320
589
  else if (message.subtype !== 'success') {
321
- if (!agentError) {
590
+ // SDK bug workaround: If we got error_during_execution but have
591
+ // a StructuredOutput tool call, use that instead
592
+ if (lastStructuredOutputInput &&
593
+ message.errors?.length === 0) {
594
+ finalResult = lastStructuredOutputInput;
595
+ // Clear the error since we actually succeeded
596
+ agentError = null;
597
+ }
598
+ else if (!agentError) {
322
599
  agentError =
323
600
  message.errors?.join(', ') ||
324
601
  'Unknown error';
@@ -334,27 +611,60 @@ Return your findings in the structured output format with your evaluation.`;
334
611
  agentError = error instanceof Error ? error.message : String(error);
335
612
  }
336
613
  const durationMs = Date.now() - startTime;
337
- // 8. Upload trace and update session
614
+ // 8. Upload trace, videos, screenshots with metadata, and update session
338
615
  let traceDownloadUrl;
339
616
  try {
340
617
  const traceDir = getTraceDirectory(browserSession.id);
341
618
  if (existsSync(traceDir)) {
342
619
  const files = await readdir(traceDir);
343
620
  if (files.length > 0) {
621
+ // Upload trace zip
344
622
  const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
345
623
  const traceBuffer = await zipDirectory(traceDir);
346
624
  await uploadTrace(traceUrls.uploadUrl, traceBuffer);
347
625
  traceDownloadUrl = traceUrls.downloadUrl;
626
+ // Upload videos from videos/ subdirectory
627
+ const videos = await loadSessionVideos(traceDir);
628
+ for (const video of videos) {
629
+ try {
630
+ const videoBuffer = await readFile(video.path);
631
+ const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
632
+ await uploadVideo(videoUrls.uploadUrl, videoBuffer);
633
+ }
634
+ catch {
635
+ // Ignore individual video upload errors
636
+ }
637
+ }
638
+ // Upload screenshots (same approach as main, with metadata)
348
639
  const pngFiles = files.filter((f) => f.toLowerCase().endsWith('.png'));
349
640
  for (const pngFile of pngFiles) {
350
641
  try {
351
642
  const pngPath = join(traceDir, pngFile);
352
643
  const pngBuffer = await readFile(pngPath);
353
- const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png');
644
+ const pngStat = await stat(pngPath);
645
+ // Detect "key_" prefix for high-priority screenshots
646
+ const isKeyFrame = pngFile
647
+ .toLowerCase()
648
+ .startsWith('key_');
649
+ const displayName = pngFile
650
+ .replace(/\.png$/i, '')
651
+ .replace(/^key_/i, '')
652
+ .replace(/^\d+_/, '')
653
+ .replace(/-/g, ' ');
654
+ const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png', {
655
+ metadata: {
656
+ name: displayName,
657
+ description: isKeyFrame
658
+ ? 'Key moment captured during verification'
659
+ : 'Screenshot captured during verification',
660
+ highPriority: isKeyFrame,
661
+ timestamp: pngStat.mtime.toISOString(),
662
+ },
663
+ });
354
664
  await uploadScreenshot(pngUrls.uploadUrl, pngBuffer);
355
665
  }
356
666
  catch {
357
- // Ignore
667
+ // Ignore individual screenshot upload errors
358
668
  }
359
669
  }
360
670
  }
@@ -390,39 +700,74 @@ Return your findings in the structured output format with your evaluation.`;
390
700
  catch {
391
701
  // Ignore upload errors
392
702
  }
393
- // 9. Update checklist item based on evaluation
394
- // Use typedResult from outer scope for the evaluation
395
- const resultForEval = finalResult;
396
- if (resultForEval && checklistItem) {
397
- const evaluation = resultForEval.evaluation;
398
- if (evaluation === 'verified') {
399
- await updateChecklistItem(featureId, checklistItem.id, {
400
- status: 'verified',
401
- browserSessionId: browserSession.id,
402
- });
403
- console.log(`\n\u2705 Checklist item verified!`);
404
- }
405
- else if (evaluation === 'blocked') {
406
- await updateChecklistItem(featureId, checklistItem.id, {
407
- status: 'blocked',
408
- browserSessionId: browserSession.id,
409
- blockedReason: resultForEval.evaluationReason,
410
- });
411
- console.log(`\n\ud83d\uded1 Checklist item blocked: ${resultForEval.evaluationReason}`);
703
+ // 9. Determine the result to use for evaluation
704
+ // In debug mode, use mock evaluation; otherwise use agent result
705
+ let resultForEval;
706
+ if (isDebugMode && options.debugOutcome) {
707
+ const mockEval = getMockEvaluation(options.debugOutcome);
708
+ resultForEval = {
709
+ ...mockEval,
710
+ sessionId: browserSession.id,
711
+ sessionDir: getTraceDirectory(browserSession.id),
712
+ durationMs,
713
+ traceViewerUrl: traceDownloadUrl
714
+ ? buildTraceViewerUrl(traceDownloadUrl)
715
+ : undefined,
716
+ checklistItemId: checklistItem.id,
717
+ };
718
+ console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
719
+ }
720
+ else {
721
+ const typedResult = finalResult;
722
+ if (agentError && !typedResult) {
723
+ throw new Error(`Verification failed: ${agentError}`);
412
724
  }
413
- else if (evaluation === 'partial' || evaluation === 'failed') {
414
- // Keep pending but link session
415
- await updateChecklistItem(featureId, checklistItem.id, {
416
- browserSessionId: browserSession.id,
417
- });
418
- console.log(`\n\u26a0\ufe0f ${evaluation === 'partial' ? 'Partial verification' : 'Verification failed'}: ${resultForEval.evaluationReason}`);
725
+ if (!typedResult) {
726
+ throw new Error('No result received from agent');
419
727
  }
728
+ resultForEval = typedResult;
420
729
  }
421
- if (agentError && !resultForEval) {
422
- throw new Error(`Verification failed: ${agentError}`);
730
+ // 10. Update checklist item based on evaluation
731
+ const evaluation = resultForEval.evaluation;
732
+ if (evaluation === 'verified') {
733
+ await updateChecklistItem(featureId, checklistItem.id, {
734
+ status: 'verified',
735
+ browserSessionId: browserSession.id,
736
+ });
737
+ console.log(`\n\u2705 Checklist item verified!`);
423
738
  }
424
- if (!resultForEval) {
425
- throw new Error('No result received from agent');
739
+ else if (evaluation === 'blocked') {
740
+ await updateChecklistItem(featureId, checklistItem.id, {
741
+ status: 'blocked',
742
+ browserSessionId: browserSession.id,
743
+ blockedReason: resultForEval.evaluationReason,
744
+ });
745
+ // Enhanced output for Claude Code
746
+ console.log(`\n${'='.repeat(60)}`);
747
+ console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
748
+ console.log(`${'='.repeat(60)}`);
749
+ console.log(`\nIssue: ${resultForEval.evaluationReason}`);
750
+ if (resultForEval.issues?.length) {
751
+ console.log(`\nDetails:`);
752
+ for (const issue of resultForEval.issues) {
753
+ const typeStr = issue.type ? ` (${issue.type})` : '';
754
+ console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
755
+ }
756
+ }
757
+ if (resultForEval.traceViewerUrl) {
758
+ console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
759
+ }
760
+ console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
761
+ console.log(`${'='.repeat(60)}\n`);
762
+ }
763
+ else if (evaluation === 'partial' || evaluation === 'failed') {
764
+ // Mark as incomplete - verification happened but requirements not fully met
765
+ await updateChecklistItem(featureId, checklistItem.id, {
766
+ status: 'incomplete',
767
+ browserSessionId: browserSession.id,
768
+ });
769
+ // Check if other items are terminal and prompt user
770
+ await handleIncompleteItem(featureId, checklistItem, resultForEval);
426
771
  }
427
772
  return resultForEval;
428
773
  }