@ranger-testing/ranger-cli 1.1.7 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -45
- package/build/cli.js +644 -277
- package/build/cli.js.map +1 -1
- package/build/commands/addEnv.js +1 -1
- package/build/commands/addEnv.js.map +1 -1
- package/build/commands/authEncrypt.js +5 -10
- package/build/commands/authEncrypt.js.map +1 -1
- package/build/commands/clean.js +1 -1
- package/build/commands/clean.js.map +1 -1
- package/build/commands/config.js +9 -15
- package/build/commands/config.js.map +1 -1
- package/build/commands/env.js +10 -13
- package/build/commands/env.js.map +1 -1
- package/build/commands/feature.js +138 -67
- package/build/commands/feature.js.map +1 -1
- package/build/commands/hooks/autoPrompt.js +1 -1
- package/build/commands/hooks/disable.js +1 -1
- package/build/commands/hooks/enable.js +9 -4
- package/build/commands/hooks/enable.js.map +1 -1
- package/build/commands/hooks/exitPlanMode.js +8 -8
- package/build/commands/hooks/planReminder.js +7 -7
- package/build/commands/hooks/planStart.js +4 -4
- package/build/commands/hooks/postEdit.js +4 -4
- package/build/commands/hooks/postEdit.js.map +1 -1
- package/build/commands/hooks/preCompact.js +3 -3
- package/build/commands/hooks/preCompact.js.map +1 -1
- package/build/commands/hooks/sessionStart.js +19 -5
- package/build/commands/hooks/sessionStart.js.map +1 -1
- package/build/commands/hooks/stopHook.js +28 -4
- package/build/commands/hooks/stopHook.js.map +1 -1
- package/build/commands/index.js +1 -2
- package/build/commands/index.js.map +1 -1
- package/build/commands/login.js +2 -5
- package/build/commands/login.js.map +1 -1
- package/build/commands/setupCi.js +189 -0
- package/build/commands/setupCi.js.map +1 -0
- package/build/commands/skillup.js +16 -68
- package/build/commands/skillup.js.map +1 -1
- package/build/commands/start.js +1 -1
- package/build/commands/start.js.map +1 -1
- package/build/commands/status.js +14 -13
- package/build/commands/status.js.map +1 -1
- package/build/commands/update.js +34 -5
- package/build/commands/update.js.map +1 -1
- package/build/commands/updateEnv.js +1 -1
- package/build/commands/updateEnv.js.map +1 -1
- package/build/commands/useEnv.js +1 -1
- package/build/commands/useEnv.js.map +1 -1
- package/build/commands/utils/activeProfile.js +76 -0
- package/build/commands/utils/activeProfile.js.map +1 -0
- package/build/commands/utils/browserSessionsApi.js +1 -1
- package/build/commands/utils/browserSessionsApi.js.map +1 -1
- package/build/commands/utils/deviceAuth.js +53 -5
- package/build/commands/utils/deviceAuth.js.map +1 -1
- package/build/commands/utils/environment.js +11 -12
- package/build/commands/utils/environment.js.map +1 -1
- package/build/commands/utils/featureApi.js +30 -30
- package/build/commands/utils/featureApi.js.map +1 -1
- package/build/commands/utils/featureReportGenerator.js +6 -6
- package/build/commands/utils/featureReportGenerator.js.map +1 -1
- package/build/commands/utils/keychain.js +1 -1
- package/build/commands/utils/localAgentInstallationsApi.js +1 -1
- package/build/commands/utils/profileMessages.js +8 -0
- package/build/commands/utils/profileMessages.js.map +1 -0
- package/build/commands/utils/profileSetupBanner.js +167 -0
- package/build/commands/utils/profileSetupBanner.js.map +1 -0
- package/build/commands/utils/settings.js +20 -2
- package/build/commands/utils/settings.js.map +1 -1
- package/build/commands/utils/skills.js +1 -1
- package/build/commands/utils/telemetry.js +254 -0
- package/build/commands/utils/telemetry.js.map +1 -0
- package/build/commands/utils/userApi.js +4 -4
- package/build/commands/utils/userApi.js.map +1 -1
- package/build/commands/verifyFeature.js +771 -526
- package/build/commands/verifyFeature.js.map +1 -1
- package/build/commands/verifyInBrowser.js +1 -1
- package/build/commands/verifyInBrowser.js.map +1 -1
- package/build/skills/ranger/SKILL.md +65 -64
- package/build/skills/ranger/create.md +31 -31
- package/build/skills/ranger/feedback.md +25 -17
- package/build/skills/ranger/start.md +37 -37
- package/build/skills/ranger/verify.md +59 -55
- package/package.json +1 -1
- package/scripts/postinstall.js +1 -1
- package/build/commands/dataMcpServer.js +0 -1
- package/build/commands/dataMcpServer.js.map +0 -1
- package/build/commands/utils/cliSecret.js +0 -1
- package/build/commands/utils/cliSecret.js.map +0 -1
- package/build/skills/bug-bash.md +0 -329
- package/build/skills/e2e-test-recommender.md +0 -168
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { query, } from '@anthropic-ai/claude-agent-sdk';
|
|
2
|
+
import { createTelemetryCollector, } from './utils/telemetry.js';
|
|
2
3
|
import { join, dirname } from 'path';
|
|
3
4
|
import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
|
|
4
5
|
import { existsSync } from 'fs';
|
|
@@ -6,11 +7,16 @@ import { execSync } from 'child_process';
|
|
|
6
7
|
import { tmpdir } from 'os';
|
|
7
8
|
import inquirer from 'inquirer';
|
|
8
9
|
import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, getEnvDir, } from './utils/settings.js';
|
|
9
|
-
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl,
|
|
10
|
+
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getProxySessionToken, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
|
|
11
|
+
import { getAiProxyUrl } from './utils/environment.js';
|
|
10
12
|
import { getToken } from './utils/keychain.js';
|
|
11
13
|
import { getActiveFeatureId } from './feature.js';
|
|
14
|
+
import { readActiveProfileName } from './utils/activeProfile.js';
|
|
15
|
+
import { getEnvNames } from './env.js';
|
|
16
|
+
import { formatProfileRequiredMessage } from './utils/profileMessages.js';
|
|
12
17
|
import { getFeature, updateFeature, updateChecklistItem, startSession, getActionItems, getItemFeedback, } from './utils/featureApi.js';
|
|
13
18
|
import { getRangerDir } from './utils/rangerRoot.js';
|
|
19
|
+
/**
 * Wrap text in ANSI escape codes so terminals render it in bold.
 * The trailing ESC[0m resets every text attribute, not only bold.
 */
const bold = (text) => {
    const boldOn = '\x1b[1m';
    const resetAll = '\x1b[0m';
    return `${boldOn}${text}${resetAll}`;
};
|
|
14
20
|
/**
|
|
15
21
|
* Get the current git branch
|
|
16
22
|
*/
|
|
@@ -73,7 +79,7 @@ function getMockEvaluation(outcome) {
|
|
|
73
79
|
success: true,
|
|
74
80
|
summary: '[DEBUG] Mock verification completed successfully.',
|
|
75
81
|
evaluation: 'verified',
|
|
76
|
-
evaluationReason: 'All
|
|
82
|
+
evaluationReason: 'All scenario requirements were met.',
|
|
77
83
|
},
|
|
78
84
|
partial: {
|
|
79
85
|
success: false,
|
|
@@ -91,7 +97,7 @@ function getMockEvaluation(outcome) {
|
|
|
91
97
|
incomplete: {
|
|
92
98
|
success: false,
|
|
93
99
|
summary: '[DEBUG] Mock incomplete verification.',
|
|
94
|
-
evaluation: '
|
|
100
|
+
evaluation: 'incomplete',
|
|
95
101
|
evaluationReason: 'Implementation is incomplete and needs additional work.',
|
|
96
102
|
issues: [
|
|
97
103
|
{
|
|
@@ -157,42 +163,52 @@ function getDebugPrompt() {
|
|
|
157
163
|
Return your findings in the structured output format.`;
|
|
158
164
|
}
|
|
159
165
|
/**
 * Emoji shown for each scenario status. A closed scenario whose terminal
 * reason is 'approved' is handled separately in scenarioStatusEmoji.
 */
const SCENARIO_STATUS_EMOJI = new Map([
    ['verified', '\ud83d\udfe2'], // green circle
    ['incomplete', '\ud83d\udfe0'], // orange circle
    ['blocked', '\ud83d\uded1'],
    ['closed', '\u26d4'],
    ['verification_in_progress', '\u23f3'],
]);
/**
 * Pick the emoji that represents a scenario's current status.
 * Falls back to a white square for any status not listed above.
 */
function scenarioStatusEmoji(item) {
    if (item.status === 'closed' && item.terminalReason === 'approved') {
        return '\u2705';
    }
    return SCENARIO_STATUS_EMOJI.get(item.status) ?? '\u2b1c';
}
/**
 * Compute the `disabled` value for a prompt choice: `false` when the scenario
 * is actionable, otherwise a short reason string.
 */
function scenarioDisabledReason(item) {
    if (item.actionable) {
        return false;
    }
    if (item.status === 'closed' && item.terminalReason) {
        return item.terminalReason;
    }
    return 'not actionable';
}
/**
 * Prompt user to select a scenario
 *
 * Builds one list choice per item (1-based display number, status emoji,
 * description and an optional unaddressed-comment badge) and marks
 * non-actionable items as disabled.
 *
 * @param {Array<object>} items - Scenarios to offer. Each is read for `id`,
 *   `status`, `terminalReason`, `description`, `displayIndex`,
 *   `unaddressedCommentCount` and `actionable`.
 * @returns {Promise<object|null>} The entry of `items` whose `id` matches the
 *   selection, or `null` when `items` is empty or nothing matches.
 */
async function selectChecklistItem(items) {
    if (items.length === 0) {
        return null;
    }
    const choices = items.map((item) => {
        const emoji = scenarioStatusEmoji(item);
        const commentBadge = item.unaddressedCommentCount > 0
            ? ` [${item.unaddressedCommentCount} comments]`
            : '';
        return {
            name: `${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}`,
            value: item.id,
            disabled: scenarioDisabledReason(item),
        };
    });
    const { selected } = await inquirer.prompt([
        {
            type: 'list',
            name: 'selected',
            message: 'Which scenario does this verify?',
            choices,
        },
    ]);
    return items.find((i) => i.id === selected) || null;
}
|
|
194
210
|
/**
|
|
195
|
-
* Handle incomplete verification - check if all other
|
|
211
|
+
* Handle incomplete verification - check if all other scenarios are terminal and prompt user
|
|
196
212
|
*/
|
|
197
213
|
async function handleIncompleteItem(featureId, incompleteItem, result) {
|
|
198
214
|
// Get action items to check if there are other items to work on
|
|
@@ -217,46 +233,98 @@ async function handleIncompleteItem(featureId, incompleteItem, result) {
|
|
|
217
233
|
}
|
|
218
234
|
console.log(`\nNext steps:`);
|
|
219
235
|
console.log(` 1. Fix the issues above in your code`);
|
|
220
|
-
console.log(` 2. Run 'ranger
|
|
236
|
+
console.log(` 2. Run 'ranger go' again to re-verify`);
|
|
221
237
|
if (allOthersTerminal && otherItems.length > 0) {
|
|
222
|
-
console.log(`\nAll other
|
|
223
|
-
console.log(`If you're done for now,
|
|
238
|
+
console.log(`\nAll other scenarios are complete.`);
|
|
239
|
+
console.log(`If you're done for now, you can stop and resume later with 'ranger resume'.`);
|
|
224
240
|
}
|
|
225
241
|
console.log(`${'='.repeat(60)}\n`);
|
|
226
242
|
}
|
|
227
243
|
/**
 * Create a PostToolUse hook that logs browser tool calls to stdout and records
 * every tool call via telemetry.
 *
 * NOTE(review): the 'tool_call' phase is started and ended back-to-back inside
 * the PostToolUse callback, so it marks that a call happened rather than
 * measuring how long the tool ran. Confirm whether real per-call timing is
 * expected here.
 *
 * @param {object} telemetry - Collector exposing `trackPhaseStart(name, data)`
 *   and `trackPhaseEnd(name, data)`.
 * @returns {{ hook: Function, toolCallCounts: Map<string, number> }} The hook
 *   callback plus a live map of call counts keyed by short tool name.
 */
function createToolCallTrackingHook(telemetry) {
    const toolCallCounts = new Map();
    const hook = async (input) => {
        if (input.hook_event_name !== 'PostToolUse') {
            return {};
        }
        const postInput = input;
        // tool_input may be absent from the payload; fall back to an empty
        // object so the property reads below yield undefined instead of throwing.
        const toolInput = postInput.tool_input ?? {};
        const shortName = postInput.tool_name.replace('mcp__ranger-browser__', '');
        // Track count
        toolCallCounts.set(shortName, (toolCallCounts.get(shortName) || 0) + 1);
        // Log tool call as telemetry event
        telemetry.trackPhaseStart('tool_call', { toolName: shortName });
        telemetry.trackPhaseEnd('tool_call', {
            toolName: shortName,
            toolInput: summarizeToolInput(shortName, toolInput),
        });
        // Console log (only the browser tools listed here are echoed)
        switch (postInput.tool_name) {
            case 'mcp__ranger-browser__browser_navigate':
                console.log(`[browser] Navigate → ${toolInput.url}`);
                break;
            case 'mcp__ranger-browser__browser_click':
                console.log(`[browser] Click → "${toolInput.element}"`);
                break;
            case 'mcp__ranger-browser__browser_type':
                // NOTE(review): this echoes the typed text to stdout, while the
                // telemetry call above goes through summarizeToolInput. Confirm
                // that printing typed values (possibly credentials) is intended.
                console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
                break;
            case 'mcp__ranger-browser__browser_press_key':
                console.log(`[browser] Press key → ${toolInput.key}`);
                break;
            case 'mcp__ranger-browser__browser_wait_for':
                console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
                break;
        }
        return {};
    };
    return { hook, toolCallCounts };
}
|
|
285
|
+
/**
 * Build the hook callback that reports failed tool calls to telemetry.
 *
 * The callback ignores every event except 'PostToolUseFailure'. For a failure
 * it awaits `telemetry.trackPhaseError('tool_failure', ...)` with the tool's
 * error, the tool name with the first 'mcp__ranger-browser__' occurrence
 * removed, and the interrupt flag. It always resolves to an empty object.
 *
 * @param {object} telemetry - Collector exposing `trackPhaseError(name, error, data)`.
 * @returns {Function} Async hook callback.
 */
function createToolFailureHook(telemetry) {
    return async (input) => {
        const isFailureEvent = input.hook_event_name === 'PostToolUseFailure';
        if (isFailureEvent) {
            const toolName = input.tool_name.replace('mcp__ranger-browser__', '');
            const details = { toolName, isInterrupt: input.is_interrupt };
            await telemetry.trackPhaseError('tool_failure', input.error, details);
        }
        return {};
    };
}
|
|
301
|
+
/**
 * Summarize tool input for telemetry (avoid logging sensitive/large data).
 *
 * Only a fixed subset of fields is copied for each known tool; for
 * `browser_type` the typed text is not included.
 *
 * @param {string} toolName - Tool name without the 'mcp__ranger-browser__' prefix.
 * @param {object|null|undefined} input - Raw tool input; may be missing.
 * @returns {object} Reduced copy of the input, or `{}` for unknown tools and
 *   for missing or non-object input.
 */
function summarizeToolInput(toolName, input) {
    // A payload without tool input must not crash telemetry reporting.
    if (input === null || typeof input !== 'object') {
        return {};
    }
    switch (toolName) {
        case 'browser_navigate':
            return { url: input.url };
        case 'browser_click':
            return { element: input.element };
        case 'browser_type':
            return { element: input.element };
        case 'browser_take_screenshot':
            return { filename: input.filename };
        case 'browser_press_key':
            return { key: input.key };
        case 'browser_wait_for':
            return { time: input.time, text: input.text };
        default:
            return {};
    }
}
|
|
254
322
|
/**
|
|
255
323
|
* Create a PostToolUse hook that uploads screenshots immediately after they're taken.
|
|
256
324
|
* Returns the hook callback and a set of filenames that were successfully uploaded,
|
|
257
325
|
* so the post-hoc fallback can skip them.
|
|
258
326
|
*/
|
|
259
|
-
function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
327
|
+
function createScreenshotUploadHook(sessionId, checklistItemId, traceDir, telemetry) {
|
|
260
328
|
const uploadedFiles = new Set();
|
|
261
329
|
let position = 1;
|
|
262
330
|
const hook = async (input) => {
|
|
@@ -271,11 +339,15 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
|
271
339
|
const filename = toolInput?.filename;
|
|
272
340
|
if (!filename)
|
|
273
341
|
return {};
|
|
342
|
+
const isKeyFrame = filename.toLowerCase().startsWith('key_');
|
|
343
|
+
telemetry.trackPhaseStart('hook_screenshot_upload', {
|
|
344
|
+
filename,
|
|
345
|
+
isKeyFrame,
|
|
346
|
+
});
|
|
274
347
|
try {
|
|
275
348
|
const pngPath = join(traceDir, filename);
|
|
276
349
|
const pngBuffer = await readFile(pngPath);
|
|
277
350
|
const pngStat = await stat(pngPath);
|
|
278
|
-
const isKeyFrame = filename.toLowerCase().startsWith('key_');
|
|
279
351
|
const displayName = filename
|
|
280
352
|
.replace(/\.png$/i, '')
|
|
281
353
|
.replace(/^key_/i, '')
|
|
@@ -315,96 +387,142 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
|
315
387
|
await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
|
|
316
388
|
// Track as uploaded
|
|
317
389
|
uploadedFiles.add(filename);
|
|
390
|
+
telemetry.trackPhaseEnd('hook_screenshot_upload', {
|
|
391
|
+
filename,
|
|
392
|
+
bytes: pngBuffer.length,
|
|
393
|
+
});
|
|
318
394
|
}
|
|
319
395
|
catch (err) {
|
|
320
|
-
|
|
321
|
-
|
|
396
|
+
await telemetry.trackPhaseError('hook_screenshot_upload', err, {
|
|
397
|
+
filename,
|
|
398
|
+
});
|
|
322
399
|
}
|
|
323
400
|
return {};
|
|
324
401
|
};
|
|
325
402
|
return { hook, uploadedFiles };
|
|
326
403
|
}
|
|
327
404
|
/**
|
|
328
|
-
*
|
|
405
|
+
* Phase 1: Setup through agent completion.
|
|
406
|
+
* Returns context for processVerificationResult.
|
|
329
407
|
*/
|
|
330
|
-
|
|
408
|
+
async function runVerification(options, telemetry) {
|
|
331
409
|
const isDebugMode = !!options.debugOutcome;
|
|
332
410
|
if (isDebugMode) {
|
|
333
411
|
console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
|
|
334
412
|
}
|
|
335
|
-
//
|
|
413
|
+
// --- Phase: feature_load ---
|
|
414
|
+
telemetry.trackPhaseStart('feature_load');
|
|
336
415
|
const featureId = await getActiveFeatureId();
|
|
337
416
|
if (!featureId) {
|
|
338
|
-
throw new Error('No active feature. Run: ranger
|
|
417
|
+
throw new Error('No active feature review. Run: ranger resume <id> or ranger create');
|
|
339
418
|
}
|
|
340
|
-
// Load feature details
|
|
341
419
|
const feature = await getFeature(featureId);
|
|
342
|
-
|
|
420
|
+
telemetry.setContext({ featureId });
|
|
343
421
|
const currentBranch = getGitBranch();
|
|
344
422
|
if (currentBranch && currentBranch !== feature.gitBranch) {
|
|
345
423
|
await updateFeature(featureId, { gitBranch: currentBranch });
|
|
346
424
|
console.log(` Updated branch to: ${currentBranch}`);
|
|
347
425
|
}
|
|
348
|
-
console.log(`\nActive feature: ${feature.name} (${featureId})`);
|
|
349
|
-
// Get action items - leaf items that can be verified (non-closed with no non-closed children)
|
|
426
|
+
console.log(`\nActive feature review: ${feature.name} (${featureId})`);
|
|
350
427
|
const { items: actionItems } = await getActionItems(featureId);
|
|
351
|
-
|
|
428
|
+
const actionItemsById = new Map(actionItems.map((item) => [item.id, item]));
|
|
429
|
+
const displayItems = feature.checklistItems.map((item, index) => {
|
|
430
|
+
const actionItem = actionItemsById.get(item.id);
|
|
431
|
+
return {
|
|
432
|
+
...item,
|
|
433
|
+
unaddressedCommentCount: actionItem?.unaddressedCommentCount ?? 0,
|
|
434
|
+
displayIndex: index,
|
|
435
|
+
actionable: !!actionItem && item.status !== 'closed',
|
|
436
|
+
};
|
|
437
|
+
});
|
|
438
|
+
telemetry.trackPhaseEnd('feature_load', {
|
|
439
|
+
itemCount: actionItems.length,
|
|
440
|
+
});
|
|
441
|
+
// --- Phase: scenario_select ---
|
|
442
|
+
telemetry.trackPhaseStart('scenario_select');
|
|
352
443
|
let checklistItem = null;
|
|
353
|
-
let taskDescription = options.
|
|
354
|
-
if (options.
|
|
444
|
+
let taskDescription = options.notes;
|
|
445
|
+
if (options.scenario !== undefined) {
|
|
355
446
|
// Use specified item index (1-based)
|
|
356
|
-
const itemIndex = options.
|
|
357
|
-
if (itemIndex < 0 || itemIndex >=
|
|
358
|
-
throw new Error(`Invalid
|
|
447
|
+
const itemIndex = options.scenario - 1; // 1-based to 0-based
|
|
448
|
+
if (itemIndex < 0 || itemIndex >= displayItems.length) {
|
|
449
|
+
throw new Error(`Invalid scenario index: ${options.scenario}. Feature review has ${displayItems.length} scenarios.`);
|
|
450
|
+
}
|
|
451
|
+
const displayItem = displayItems[itemIndex];
|
|
452
|
+
if (!displayItem.actionable) {
|
|
453
|
+
const reason = displayItem.status === 'closed' && displayItem.terminalReason
|
|
454
|
+
? displayItem.terminalReason
|
|
455
|
+
: 'not actionable';
|
|
456
|
+
throw new Error(`Scenario ${options.scenario} is ${reason} and cannot be verified. Choose a different scenario.`);
|
|
359
457
|
}
|
|
360
|
-
|
|
458
|
+
const actionItem = actionItemsById.get(displayItem.id);
|
|
459
|
+
if (!actionItem) {
|
|
460
|
+
throw new Error(`Scenario ${options.scenario} is not currently actionable. Try another scenario.`);
|
|
461
|
+
}
|
|
462
|
+
checklistItem = actionItem;
|
|
361
463
|
if (!taskDescription) {
|
|
362
464
|
taskDescription = checklistItem.description;
|
|
363
465
|
}
|
|
364
466
|
}
|
|
365
467
|
else {
|
|
366
|
-
// Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
|
|
367
468
|
const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
|
|
368
469
|
if (!isInteractive) {
|
|
369
|
-
// Non-TTY mode: require --
|
|
370
|
-
console.log('\nNon-interactive mode detected. The --
|
|
371
|
-
console.log('\nAvailable
|
|
372
|
-
|
|
373
|
-
const emoji = item.status === '
|
|
470
|
+
// Non-TTY mode: require --scenario flag, show available scenarios
|
|
471
|
+
console.log('\nNon-interactive mode detected. The --scenario flag is required.');
|
|
472
|
+
console.log('\nAvailable scenarios to verify:');
|
|
473
|
+
displayItems.forEach((item) => {
|
|
474
|
+
const emoji = item.status === 'closed' &&
|
|
475
|
+
item.terminalReason === 'approved'
|
|
374
476
|
? '\u2705'
|
|
375
|
-
: item.status === '
|
|
376
|
-
? '\ud83d\
|
|
377
|
-
: item.status === '
|
|
378
|
-
? '\ud83d\
|
|
379
|
-
: item.status === '
|
|
380
|
-
? '\
|
|
381
|
-
: '
|
|
477
|
+
: item.status === 'verified'
|
|
478
|
+
? '\ud83d\udfe2'
|
|
479
|
+
: item.status === 'incomplete'
|
|
480
|
+
? '\ud83d\udfe0'
|
|
481
|
+
: item.status === 'blocked'
|
|
482
|
+
? '\ud83d\uded1'
|
|
483
|
+
: item.status === 'closed'
|
|
484
|
+
? '\u26d4'
|
|
485
|
+
: item.status === 'verification_in_progress'
|
|
486
|
+
? '\u23f3'
|
|
487
|
+
: '\u2b1c';
|
|
382
488
|
const commentBadge = item.unaddressedCommentCount > 0
|
|
383
489
|
? ` [${item.unaddressedCommentCount} comments]`
|
|
384
490
|
: '';
|
|
385
|
-
|
|
491
|
+
const actionHint = item.actionable ? '' : ' [not actionable]';
|
|
492
|
+
console.log(` ${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}${actionHint}`);
|
|
386
493
|
});
|
|
387
|
-
console.log('\nUsage: ranger
|
|
388
|
-
console.log('Example: ranger
|
|
389
|
-
throw new Error('The --
|
|
494
|
+
console.log('\nUsage: ranger go --scenario <number>');
|
|
495
|
+
console.log('Example: ranger go --scenario 1');
|
|
496
|
+
throw new Error('The --scenario flag is required in non-interactive mode. See available scenarios above.');
|
|
390
497
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
498
|
+
const selectedItem = await selectChecklistItem(displayItems);
|
|
499
|
+
if (selectedItem) {
|
|
500
|
+
const actionItem = actionItemsById.get(selectedItem.id);
|
|
501
|
+
if (!actionItem) {
|
|
502
|
+
throw new Error('Selected scenario is not currently actionable. Choose another scenario.');
|
|
503
|
+
}
|
|
504
|
+
checklistItem = actionItem;
|
|
505
|
+
if (!taskDescription) {
|
|
506
|
+
taskDescription = checklistItem.description;
|
|
507
|
+
}
|
|
395
508
|
}
|
|
396
509
|
}
|
|
397
510
|
if (!checklistItem) {
|
|
398
|
-
throw new Error('No
|
|
511
|
+
throw new Error('No scenario selected. Create scenarios when creating the feature review with -c or --scenario flags.');
|
|
399
512
|
}
|
|
400
513
|
if (checklistItem.status === 'closed') {
|
|
401
|
-
throw new Error(`Cannot verify
|
|
514
|
+
throw new Error(`Cannot verify scenario "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
|
|
402
515
|
}
|
|
403
516
|
if (!taskDescription) {
|
|
404
|
-
throw new Error('No
|
|
517
|
+
throw new Error('No notes provided');
|
|
405
518
|
}
|
|
406
|
-
|
|
407
|
-
|
|
519
|
+
telemetry.setContext({ checklistItemId: checklistItem.id });
|
|
520
|
+
telemetry.trackPhaseEnd('scenario_select', {
|
|
521
|
+
selectionMethod: options.scenario !== undefined ? 'flag' : 'interactive',
|
|
522
|
+
itemStatus: checklistItem.status,
|
|
523
|
+
});
|
|
524
|
+
console.log(`\nVerifying scenario: ${checklistItem.description}`);
|
|
525
|
+
console.log(`Notes: ${taskDescription}`);
|
|
408
526
|
// Fetch reviewer feedback if item has parent or unaddressed comments
|
|
409
527
|
let itemFeedback = null;
|
|
410
528
|
if (checklistItem.parentItemId ||
|
|
@@ -415,8 +533,9 @@ export async function verifyFeature(options) {
|
|
|
415
533
|
console.log(`Reviewer feedback: ${itemFeedback.unaddressedComments.length} comment(s) to verify`);
|
|
416
534
|
}
|
|
417
535
|
}
|
|
418
|
-
catch {
|
|
419
|
-
// Non-fatal - continue without feedback
|
|
536
|
+
catch (err) {
|
|
537
|
+
// Non-fatal - continue without feedback, but log it
|
|
538
|
+
await telemetry.trackPhaseError('feedback_fetch', err);
|
|
420
539
|
}
|
|
421
540
|
}
|
|
422
541
|
// Start the session if it's in ready status
|
|
@@ -427,41 +546,38 @@ export async function verifyFeature(options) {
|
|
|
427
546
|
await startSession(featureId, feature.currentSessionId);
|
|
428
547
|
}
|
|
429
548
|
catch (error) {
|
|
430
|
-
// Ignore if session is already started (race condition)
|
|
431
549
|
const message = error instanceof Error ? error.message : String(error);
|
|
432
550
|
if (!message.includes('already')) {
|
|
433
551
|
throw error;
|
|
434
552
|
}
|
|
435
553
|
}
|
|
436
554
|
}
|
|
437
|
-
// Update
|
|
555
|
+
// Update scenario status to verification_in_progress
|
|
438
556
|
await updateChecklistItem(featureId, checklistItem.id, {
|
|
439
557
|
status: 'verification_in_progress',
|
|
440
558
|
});
|
|
441
|
-
//
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
559
|
+
// --- Phase: profile_resolution ---
|
|
560
|
+
telemetry.trackPhaseStart('profile_resolution');
|
|
561
|
+
let activeProfile = null;
|
|
562
|
+
if (options.profile) {
|
|
563
|
+
activeProfile = options.profile;
|
|
445
564
|
}
|
|
446
565
|
else {
|
|
447
|
-
|
|
448
|
-
if (!existsSync(activeEnvPath)) {
|
|
449
|
-
throw new Error('No active environment. Run: ranger use <env-name>');
|
|
450
|
-
}
|
|
451
|
-
activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
|
|
566
|
+
activeProfile = await readActiveProfileName();
|
|
452
567
|
}
|
|
453
|
-
|
|
568
|
+
if (!activeProfile) {
|
|
569
|
+
throw new Error('No active profile. Run: ranger profile use <profile-name>');
|
|
570
|
+
}
|
|
571
|
+
const envDir = getEnvDir(activeProfile);
|
|
454
572
|
if (!existsSync(envDir)) {
|
|
455
|
-
throw new Error(`
|
|
573
|
+
throw new Error(`Profile "${activeProfile}" not found. Run: ranger profile add ${activeProfile}`);
|
|
456
574
|
}
|
|
457
|
-
const settings = await loadSettings(
|
|
575
|
+
const settings = await loadSettings(activeProfile);
|
|
458
576
|
const resolvedSettings = resolveEnvVars(settings);
|
|
459
|
-
// Get base URL from settings
|
|
460
577
|
let url = resolvedSettings.baseUrl;
|
|
461
578
|
if (!url) {
|
|
462
|
-
throw new Error(`No baseUrl configured for
|
|
579
|
+
throw new Error(`No baseUrl configured for profile "${activeProfile}". Run: ranger profile config set ${activeProfile} baseUrl <url>`);
|
|
463
580
|
}
|
|
464
|
-
// Append startPath if provided
|
|
465
581
|
if (options.startPath) {
|
|
466
582
|
const base = url.endsWith('/') ? url.slice(0, -1) : url;
|
|
467
583
|
const path = options.startPath.startsWith('/')
|
|
@@ -469,13 +585,17 @@ export async function verifyFeature(options) {
|
|
|
469
585
|
: '/' + options.startPath;
|
|
470
586
|
url = base + path;
|
|
471
587
|
}
|
|
472
|
-
|
|
588
|
+
telemetry.trackPhaseEnd('profile_resolution', {
|
|
589
|
+
profileName: activeProfile,
|
|
590
|
+
});
|
|
591
|
+
// --- Phase: browser_session_create ---
|
|
592
|
+
telemetry.trackPhaseStart('browser_session_create');
|
|
473
593
|
const token = await getToken();
|
|
474
594
|
if (!token) {
|
|
475
|
-
throw new Error('No API token configured. Run: ranger
|
|
595
|
+
throw new Error('No API token configured. Run: ranger setup [token]');
|
|
476
596
|
}
|
|
477
597
|
const browserSession = await createBrowserSession({
|
|
478
|
-
environmentName:
|
|
598
|
+
environmentName: activeProfile,
|
|
479
599
|
settings: resolvedSettings,
|
|
480
600
|
task: taskDescription,
|
|
481
601
|
url,
|
|
@@ -483,116 +603,79 @@ export async function verifyFeature(options) {
|
|
|
483
603
|
checklistItemId: checklistItem.id,
|
|
484
604
|
});
|
|
485
605
|
console.log(`Browser session created: ${browserSession.id}`);
|
|
486
|
-
|
|
606
|
+
telemetry.setContext({ browserSessionId: browserSession.id });
|
|
607
|
+
// Link the browser session to the scenario immediately so steps
|
|
487
608
|
// are visible in the dashboard while verification is in progress
|
|
488
609
|
await updateChecklistItem(featureId, checklistItem.id, {
|
|
489
610
|
browserSessionId: browserSession.id,
|
|
490
611
|
});
|
|
612
|
+
telemetry.trackPhaseEnd('browser_session_create');
|
|
613
|
+
// --- Phase: playwright_config ---
|
|
614
|
+
telemetry.trackPhaseStart('playwright_config');
|
|
491
615
|
let configResult;
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
console.log('\nVerification interrupted. Cleaning up...');
|
|
616
|
+
let sessionToken;
|
|
617
|
+
try {
|
|
618
|
+
sessionToken = await getProxySessionToken();
|
|
619
|
+
}
|
|
620
|
+
catch (error) {
|
|
621
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
622
|
+
const errorMsg = `Failed to fetch proxy session token: ${message}`;
|
|
500
623
|
try {
|
|
501
624
|
await updateBrowserSession(browserSession.id, {
|
|
502
|
-
status: '
|
|
503
|
-
durationMs:
|
|
625
|
+
status: 'failed',
|
|
626
|
+
durationMs: 0,
|
|
627
|
+
errorMessage: errorMsg,
|
|
504
628
|
});
|
|
505
629
|
}
|
|
506
|
-
catch {
|
|
507
|
-
|
|
508
|
-
}
|
|
509
|
-
try {
|
|
510
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
511
|
-
status: 'pending',
|
|
512
|
-
});
|
|
513
|
-
}
|
|
514
|
-
catch {
|
|
515
|
-
// Best effort
|
|
516
|
-
}
|
|
517
|
-
if (configResult) {
|
|
518
|
-
try {
|
|
519
|
-
await cleanupTempFiles(configResult);
|
|
520
|
-
}
|
|
521
|
-
catch {
|
|
522
|
-
// Best effort
|
|
523
|
-
}
|
|
630
|
+
catch (updateErr) {
|
|
631
|
+
await telemetry.trackPhaseError('session_error_update', updateErr);
|
|
524
632
|
}
|
|
525
|
-
|
|
526
|
-
|
|
633
|
+
throw new Error(errorMsg);
|
|
634
|
+
}
|
|
635
|
+
configResult = await buildPlaywrightConfig(resolvedSettings, activeProfile, browserSession?.id);
|
|
636
|
+
telemetry.trackPhaseEnd('playwright_config');
|
|
637
|
+
const startTime = Date.now();
|
|
638
|
+
const rangerBrowserMcp = {
|
|
639
|
+
command: 'npx',
|
|
640
|
+
args: [
|
|
641
|
+
'@ranger-testing/playwright',
|
|
642
|
+
'run-mcp-server',
|
|
643
|
+
'--config',
|
|
644
|
+
configResult.configPath,
|
|
645
|
+
],
|
|
527
646
|
};
|
|
528
|
-
|
|
529
|
-
process.on('SIGTERM', handleInterrupt);
|
|
530
|
-
let anthropicApiKey;
|
|
531
|
-
let conversationDir;
|
|
647
|
+
// Build verifier prompt
|
|
532
648
|
let verifierPrompt;
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
// Ignore
|
|
551
|
-
}
|
|
552
|
-
throw new Error(errorMsg);
|
|
553
|
-
}
|
|
554
|
-
configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
|
|
555
|
-
const rangerBrowserMcp = {
|
|
556
|
-
command: 'npx',
|
|
557
|
-
args: [
|
|
558
|
-
'@ranger-testing/playwright',
|
|
559
|
-
'run-mcp-server',
|
|
560
|
-
'--config',
|
|
561
|
-
configResult.configPath,
|
|
562
|
-
],
|
|
563
|
-
};
|
|
564
|
-
// 5. UI Verifier + Evaluation Agent prompt
|
|
565
|
-
if (isDebugMode) {
|
|
566
|
-
verifierPrompt = getDebugPrompt();
|
|
567
|
-
}
|
|
568
|
-
else {
|
|
569
|
-
const notesSection = checklistItem.notes
|
|
570
|
-
? `\n\n## Additional Notes\n${checklistItem.notes}`
|
|
571
|
-
: '';
|
|
572
|
-
// Build reviewer feedback section if available
|
|
573
|
-
let feedbackSection = '';
|
|
574
|
-
if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
|
|
575
|
-
const commentLines = itemFeedback.unaddressedComments
|
|
576
|
-
.map((c) => {
|
|
577
|
-
const date = new Date(c.createdAt).toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
|
|
578
|
-
const author = c.authorName || c.authorEmail || 'Reviewer';
|
|
579
|
-
return `- **${author}** (${date}): "${c.content}"`;
|
|
580
|
-
})
|
|
581
|
-
.join('\n');
|
|
582
|
-
feedbackSection = `\n\n## Reviewer Feedback to Address
|
|
649
|
+
if (isDebugMode) {
|
|
650
|
+
verifierPrompt = getDebugPrompt();
|
|
651
|
+
}
|
|
652
|
+
else {
|
|
653
|
+
const notesSection = checklistItem.notes
|
|
654
|
+
? `\n\n## Additional Notes\n${checklistItem.notes}`
|
|
655
|
+
: '';
|
|
656
|
+
let feedbackSection = '';
|
|
657
|
+
if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
|
|
658
|
+
const commentLines = itemFeedback.unaddressedComments
|
|
659
|
+
.map((c) => {
|
|
660
|
+
const date = new Date(c.createdAt).toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
|
|
661
|
+
const author = c.authorName || c.authorEmail || 'Reviewer';
|
|
662
|
+
return `- **${author}** (${date}): "${c.content}"`;
|
|
663
|
+
})
|
|
664
|
+
.join('\n');
|
|
665
|
+
feedbackSection = `\n\n## Reviewer Feedback to Address
|
|
583
666
|
The following reviewer comments were left on the previous version of this item.
|
|
584
667
|
Verify that each concern has been addressed in the current implementation:
|
|
585
668
|
|
|
586
669
|
${commentLines}`;
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
670
|
+
}
|
|
671
|
+
let canonicalFlowSection = '';
|
|
672
|
+
if (itemFeedback?.canonicalFlow) {
|
|
673
|
+
canonicalFlowSection = `\n\n## Expected Flow (from previous verification)
|
|
591
674
|
${itemFeedback.canonicalFlow}`;
|
|
592
|
-
|
|
593
|
-
|
|
675
|
+
}
|
|
676
|
+
verifierPrompt = `You are a Feature Review Verifier. Your job is to verify a scenario by executing a UI flow and evaluating whether it adequately completes the scenario.
|
|
594
677
|
|
|
595
|
-
##
|
|
678
|
+
## Scenario to Verify
|
|
596
679
|
${checklistItem.description}${notesSection}${feedbackSection}${canonicalFlowSection}
|
|
597
680
|
|
|
598
681
|
## Task to Execute
|
|
@@ -605,7 +688,7 @@ Your base URL is: ${url}
|
|
|
605
688
|
- DO NOT navigate to any different domain, host, or port under any circumstances
|
|
606
689
|
- IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
|
|
607
690
|
- If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
|
|
608
|
-
- The base URL above is the ONLY authorized
|
|
691
|
+
- The base URL above is the ONLY authorized profile for this verification
|
|
609
692
|
|
|
610
693
|
## Instructions
|
|
611
694
|
1. Navigate to the URL above using browser_navigate
|
|
@@ -614,7 +697,7 @@ Your base URL is: ${url}
|
|
|
614
697
|
4. Execute the task step-by-step using browser tools
|
|
615
698
|
5. **Take screenshots at key moments** (see Screenshot Guidelines below)
|
|
616
699
|
6. Document any issues found (bugs, errors, unexpected behavior)
|
|
617
|
-
7. After completing the verification, evaluate whether the result adequately verifies the
|
|
700
|
+
7. After completing the verification, evaluate whether the result adequately verifies the scenario
|
|
618
701
|
|
|
619
702
|
## Screenshot Guidelines - IMPORTANT
|
|
620
703
|
Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
|
|
@@ -630,7 +713,7 @@ Take screenshots throughout the verification flow so a human can review it for c
|
|
|
630
713
|
**Screenshot naming:**
|
|
631
714
|
- Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
|
|
632
715
|
- Number prefixes (01_, 02_, etc.) help maintain chronological order
|
|
633
|
-
- For KEY MOMENTS that prove the
|
|
716
|
+
- For KEY MOMENTS that prove the scenario is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
|
|
634
717
|
- The "key_" prefix marks screenshots as high-priority evidence for human reviewers
|
|
635
718
|
|
|
636
719
|
**Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
|
|
@@ -657,266 +740,331 @@ After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP erro
|
|
|
657
740
|
This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
|
|
658
741
|
|
|
659
742
|
## Evaluation Criteria
|
|
660
|
-
- VERIFIED: The task completed successfully and the
|
|
661
|
-
- PARTIAL: The task partially completed but some aspects of the
|
|
743
|
+
- VERIFIED: The task completed successfully and the scenario requirements are fully met
|
|
744
|
+
- PARTIAL: The task partially completed but some aspects of the scenario are not verified
|
|
662
745
|
- BLOCKED: A blocking issue (bug, error, missing feature) prevents completion
|
|
663
746
|
- FAILED: The task could not be completed due to errors
|
|
664
747
|
|
|
665
748
|
Return your findings in the structured output format with your evaluation.`;
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
},
|
|
686
|
-
type: {
|
|
687
|
-
type: 'string',
|
|
688
|
-
enum: [
|
|
689
|
-
'HTTP_404',
|
|
690
|
-
'HTTP_500',
|
|
691
|
-
'HTTP_400',
|
|
692
|
-
'NAVIGATION_ERROR',
|
|
693
|
-
'OTHER',
|
|
694
|
-
],
|
|
695
|
-
},
|
|
696
|
-
description: { type: 'string' },
|
|
697
|
-
screenshot: { type: 'string' },
|
|
749
|
+
}
|
|
750
|
+
const outputSchema = {
|
|
751
|
+
type: 'object',
|
|
752
|
+
properties: {
|
|
753
|
+
success: { type: 'boolean' },
|
|
754
|
+
summary: { type: 'string' },
|
|
755
|
+
evaluation: {
|
|
756
|
+
type: 'string',
|
|
757
|
+
enum: ['verified', 'partial', 'blocked', 'failed'],
|
|
758
|
+
},
|
|
759
|
+
evaluationReason: { type: 'string' },
|
|
760
|
+
issues: {
|
|
761
|
+
type: 'array',
|
|
762
|
+
items: {
|
|
763
|
+
type: 'object',
|
|
764
|
+
properties: {
|
|
765
|
+
severity: {
|
|
766
|
+
type: 'string',
|
|
767
|
+
enum: ['BLOCKER', 'MAJOR', 'MINOR'],
|
|
698
768
|
},
|
|
699
|
-
|
|
769
|
+
type: {
|
|
770
|
+
type: 'string',
|
|
771
|
+
enum: [
|
|
772
|
+
'HTTP_404',
|
|
773
|
+
'HTTP_500',
|
|
774
|
+
'HTTP_400',
|
|
775
|
+
'NAVIGATION_ERROR',
|
|
776
|
+
'OTHER',
|
|
777
|
+
],
|
|
778
|
+
},
|
|
779
|
+
description: { type: 'string' },
|
|
780
|
+
screenshot: { type: 'string' },
|
|
700
781
|
},
|
|
782
|
+
required: ['severity', 'description'],
|
|
701
783
|
},
|
|
702
784
|
},
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
785
|
+
},
|
|
786
|
+
required: ['success', 'summary', 'evaluation', 'evaluationReason'],
|
|
787
|
+
};
|
|
788
|
+
// --- Phase: agent_execution ---
|
|
789
|
+
telemetry.trackPhaseStart('agent_execution');
|
|
790
|
+
const traceDir = getTraceDirectory(browserSession.id);
|
|
791
|
+
const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir, telemetry);
|
|
792
|
+
const toolCallHook = createToolCallTrackingHook(telemetry);
|
|
793
|
+
const toolFailureHook = createToolFailureHook(telemetry);
|
|
794
|
+
const result = query({
|
|
795
|
+
prompt: verifierPrompt,
|
|
796
|
+
options: {
|
|
797
|
+
cwd: process.cwd(),
|
|
798
|
+
model: 'claude-opus-4-6',
|
|
799
|
+
mcpServers: {
|
|
800
|
+
'ranger-browser': rangerBrowserMcp,
|
|
801
|
+
},
|
|
802
|
+
tools: ['mcp__ranger-browser__*'],
|
|
803
|
+
permissionMode: 'acceptEdits',
|
|
804
|
+
allowedTools: ['mcp__ranger-browser__*', 'Read', 'Glob', 'Grep'],
|
|
805
|
+
outputFormat: {
|
|
806
|
+
type: 'json_schema',
|
|
807
|
+
schema: outputSchema,
|
|
808
|
+
},
|
|
809
|
+
hooks: {
|
|
810
|
+
PostToolUse: [
|
|
811
|
+
{
|
|
812
|
+
hooks: [toolCallHook.hook, screenshotHook.hook],
|
|
813
|
+
},
|
|
814
|
+
],
|
|
815
|
+
PostToolUseFailure: [
|
|
816
|
+
{
|
|
817
|
+
hooks: [toolFailureHook],
|
|
818
|
+
},
|
|
723
819
|
],
|
|
724
|
-
outputFormat: {
|
|
725
|
-
type: 'json_schema',
|
|
726
|
-
schema: outputSchema,
|
|
727
|
-
},
|
|
728
|
-
hooks: {
|
|
729
|
-
PostToolUse: [
|
|
730
|
-
{
|
|
731
|
-
hooks: [browserToolLogHook, screenshotHook.hook],
|
|
732
|
-
},
|
|
733
|
-
],
|
|
734
|
-
},
|
|
735
|
-
env: {
|
|
736
|
-
...process.env,
|
|
737
|
-
ANTHROPIC_API_KEY: anthropicApiKey,
|
|
738
|
-
},
|
|
739
|
-
persistSession: false,
|
|
740
820
|
},
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
821
|
+
env: {
|
|
822
|
+
...process.env,
|
|
823
|
+
ANTHROPIC_API_KEY: sessionToken,
|
|
824
|
+
ANTHROPIC_BASE_URL: getAiProxyUrl(),
|
|
825
|
+
},
|
|
826
|
+
persistSession: false,
|
|
827
|
+
},
|
|
828
|
+
});
|
|
829
|
+
// Collect messages
|
|
830
|
+
let finalResult = null;
|
|
831
|
+
let agentError = null;
|
|
832
|
+
let lastStructuredOutputInput = null;
|
|
833
|
+
let resultMeta = {};
|
|
834
|
+
const conversationFilePath = getConversationFilePath(browserSession.id);
|
|
835
|
+
const conversationDir = dirname(conversationFilePath);
|
|
836
|
+
await mkdir(conversationDir, { recursive: true });
|
|
837
|
+
const TIMEOUT_MS = 59 * 60 * 1000;
|
|
838
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
839
|
+
setTimeout(() => {
|
|
840
|
+
reject(new Error('Agent execution timed out after 59 minutes'));
|
|
841
|
+
}, TIMEOUT_MS);
|
|
842
|
+
});
|
|
843
|
+
try {
|
|
844
|
+
await Promise.race([
|
|
845
|
+
(async () => {
|
|
846
|
+
for await (const message of result) {
|
|
847
|
+
try {
|
|
848
|
+
const jsonLine = JSON.stringify(message) + '\n';
|
|
849
|
+
await appendFile(conversationFilePath, jsonLine, 'utf-8');
|
|
850
|
+
}
|
|
851
|
+
catch {
|
|
852
|
+
// Ignore
|
|
853
|
+
}
|
|
854
|
+
const msg = message;
|
|
855
|
+
// Capture StructuredOutput tool call input as fallback
|
|
856
|
+
if (msg.type === 'assistant' && msg.message?.content) {
|
|
857
|
+
for (const block of msg.message.content) {
|
|
858
|
+
if (block.type === 'tool_use' &&
|
|
859
|
+
block.name === 'StructuredOutput' &&
|
|
860
|
+
block.input) {
|
|
861
|
+
lastStructuredOutputInput =
|
|
862
|
+
block.input;
|
|
778
863
|
}
|
|
779
864
|
}
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
865
|
+
}
|
|
866
|
+
if (msg.error) {
|
|
867
|
+
let errorText = msg.error;
|
|
868
|
+
if (msg.message?.content &&
|
|
869
|
+
Array.isArray(msg.message.content)) {
|
|
870
|
+
const texts = msg.message.content
|
|
871
|
+
.filter((c) => c.type === 'text')
|
|
872
|
+
.map((c) => c.text || '')
|
|
873
|
+
.filter(Boolean);
|
|
874
|
+
if (texts.length > 0) {
|
|
875
|
+
errorText = texts.join(' ');
|
|
791
876
|
}
|
|
792
|
-
agentError = errorText;
|
|
793
877
|
}
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
878
|
+
agentError = errorText;
|
|
879
|
+
}
|
|
880
|
+
if (msg.type === 'result') {
|
|
881
|
+
// Capture SDK result metadata
|
|
882
|
+
resultMeta = {
|
|
883
|
+
numTurns: msg.num_turns,
|
|
884
|
+
totalCostUsd: msg.total_cost_usd,
|
|
885
|
+
durationApiMs: msg.duration_api_ms,
|
|
886
|
+
sdkDurationMs: msg.duration_ms,
|
|
887
|
+
inputTokens: msg.usage?.input_tokens,
|
|
888
|
+
outputTokens: msg.usage?.output_tokens,
|
|
889
|
+
cacheReadTokens: msg.usage?.cache_read_input_tokens,
|
|
890
|
+
cacheCreationTokens: msg.usage?.cache_creation_input_tokens,
|
|
891
|
+
};
|
|
892
|
+
if (msg.subtype === 'success' &&
|
|
893
|
+
message.structured_output) {
|
|
894
|
+
finalResult = message.structured_output;
|
|
895
|
+
}
|
|
896
|
+
else if (msg.subtype !== 'success') {
|
|
897
|
+
if (lastStructuredOutputInput &&
|
|
898
|
+
msg.errors?.length === 0) {
|
|
899
|
+
finalResult = lastStructuredOutputInput;
|
|
900
|
+
agentError = null;
|
|
799
901
|
}
|
|
800
|
-
else if (
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
if (lastStructuredOutputInput &&
|
|
804
|
-
message.errors?.length === 0) {
|
|
805
|
-
finalResult = lastStructuredOutputInput;
|
|
806
|
-
// Clear the error since we actually succeeded
|
|
807
|
-
agentError = null;
|
|
808
|
-
}
|
|
809
|
-
else if (!agentError) {
|
|
810
|
-
agentError =
|
|
811
|
-
message.errors?.join(', ') ||
|
|
812
|
-
'Unknown error';
|
|
813
|
-
}
|
|
902
|
+
else if (!agentError) {
|
|
903
|
+
agentError =
|
|
904
|
+
msg.errors?.join(', ') || 'Unknown error';
|
|
814
905
|
}
|
|
815
906
|
}
|
|
816
907
|
}
|
|
817
|
-
}
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
908
|
+
}
|
|
909
|
+
})(),
|
|
910
|
+
timeoutPromise,
|
|
911
|
+
]);
|
|
912
|
+
}
|
|
913
|
+
catch (error) {
|
|
914
|
+
agentError = error instanceof Error ? error.message : String(error);
|
|
915
|
+
}
|
|
916
|
+
const durationMs = Date.now() - startTime;
|
|
917
|
+
telemetry.trackPhaseEnd('agent_execution', {
|
|
918
|
+
...resultMeta,
|
|
919
|
+
toolCallCounts: Object.fromEntries(toolCallHook.toolCallCounts),
|
|
920
|
+
hasResult: !!finalResult,
|
|
921
|
+
hasError: !!agentError,
|
|
922
|
+
});
|
|
923
|
+
return {
|
|
924
|
+
featureId,
|
|
925
|
+
checklistItem,
|
|
926
|
+
browserSession,
|
|
927
|
+
finalResult,
|
|
928
|
+
agentError,
|
|
929
|
+
lastStructuredOutputInput,
|
|
930
|
+
screenshotHook,
|
|
931
|
+
toolCallCounts: toolCallHook.toolCallCounts,
|
|
932
|
+
configResult,
|
|
933
|
+
startTime,
|
|
934
|
+
durationMs,
|
|
935
|
+
conversationFilePath,
|
|
936
|
+
conversationDir,
|
|
937
|
+
isDebugMode,
|
|
938
|
+
debugOutcome: options.debugOutcome,
|
|
939
|
+
resultMeta,
|
|
940
|
+
telemetry,
|
|
941
|
+
};
|
|
942
|
+
}
|
|
943
|
+
/**
|
|
944
|
+
* Phase 2: Upload artifacts, evaluate result, update scenario.
|
|
945
|
+
*/
|
|
946
|
+
async function processVerificationResult(ctx) {
|
|
947
|
+
const { featureId, checklistItem, browserSession, screenshotHook, durationMs, conversationFilePath, isDebugMode, debugOutcome, telemetry, } = ctx;
|
|
948
|
+
const { finalResult, agentError } = ctx;
|
|
949
|
+
let traceDownloadUrl;
|
|
950
|
+
// --- Upload trace ---
|
|
951
|
+
try {
|
|
952
|
+
const traceDir = getTraceDirectory(browserSession.id);
|
|
953
|
+
if (existsSync(traceDir)) {
|
|
954
|
+
const files = await readdir(traceDir);
|
|
955
|
+
if (files.length > 0) {
|
|
956
|
+
telemetry.trackPhaseStart('upload_trace');
|
|
957
|
+
try {
|
|
833
958
|
const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
|
|
834
959
|
const traceBuffer = await zipDirectory(traceDir);
|
|
835
960
|
await uploadTrace(traceUrls.uploadUrl, traceBuffer);
|
|
836
961
|
traceDownloadUrl = traceUrls.downloadUrl;
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
962
|
+
telemetry.trackPhaseEnd('upload_trace', {
|
|
963
|
+
bytes: traceBuffer.length,
|
|
964
|
+
});
|
|
965
|
+
}
|
|
966
|
+
catch (err) {
|
|
967
|
+
await telemetry.trackPhaseError('upload_trace', err);
|
|
968
|
+
}
|
|
969
|
+
// --- Upload videos ---
|
|
970
|
+
const videos = await loadSessionVideos(traceDir);
|
|
971
|
+
for (const video of videos) {
|
|
972
|
+
telemetry.trackPhaseStart('upload_video', {
|
|
973
|
+
filename: video.filename,
|
|
974
|
+
});
|
|
975
|
+
try {
|
|
976
|
+
const videoBuffer = await readFile(video.path);
|
|
977
|
+
const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
|
|
978
|
+
await uploadVideo(videoUrls.uploadUrl, videoBuffer);
|
|
979
|
+
telemetry.trackPhaseEnd('upload_video', {
|
|
980
|
+
filename: video.filename,
|
|
981
|
+
bytes: videoBuffer.length,
|
|
982
|
+
});
|
|
848
983
|
}
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
// Create step asset with upload URL
|
|
889
|
-
const assetResponse = await createStepAsset(browserSession.id, step.id, {
|
|
984
|
+
catch (err) {
|
|
985
|
+
await telemetry.trackPhaseError('upload_video', err, {
|
|
986
|
+
filename: video.filename,
|
|
987
|
+
});
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
// --- Upload remaining screenshots ---
|
|
991
|
+
const pngFiles = files
|
|
992
|
+
.filter((f) => f.toLowerCase().endsWith('.png'))
|
|
993
|
+
.filter((f) => !screenshotHook.uploadedFiles.has(f))
|
|
994
|
+
.sort();
|
|
995
|
+
const positionOffset = screenshotHook.uploadedFiles.size + 1;
|
|
996
|
+
for (let i = 0; i < pngFiles.length; i++) {
|
|
997
|
+
const pngFile = pngFiles[i];
|
|
998
|
+
const isKeyFrame = pngFile.toLowerCase().startsWith('key_');
|
|
999
|
+
telemetry.trackPhaseStart('upload_screenshot', {
|
|
1000
|
+
filename: pngFile,
|
|
1001
|
+
isKeyFrame,
|
|
1002
|
+
});
|
|
1003
|
+
try {
|
|
1004
|
+
const pngPath = join(traceDir, pngFile);
|
|
1005
|
+
const pngBuffer = await readFile(pngPath);
|
|
1006
|
+
const pngStat = await stat(pngPath);
|
|
1007
|
+
const displayName = pngFile
|
|
1008
|
+
.replace(/\.png$/i, '')
|
|
1009
|
+
.replace(/^key_/i, '')
|
|
1010
|
+
.replace(/^\d+_/, '')
|
|
1011
|
+
.replace(/-/g, ' ');
|
|
1012
|
+
const { step } = await createVerificationStep(browserSession.id, {
|
|
1013
|
+
checklistItemId: checklistItem.id,
|
|
1014
|
+
position: positionOffset + i,
|
|
1015
|
+
stepType: 'screenshot',
|
|
1016
|
+
stepName: displayName,
|
|
1017
|
+
description: isKeyFrame
|
|
1018
|
+
? 'Key moment captured during verification'
|
|
1019
|
+
: 'Screenshot captured during verification',
|
|
1020
|
+
isKeyStep: isKeyFrame,
|
|
1021
|
+
status: 'success',
|
|
1022
|
+
metadata: {
|
|
890
1023
|
filename: pngFile,
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
1024
|
+
timestamp: pngStat.mtime.toISOString(),
|
|
1025
|
+
},
|
|
1026
|
+
});
|
|
1027
|
+
const assetResponse = await createStepAsset(browserSession.id, step.id, {
|
|
1028
|
+
filename: pngFile,
|
|
1029
|
+
assetType: 'screenshot',
|
|
1030
|
+
timing: 'after',
|
|
1031
|
+
position: 0,
|
|
1032
|
+
capturedAt: pngStat.mtime.toISOString(),
|
|
1033
|
+
metadata: {
|
|
1034
|
+
name: displayName,
|
|
1035
|
+
highPriority: isKeyFrame,
|
|
1036
|
+
},
|
|
1037
|
+
});
|
|
1038
|
+
await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
|
|
1039
|
+
telemetry.trackPhaseEnd('upload_screenshot', {
|
|
1040
|
+
filename: pngFile,
|
|
1041
|
+
bytes: pngBuffer.length,
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
1044
|
+
catch (err) {
|
|
1045
|
+
await telemetry.trackPhaseError('upload_screenshot', err, { filename: pngFile });
|
|
906
1046
|
}
|
|
907
1047
|
}
|
|
908
1048
|
}
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
1049
|
+
}
|
|
1050
|
+
// --- Upload conversation ---
|
|
1051
|
+
if (existsSync(conversationFilePath)) {
|
|
1052
|
+
telemetry.trackPhaseStart('upload_conversation');
|
|
1053
|
+
try {
|
|
1054
|
+
const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
|
|
1055
|
+
const conversationBuffer = await readFile(conversationFilePath);
|
|
1056
|
+
await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
|
|
1057
|
+
telemetry.trackPhaseEnd('upload_conversation', {
|
|
1058
|
+
bytes: conversationBuffer.length,
|
|
1059
|
+
});
|
|
1060
|
+
}
|
|
1061
|
+
catch (err) {
|
|
1062
|
+
await telemetry.trackPhaseError('upload_conversation', err);
|
|
918
1063
|
}
|
|
919
|
-
|
|
1064
|
+
}
|
|
1065
|
+
// --- Update browser session ---
|
|
1066
|
+
telemetry.trackPhaseStart('update_session');
|
|
1067
|
+
try {
|
|
920
1068
|
const typedResult = finalResult;
|
|
921
1069
|
const updateData = {
|
|
922
1070
|
status: (agentError ? 'failed' : 'completed'),
|
|
@@ -933,121 +1081,218 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
933
1081
|
typedResult.durationMs = durationMs;
|
|
934
1082
|
typedResult.checklistItemId = checklistItem.id;
|
|
935
1083
|
}
|
|
1084
|
+
telemetry.trackPhaseEnd('update_session');
|
|
936
1085
|
}
|
|
937
|
-
catch {
|
|
938
|
-
|
|
1086
|
+
catch (err) {
|
|
1087
|
+
await telemetry.trackPhaseError('update_session', err);
|
|
939
1088
|
}
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
1089
|
+
}
|
|
1090
|
+
catch {
|
|
1091
|
+
// Ignore upload errors
|
|
1092
|
+
}
|
|
1093
|
+
// --- Phase: evaluation ---
|
|
1094
|
+
telemetry.trackPhaseStart('evaluation');
|
|
1095
|
+
let resultForEval;
|
|
1096
|
+
if (isDebugMode && debugOutcome) {
|
|
1097
|
+
const mockEval = getMockEvaluation(debugOutcome);
|
|
1098
|
+
resultForEval = {
|
|
1099
|
+
...mockEval,
|
|
1100
|
+
sessionId: browserSession.id,
|
|
1101
|
+
sessionDir: getTraceDirectory(browserSession.id),
|
|
1102
|
+
durationMs,
|
|
1103
|
+
traceViewerUrl: traceDownloadUrl
|
|
1104
|
+
? buildTraceViewerUrl(traceDownloadUrl)
|
|
1105
|
+
: undefined,
|
|
1106
|
+
checklistItemId: checklistItem.id,
|
|
1107
|
+
};
|
|
1108
|
+
console.log(`\n[DEBUG MODE] Using mock evaluation: ${debugOutcome}`);
|
|
1109
|
+
}
|
|
1110
|
+
else {
|
|
1111
|
+
const typedResult = finalResult;
|
|
1112
|
+
if (agentError && !typedResult) {
|
|
1113
|
+
throw new Error(`Verification failed: ${agentError}`);
|
|
956
1114
|
}
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
1115
|
+
if (!typedResult) {
|
|
1116
|
+
throw new Error('No result received from agent');
|
|
1117
|
+
}
|
|
1118
|
+
resultForEval = typedResult;
|
|
1119
|
+
}
|
|
1120
|
+
telemetry.trackPhaseEnd('evaluation', {
|
|
1121
|
+
evaluation: resultForEval.evaluation,
|
|
1122
|
+
issueCount: resultForEval.issues?.length ?? 0,
|
|
1123
|
+
});
|
|
1124
|
+
// --- Phase: scenario_update ---
|
|
1125
|
+
telemetry.trackPhaseStart('scenario_update');
|
|
1126
|
+
const evaluation = resultForEval.evaluation;
|
|
1127
|
+
if (evaluation === 'verified') {
|
|
1128
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1129
|
+
status: 'verified',
|
|
1130
|
+
browserSessionId: browserSession.id,
|
|
1131
|
+
});
|
|
1132
|
+
console.log(`\n\u2705 Scenario verified!`);
|
|
1133
|
+
}
|
|
1134
|
+
else if (evaluation === 'blocked') {
|
|
1135
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1136
|
+
status: 'blocked',
|
|
1137
|
+
browserSessionId: browserSession.id,
|
|
1138
|
+
blockedReason: resultForEval.evaluationReason,
|
|
1139
|
+
});
|
|
1140
|
+
// Enhanced output for Claude Code
|
|
1141
|
+
console.log(`\n${'='.repeat(60)}`);
|
|
1142
|
+
console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
|
|
1143
|
+
console.log(`${'='.repeat(60)}`);
|
|
1144
|
+
console.log(`\nIssue: ${resultForEval.evaluationReason}`);
|
|
1145
|
+
if (resultForEval.issues?.length) {
|
|
1146
|
+
console.log(`\nDetails:`);
|
|
1147
|
+
for (const issue of resultForEval.issues) {
|
|
1148
|
+
const typeStr = issue.type ? ` (${issue.type})` : '';
|
|
1149
|
+
console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
|
|
964
1150
|
}
|
|
965
|
-
resultForEval = typedResult;
|
|
966
1151
|
}
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
if (evaluation === 'verified') {
|
|
970
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
971
|
-
status: 'verified',
|
|
972
|
-
browserSessionId: browserSession.id,
|
|
973
|
-
});
|
|
974
|
-
console.log(`\n\u2705 Checklist item verified!`);
|
|
1152
|
+
if (resultForEval.traceViewerUrl) {
|
|
1153
|
+
console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
|
|
975
1154
|
}
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1155
|
+
console.log(`\nSuggested action: Debug this issue in your code, then run go again.`);
|
|
1156
|
+
console.log(`${'='.repeat(60)}\n`);
|
|
1157
|
+
}
|
|
1158
|
+
else if (evaluation === 'partial' ||
|
|
1159
|
+
evaluation === 'failed' ||
|
|
1160
|
+
evaluation === 'incomplete') {
|
|
1161
|
+
// Mark as incomplete - verification happened but requirements not fully met
|
|
1162
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1163
|
+
status: 'incomplete',
|
|
1164
|
+
browserSessionId: browserSession.id,
|
|
1165
|
+
incompleteReason: resultForEval.evaluationReason,
|
|
1166
|
+
});
|
|
1167
|
+
// Check if other items are terminal and prompt user
|
|
1168
|
+
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
1169
|
+
}
|
|
1170
|
+
telemetry.trackPhaseEnd('scenario_update', { newStatus: evaluation });
|
|
1171
|
+
return resultForEval;
|
|
1172
|
+
}
|
|
1173
|
+
/**
|
|
1174
|
+
* Verify a scenario in the browser.
|
|
1175
|
+
* Orchestrates runVerification -> processVerificationResult with telemetry.
|
|
1176
|
+
*/
|
|
1177
|
+
export async function verifyFeature(options) {
|
|
1178
|
+
const telemetry = createTelemetryCollector('go');
|
|
1179
|
+
await telemetry.trackCommandStart({
|
|
1180
|
+
hasProfile: !!options.profile,
|
|
1181
|
+
hasScenario: options.scenario !== undefined,
|
|
1182
|
+
hasNotes: !!options.notes,
|
|
1183
|
+
isDebugMode: !!options.debugOutcome,
|
|
1184
|
+
});
|
|
1185
|
+
const { envNames } = await getEnvNames();
|
|
1186
|
+
if (envNames.length === 0) {
|
|
1187
|
+
throw new Error(formatProfileRequiredMessage((text) => bold(text)));
|
|
1188
|
+
}
|
|
1189
|
+
let ctx;
|
|
1190
|
+
let interrupted = false;
|
|
1191
|
+
let checklistItemResolved = false;
|
|
1192
|
+
const handleInterrupt = async () => {
|
|
1193
|
+
if (interrupted)
|
|
1194
|
+
return;
|
|
1195
|
+
interrupted = true;
|
|
1196
|
+
console.log('\nVerification interrupted. Cleaning up...');
|
|
1197
|
+
await telemetry.trackCommandEnd('interrupted', {
|
|
1198
|
+
durationMs: ctx ? Date.now() - ctx.startTime : 0,
|
|
1199
|
+
});
|
|
1200
|
+
if (ctx) {
|
|
1201
|
+
try {
|
|
1202
|
+
await updateBrowserSession(ctx.browserSession.id, {
|
|
1203
|
+
status: 'interrupted',
|
|
1204
|
+
durationMs: Date.now() - ctx.startTime,
|
|
1205
|
+
});
|
|
993
1206
|
}
|
|
994
|
-
|
|
995
|
-
|
|
1207
|
+
catch {
|
|
1208
|
+
// Best effort
|
|
1209
|
+
}
|
|
1210
|
+
try {
|
|
1211
|
+
await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
|
|
1212
|
+
status: 'pending',
|
|
1213
|
+
});
|
|
1214
|
+
}
|
|
1215
|
+
catch {
|
|
1216
|
+
// Best effort
|
|
1217
|
+
}
|
|
1218
|
+
if (ctx.configResult) {
|
|
1219
|
+
try {
|
|
1220
|
+
await cleanupTempFiles(ctx.configResult);
|
|
1221
|
+
}
|
|
1222
|
+
catch {
|
|
1223
|
+
// Best effort
|
|
1224
|
+
}
|
|
996
1225
|
}
|
|
997
|
-
console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
|
|
998
|
-
console.log(`${'='.repeat(60)}\n`);
|
|
999
|
-
}
|
|
1000
|
-
else if (evaluation === 'partial' || evaluation === 'failed') {
|
|
1001
|
-
// Mark as incomplete - verification happened but requirements not fully met
|
|
1002
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1003
|
-
status: 'incomplete',
|
|
1004
|
-
browserSessionId: browserSession.id,
|
|
1005
|
-
incompleteReason: resultForEval.evaluationReason,
|
|
1006
|
-
});
|
|
1007
|
-
// Check if other items are terminal and prompt user
|
|
1008
|
-
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
1009
1226
|
}
|
|
1227
|
+
console.log('Scenario reset to pending. Partial steps are preserved.');
|
|
1228
|
+
process.exit(0);
|
|
1229
|
+
};
|
|
1230
|
+
process.on('SIGINT', handleInterrupt);
|
|
1231
|
+
process.on('SIGTERM', handleInterrupt);
|
|
1232
|
+
try {
|
|
1233
|
+
// Phase 1: Setup + agent execution
|
|
1234
|
+
ctx = await runVerification(options, telemetry);
|
|
1235
|
+
// Boundary flush — all agent execution telemetry is now safe
|
|
1236
|
+
await telemetry.flush();
|
|
1237
|
+
// Phase 2: Uploads + evaluation + status update
|
|
1238
|
+
const resultForEval = await processVerificationResult(ctx);
|
|
1010
1239
|
checklistItemResolved = true;
|
|
1240
|
+
await telemetry.trackCommandEnd('success', {
|
|
1241
|
+
evaluation: resultForEval.evaluation,
|
|
1242
|
+
durationMs: ctx.durationMs,
|
|
1243
|
+
...ctx.resultMeta,
|
|
1244
|
+
});
|
|
1011
1245
|
return resultForEval;
|
|
1012
1246
|
}
|
|
1247
|
+
catch (error) {
|
|
1248
|
+
await telemetry.trackCommandError(error);
|
|
1249
|
+
throw error;
|
|
1250
|
+
}
|
|
1013
1251
|
finally {
|
|
1014
|
-
// Remove interrupt handlers to avoid double-firing after normal completion
|
|
1015
1252
|
process.removeListener('SIGINT', handleInterrupt);
|
|
1016
1253
|
process.removeListener('SIGTERM', handleInterrupt);
|
|
1017
|
-
// If the
|
|
1254
|
+
// If the scenario was never resolved (agent error, throw, etc.),
|
|
1018
1255
|
// reset it to pending so it doesn't stay stuck in verification_in_progress.
|
|
1019
|
-
if (!interrupted && !checklistItemResolved) {
|
|
1256
|
+
if (ctx && !interrupted && !checklistItemResolved) {
|
|
1020
1257
|
try {
|
|
1021
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1258
|
+
await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
|
|
1022
1259
|
status: 'pending',
|
|
1023
1260
|
});
|
|
1024
|
-
console.log('
|
|
1261
|
+
console.log('Scenario reset to pending after unexpected error.');
|
|
1025
1262
|
}
|
|
1026
|
-
catch {
|
|
1027
|
-
|
|
1263
|
+
catch (resetErr) {
|
|
1264
|
+
await telemetry.trackPhaseError('scenario_reset', resetErr);
|
|
1028
1265
|
}
|
|
1029
1266
|
}
|
|
1030
|
-
//
|
|
1031
|
-
if (
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
const traceDir = getTraceDirectory(browserSession.id);
|
|
1036
|
-
if (existsSync(traceDir)) {
|
|
1037
|
-
await rm(traceDir, { recursive: true, force: true });
|
|
1267
|
+
// --- Phase: cleanup ---
|
|
1268
|
+
if (ctx) {
|
|
1269
|
+
telemetry.trackPhaseStart('cleanup');
|
|
1270
|
+
if (ctx.configResult) {
|
|
1271
|
+
await cleanupTempFiles(ctx.configResult);
|
|
1038
1272
|
}
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
if (conversationDir && existsSync(conversationDir)) {
|
|
1045
|
-
await rm(conversationDir, { recursive: true, force: true });
|
|
1273
|
+
try {
|
|
1274
|
+
const traceDir = getTraceDirectory(ctx.browserSession.id);
|
|
1275
|
+
if (existsSync(traceDir)) {
|
|
1276
|
+
await rm(traceDir, { recursive: true, force: true });
|
|
1277
|
+
}
|
|
1046
1278
|
}
|
|
1279
|
+
catch {
|
|
1280
|
+
// Ignore
|
|
1281
|
+
}
|
|
1282
|
+
try {
|
|
1283
|
+
if (ctx.conversationDir && existsSync(ctx.conversationDir)) {
|
|
1284
|
+
await rm(ctx.conversationDir, {
|
|
1285
|
+
recursive: true,
|
|
1286
|
+
force: true,
|
|
1287
|
+
});
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
catch {
|
|
1291
|
+
// Ignore
|
|
1292
|
+
}
|
|
1293
|
+
telemetry.trackPhaseEnd('cleanup');
|
|
1047
1294
|
}
|
|
1048
|
-
|
|
1049
|
-
// Ignore
|
|
1050
|
-
}
|
|
1295
|
+
await telemetry.flush();
|
|
1051
1296
|
}
|
|
1052
1297
|
}
|
|
1053
1298
|
//# sourceMappingURL=verifyFeature.js.map
|