@ranger-testing/ranger-cli 1.1.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -45
- package/build/cli.js +671 -291
- package/build/cli.js.map +1 -1
- package/build/commands/addEnv.js +1 -1
- package/build/commands/addEnv.js.map +1 -1
- package/build/commands/authEncrypt.js +5 -10
- package/build/commands/authEncrypt.js.map +1 -1
- package/build/commands/clean.js +1 -1
- package/build/commands/clean.js.map +1 -1
- package/build/commands/config.js +9 -15
- package/build/commands/config.js.map +1 -1
- package/build/commands/env.js +10 -13
- package/build/commands/env.js.map +1 -1
- package/build/commands/feature.js +138 -67
- package/build/commands/feature.js.map +1 -1
- package/build/commands/hook.js +9 -4
- package/build/commands/hook.js.map +1 -1
- package/build/commands/hooks/autoPrompt.js +32 -0
- package/build/commands/hooks/autoPrompt.js.map +1 -0
- package/build/commands/hooks/disable.js +8 -5
- package/build/commands/hooks/disable.js.map +1 -1
- package/build/commands/hooks/enable.js +16 -9
- package/build/commands/hooks/enable.js.map +1 -1
- package/build/commands/hooks/exitPlanMode.js +10 -10
- package/build/commands/hooks/exitPlanMode.js.map +1 -1
- package/build/commands/hooks/index.js +1 -0
- package/build/commands/hooks/index.js.map +1 -1
- package/build/commands/hooks/output.js +20 -2
- package/build/commands/hooks/output.js.map +1 -1
- package/build/commands/hooks/planReminder.js +9 -9
- package/build/commands/hooks/planReminder.js.map +1 -1
- package/build/commands/hooks/planStart.js +6 -6
- package/build/commands/hooks/planStart.js.map +1 -1
- package/build/commands/hooks/postEdit.js +6 -6
- package/build/commands/hooks/postEdit.js.map +1 -1
- package/build/commands/hooks/preCompact.js +5 -5
- package/build/commands/hooks/preCompact.js.map +1 -1
- package/build/commands/hooks/sessionEnd.js +8 -4
- package/build/commands/hooks/sessionEnd.js.map +1 -1
- package/build/commands/hooks/sessionStart.js +41 -25
- package/build/commands/hooks/sessionStart.js.map +1 -1
- package/build/commands/hooks/stopHook.js +30 -6
- package/build/commands/hooks/stopHook.js.map +1 -1
- package/build/commands/index.js +1 -2
- package/build/commands/index.js.map +1 -1
- package/build/commands/login.js +2 -5
- package/build/commands/login.js.map +1 -1
- package/build/commands/setupCi.js +189 -0
- package/build/commands/setupCi.js.map +1 -0
- package/build/commands/skillup.js +16 -68
- package/build/commands/skillup.js.map +1 -1
- package/build/commands/start.js +1 -1
- package/build/commands/start.js.map +1 -1
- package/build/commands/status.js +14 -13
- package/build/commands/status.js.map +1 -1
- package/build/commands/update.js +34 -5
- package/build/commands/update.js.map +1 -1
- package/build/commands/updateEnv.js +1 -1
- package/build/commands/updateEnv.js.map +1 -1
- package/build/commands/useEnv.js +1 -1
- package/build/commands/useEnv.js.map +1 -1
- package/build/commands/utils/activeProfile.js +76 -0
- package/build/commands/utils/activeProfile.js.map +1 -0
- package/build/commands/utils/browserSessionsApi.js +1 -1
- package/build/commands/utils/browserSessionsApi.js.map +1 -1
- package/build/commands/utils/desirePathLog.js +39 -34
- package/build/commands/utils/desirePathLog.js.map +1 -1
- package/build/commands/utils/deviceAuth.js +53 -5
- package/build/commands/utils/deviceAuth.js.map +1 -1
- package/build/commands/utils/environment.js +11 -12
- package/build/commands/utils/environment.js.map +1 -1
- package/build/commands/utils/featureApi.js +49 -46
- package/build/commands/utils/featureApi.js.map +1 -1
- package/build/commands/utils/featureReportGenerator.js +6 -6
- package/build/commands/utils/featureReportGenerator.js.map +1 -1
- package/build/commands/utils/keychain.js +1 -1
- package/build/commands/utils/localAgentInstallationsApi.js +1 -1
- package/build/commands/utils/profileMessages.js +8 -0
- package/build/commands/utils/profileMessages.js.map +1 -0
- package/build/commands/utils/profileSetupBanner.js +167 -0
- package/build/commands/utils/profileSetupBanner.js.map +1 -0
- package/build/commands/utils/retry.js +25 -0
- package/build/commands/utils/retry.js.map +1 -0
- package/build/commands/utils/sessionCache.js +17 -0
- package/build/commands/utils/sessionCache.js.map +1 -1
- package/build/commands/utils/settings.js +23 -2
- package/build/commands/utils/settings.js.map +1 -1
- package/build/commands/utils/skills.js +1 -1
- package/build/commands/utils/telemetry.js +254 -0
- package/build/commands/utils/telemetry.js.map +1 -0
- package/build/commands/utils/userApi.js +4 -4
- package/build/commands/utils/userApi.js.map +1 -1
- package/build/commands/verifyFeature.js +678 -407
- package/build/commands/verifyFeature.js.map +1 -1
- package/build/commands/verifyInBrowser.js +1 -1
- package/build/commands/verifyInBrowser.js.map +1 -1
- package/build/skills/ranger/SKILL.md +65 -64
- package/build/skills/ranger/create.md +31 -31
- package/build/skills/ranger/feedback.md +25 -17
- package/build/skills/ranger/start.md +37 -37
- package/build/skills/ranger/verify.md +59 -55
- package/package.json +1 -1
- package/scripts/postinstall.js +1 -1
- package/build/commands/dataMcpServer.js +0 -1
- package/build/commands/dataMcpServer.js.map +0 -1
- package/build/commands/utils/cliSecret.js +0 -1
- package/build/commands/utils/cliSecret.js.map +0 -1
- package/build/skills/bug-bash.md +0 -329
- package/build/skills/e2e-test-recommender.md +0 -168
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { query, } from '@anthropic-ai/claude-agent-sdk';
|
|
2
|
+
import { createTelemetryCollector, } from './utils/telemetry.js';
|
|
2
3
|
import { join, dirname } from 'path';
|
|
3
4
|
import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
|
|
4
5
|
import { existsSync } from 'fs';
|
|
@@ -6,11 +7,16 @@ import { execSync } from 'child_process';
|
|
|
6
7
|
import { tmpdir } from 'os';
|
|
7
8
|
import inquirer from 'inquirer';
|
|
8
9
|
import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, getEnvDir, } from './utils/settings.js';
|
|
9
|
-
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl,
|
|
10
|
+
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getProxySessionToken, createVerificationStep, createStepAsset, } from './utils/browserSessionsApi.js';
|
|
11
|
+
import { getAiProxyUrl } from './utils/environment.js';
|
|
10
12
|
import { getToken } from './utils/keychain.js';
|
|
11
13
|
import { getActiveFeatureId } from './feature.js';
|
|
14
|
+
import { readActiveProfileName } from './utils/activeProfile.js';
|
|
15
|
+
import { getEnvNames } from './env.js';
|
|
16
|
+
import { formatProfileRequiredMessage } from './utils/profileMessages.js';
|
|
12
17
|
import { getFeature, updateFeature, updateChecklistItem, startSession, getActionItems, getItemFeedback, } from './utils/featureApi.js';
|
|
13
18
|
import { getRangerDir } from './utils/rangerRoot.js';
|
|
19
|
+
const bold = (text) => `\x1b[1m${text}\x1b[0m`;
|
|
14
20
|
/**
|
|
15
21
|
* Get the current git branch
|
|
16
22
|
*/
|
|
@@ -73,7 +79,7 @@ function getMockEvaluation(outcome) {
|
|
|
73
79
|
success: true,
|
|
74
80
|
summary: '[DEBUG] Mock verification completed successfully.',
|
|
75
81
|
evaluation: 'verified',
|
|
76
|
-
evaluationReason: 'All
|
|
82
|
+
evaluationReason: 'All scenario requirements were met.',
|
|
77
83
|
},
|
|
78
84
|
partial: {
|
|
79
85
|
success: false,
|
|
@@ -91,7 +97,7 @@ function getMockEvaluation(outcome) {
|
|
|
91
97
|
incomplete: {
|
|
92
98
|
success: false,
|
|
93
99
|
summary: '[DEBUG] Mock incomplete verification.',
|
|
94
|
-
evaluation: '
|
|
100
|
+
evaluation: 'incomplete',
|
|
95
101
|
evaluationReason: 'Implementation is incomplete and needs additional work.',
|
|
96
102
|
issues: [
|
|
97
103
|
{
|
|
@@ -157,42 +163,52 @@ function getDebugPrompt() {
|
|
|
157
163
|
Return your findings in the structured output format.`;
|
|
158
164
|
}
|
|
159
165
|
/**
|
|
160
|
-
* Prompt user to select a
|
|
166
|
+
* Prompt user to select a scenario
|
|
161
167
|
*/
|
|
162
168
|
async function selectChecklistItem(items) {
|
|
163
169
|
if (items.length === 0) {
|
|
164
170
|
return null;
|
|
165
171
|
}
|
|
166
|
-
const choices = items.map((item
|
|
167
|
-
const emoji = item.status === '
|
|
172
|
+
const choices = items.map((item) => {
|
|
173
|
+
const emoji = item.status === 'closed' && item.terminalReason === 'approved'
|
|
168
174
|
? '\u2705'
|
|
169
|
-
: item.status === '
|
|
170
|
-
? '\ud83d\
|
|
171
|
-
: item.status === '
|
|
172
|
-
? '\ud83d\
|
|
173
|
-
: item.status === '
|
|
174
|
-
? '\
|
|
175
|
-
: '
|
|
175
|
+
: item.status === 'verified'
|
|
176
|
+
? '\ud83d\udfe2' // green circle
|
|
177
|
+
: item.status === 'incomplete'
|
|
178
|
+
? '\ud83d\udfe0' // orange circle
|
|
179
|
+
: item.status === 'blocked'
|
|
180
|
+
? '\ud83d\uded1'
|
|
181
|
+
: item.status === 'closed'
|
|
182
|
+
? '\u26d4'
|
|
183
|
+
: item.status === 'verification_in_progress'
|
|
184
|
+
? '\u23f3'
|
|
185
|
+
: '\u2b1c';
|
|
176
186
|
const commentBadge = item.unaddressedCommentCount > 0
|
|
177
187
|
? ` [${item.unaddressedCommentCount} comments]`
|
|
178
188
|
: '';
|
|
189
|
+
const disabledReason = item.actionable
|
|
190
|
+
? false
|
|
191
|
+
: item.status === 'closed' && item.terminalReason
|
|
192
|
+
? item.terminalReason
|
|
193
|
+
: 'not actionable';
|
|
179
194
|
return {
|
|
180
|
-
name: `${
|
|
195
|
+
name: `${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}`,
|
|
181
196
|
value: item.id,
|
|
197
|
+
disabled: disabledReason,
|
|
182
198
|
};
|
|
183
199
|
});
|
|
184
200
|
const { selected } = await inquirer.prompt([
|
|
185
201
|
{
|
|
186
202
|
type: 'list',
|
|
187
203
|
name: 'selected',
|
|
188
|
-
message: 'Which
|
|
204
|
+
message: 'Which scenario does this verify?',
|
|
189
205
|
choices,
|
|
190
206
|
},
|
|
191
207
|
]);
|
|
192
208
|
return items.find((i) => i.id === selected) || null;
|
|
193
209
|
}
|
|
194
210
|
/**
|
|
195
|
-
* Handle incomplete verification - check if all other
|
|
211
|
+
* Handle incomplete verification - check if all other scenarios are terminal and prompt user
|
|
196
212
|
*/
|
|
197
213
|
async function handleIncompleteItem(featureId, incompleteItem, result) {
|
|
198
214
|
// Get action items to check if there are other items to work on
|
|
@@ -217,46 +233,98 @@ async function handleIncompleteItem(featureId, incompleteItem, result) {
|
|
|
217
233
|
}
|
|
218
234
|
console.log(`\nNext steps:`);
|
|
219
235
|
console.log(` 1. Fix the issues above in your code`);
|
|
220
|
-
console.log(` 2. Run 'ranger
|
|
236
|
+
console.log(` 2. Run 'ranger go' again to re-verify`);
|
|
221
237
|
if (allOthersTerminal && otherItems.length > 0) {
|
|
222
|
-
console.log(`\nAll other
|
|
223
|
-
console.log(`If you're done for now,
|
|
238
|
+
console.log(`\nAll other scenarios are complete.`);
|
|
239
|
+
console.log(`If you're done for now, you can stop and resume later with 'ranger resume'.`);
|
|
224
240
|
}
|
|
225
241
|
console.log(`${'='.repeat(60)}\n`);
|
|
226
242
|
}
|
|
227
243
|
/**
|
|
228
|
-
* PostToolUse hook that logs browser tool calls to stdout
|
|
244
|
+
* Create a PostToolUse hook that logs browser tool calls to stdout and tracks
|
|
245
|
+
* all tool calls via telemetry with per-call timing.
|
|
246
|
+
*/
|
|
247
|
+
function createToolCallTrackingHook(telemetry) {
|
|
248
|
+
const toolCallCounts = new Map();
|
|
249
|
+
const hook = async (input) => {
|
|
250
|
+
if (input.hook_event_name !== 'PostToolUse')
|
|
251
|
+
return {};
|
|
252
|
+
const postInput = input;
|
|
253
|
+
const toolInput = postInput.tool_input;
|
|
254
|
+
const shortName = postInput.tool_name.replace('mcp__ranger-browser__', '');
|
|
255
|
+
// Track count
|
|
256
|
+
toolCallCounts.set(shortName, (toolCallCounts.get(shortName) || 0) + 1);
|
|
257
|
+
// Log tool call as telemetry event
|
|
258
|
+
telemetry.trackPhaseStart('tool_call', { toolName: shortName });
|
|
259
|
+
telemetry.trackPhaseEnd('tool_call', {
|
|
260
|
+
toolName: shortName,
|
|
261
|
+
toolInput: summarizeToolInput(shortName, toolInput),
|
|
262
|
+
});
|
|
263
|
+
// Console log
|
|
264
|
+
switch (postInput.tool_name) {
|
|
265
|
+
case 'mcp__ranger-browser__browser_navigate':
|
|
266
|
+
console.log(`[browser] Navigate → ${toolInput.url}`);
|
|
267
|
+
break;
|
|
268
|
+
case 'mcp__ranger-browser__browser_click':
|
|
269
|
+
console.log(`[browser] Click → "${toolInput.element}"`);
|
|
270
|
+
break;
|
|
271
|
+
case 'mcp__ranger-browser__browser_type':
|
|
272
|
+
console.log(`[browser] Type → "${toolInput.text}" into "${toolInput.element}"`);
|
|
273
|
+
break;
|
|
274
|
+
case 'mcp__ranger-browser__browser_press_key':
|
|
275
|
+
console.log(`[browser] Press key → ${toolInput.key}`);
|
|
276
|
+
break;
|
|
277
|
+
case 'mcp__ranger-browser__browser_wait_for':
|
|
278
|
+
console.log(`[browser] Wait → ${toolInput.time ? `${toolInput.time}ms` : toolInput.text || 'condition'}`);
|
|
279
|
+
break;
|
|
280
|
+
}
|
|
281
|
+
return {};
|
|
282
|
+
};
|
|
283
|
+
return { hook, toolCallCounts };
|
|
284
|
+
}
|
|
285
|
+
/**
|
|
286
|
+
* Create a PostToolUseFailure hook that tracks tool failures via telemetry.
|
|
229
287
|
*/
|
|
230
|
-
|
|
231
|
-
|
|
288
|
+
function createToolFailureHook(telemetry) {
|
|
289
|
+
return async (input) => {
|
|
290
|
+
if (input.hook_event_name !== 'PostToolUseFailure')
|
|
291
|
+
return {};
|
|
292
|
+
const failInput = input;
|
|
293
|
+
const shortName = failInput.tool_name.replace('mcp__ranger-browser__', '');
|
|
294
|
+
await telemetry.trackPhaseError('tool_failure', failInput.error, {
|
|
295
|
+
toolName: shortName,
|
|
296
|
+
isInterrupt: failInput.is_interrupt,
|
|
297
|
+
});
|
|
232
298
|
return {};
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
case '
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
case '
|
|
249
|
-
|
|
250
|
-
|
|
299
|
+
};
|
|
300
|
+
}
|
|
301
|
+
/**
|
|
302
|
+
* Summarize tool input for telemetry (avoid logging sensitive/large data).
|
|
303
|
+
*/
|
|
304
|
+
function summarizeToolInput(toolName, input) {
|
|
305
|
+
switch (toolName) {
|
|
306
|
+
case 'browser_navigate':
|
|
307
|
+
return { url: input.url };
|
|
308
|
+
case 'browser_click':
|
|
309
|
+
return { element: input.element };
|
|
310
|
+
case 'browser_type':
|
|
311
|
+
return { element: input.element };
|
|
312
|
+
case 'browser_take_screenshot':
|
|
313
|
+
return { filename: input.filename };
|
|
314
|
+
case 'browser_press_key':
|
|
315
|
+
return { key: input.key };
|
|
316
|
+
case 'browser_wait_for':
|
|
317
|
+
return { time: input.time, text: input.text };
|
|
318
|
+
default:
|
|
319
|
+
return {};
|
|
251
320
|
}
|
|
252
|
-
|
|
253
|
-
};
|
|
321
|
+
}
|
|
254
322
|
/**
|
|
255
323
|
* Create a PostToolUse hook that uploads screenshots immediately after they're taken.
|
|
256
324
|
* Returns the hook callback and a set of filenames that were successfully uploaded,
|
|
257
325
|
* so the post-hoc fallback can skip them.
|
|
258
326
|
*/
|
|
259
|
-
function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
327
|
+
function createScreenshotUploadHook(sessionId, checklistItemId, traceDir, telemetry) {
|
|
260
328
|
const uploadedFiles = new Set();
|
|
261
329
|
let position = 1;
|
|
262
330
|
const hook = async (input) => {
|
|
@@ -271,11 +339,15 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
|
271
339
|
const filename = toolInput?.filename;
|
|
272
340
|
if (!filename)
|
|
273
341
|
return {};
|
|
342
|
+
const isKeyFrame = filename.toLowerCase().startsWith('key_');
|
|
343
|
+
telemetry.trackPhaseStart('hook_screenshot_upload', {
|
|
344
|
+
filename,
|
|
345
|
+
isKeyFrame,
|
|
346
|
+
});
|
|
274
347
|
try {
|
|
275
348
|
const pngPath = join(traceDir, filename);
|
|
276
349
|
const pngBuffer = await readFile(pngPath);
|
|
277
350
|
const pngStat = await stat(pngPath);
|
|
278
|
-
const isKeyFrame = filename.toLowerCase().startsWith('key_');
|
|
279
351
|
const displayName = filename
|
|
280
352
|
.replace(/\.png$/i, '')
|
|
281
353
|
.replace(/^key_/i, '')
|
|
@@ -315,96 +387,142 @@ function createScreenshotUploadHook(sessionId, checklistItemId, traceDir) {
|
|
|
315
387
|
await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
|
|
316
388
|
// Track as uploaded
|
|
317
389
|
uploadedFiles.add(filename);
|
|
390
|
+
telemetry.trackPhaseEnd('hook_screenshot_upload', {
|
|
391
|
+
filename,
|
|
392
|
+
bytes: pngBuffer.length,
|
|
393
|
+
});
|
|
318
394
|
}
|
|
319
395
|
catch (err) {
|
|
320
|
-
|
|
321
|
-
|
|
396
|
+
await telemetry.trackPhaseError('hook_screenshot_upload', err, {
|
|
397
|
+
filename,
|
|
398
|
+
});
|
|
322
399
|
}
|
|
323
400
|
return {};
|
|
324
401
|
};
|
|
325
402
|
return { hook, uploadedFiles };
|
|
326
403
|
}
|
|
327
404
|
/**
|
|
328
|
-
*
|
|
405
|
+
* Phase 1: Setup through agent completion.
|
|
406
|
+
* Returns context for processVerificationResult.
|
|
329
407
|
*/
|
|
330
|
-
|
|
408
|
+
async function runVerification(options, telemetry) {
|
|
331
409
|
const isDebugMode = !!options.debugOutcome;
|
|
332
410
|
if (isDebugMode) {
|
|
333
411
|
console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
|
|
334
412
|
}
|
|
335
|
-
//
|
|
413
|
+
// --- Phase: feature_load ---
|
|
414
|
+
telemetry.trackPhaseStart('feature_load');
|
|
336
415
|
const featureId = await getActiveFeatureId();
|
|
337
416
|
if (!featureId) {
|
|
338
|
-
throw new Error('No active feature. Run: ranger
|
|
417
|
+
throw new Error('No active feature review. Run: ranger resume <id> or ranger create');
|
|
339
418
|
}
|
|
340
|
-
// Load feature details
|
|
341
419
|
const feature = await getFeature(featureId);
|
|
342
|
-
|
|
420
|
+
telemetry.setContext({ featureId });
|
|
343
421
|
const currentBranch = getGitBranch();
|
|
344
422
|
if (currentBranch && currentBranch !== feature.gitBranch) {
|
|
345
423
|
await updateFeature(featureId, { gitBranch: currentBranch });
|
|
346
424
|
console.log(` Updated branch to: ${currentBranch}`);
|
|
347
425
|
}
|
|
348
|
-
console.log(`\nActive feature: ${feature.name} (${featureId})`);
|
|
349
|
-
// Get action items - leaf items that can be verified (non-closed with no non-closed children)
|
|
426
|
+
console.log(`\nActive feature review: ${feature.name} (${featureId})`);
|
|
350
427
|
const { items: actionItems } = await getActionItems(featureId);
|
|
351
|
-
|
|
428
|
+
const actionItemsById = new Map(actionItems.map((item) => [item.id, item]));
|
|
429
|
+
const displayItems = feature.checklistItems.map((item, index) => {
|
|
430
|
+
const actionItem = actionItemsById.get(item.id);
|
|
431
|
+
return {
|
|
432
|
+
...item,
|
|
433
|
+
unaddressedCommentCount: actionItem?.unaddressedCommentCount ?? 0,
|
|
434
|
+
displayIndex: index,
|
|
435
|
+
actionable: !!actionItem && item.status !== 'closed',
|
|
436
|
+
};
|
|
437
|
+
});
|
|
438
|
+
telemetry.trackPhaseEnd('feature_load', {
|
|
439
|
+
itemCount: actionItems.length,
|
|
440
|
+
});
|
|
441
|
+
// --- Phase: scenario_select ---
|
|
442
|
+
telemetry.trackPhaseStart('scenario_select');
|
|
352
443
|
let checklistItem = null;
|
|
353
|
-
let taskDescription = options.
|
|
354
|
-
if (options.
|
|
444
|
+
let taskDescription = options.notes;
|
|
445
|
+
if (options.scenario !== undefined) {
|
|
355
446
|
// Use specified item index (1-based)
|
|
356
|
-
const itemIndex = options.
|
|
357
|
-
if (itemIndex < 0 || itemIndex >=
|
|
358
|
-
throw new Error(`Invalid
|
|
447
|
+
const itemIndex = options.scenario - 1; // 1-based to 0-based
|
|
448
|
+
if (itemIndex < 0 || itemIndex >= displayItems.length) {
|
|
449
|
+
throw new Error(`Invalid scenario index: ${options.scenario}. Feature review has ${displayItems.length} scenarios.`);
|
|
450
|
+
}
|
|
451
|
+
const displayItem = displayItems[itemIndex];
|
|
452
|
+
if (!displayItem.actionable) {
|
|
453
|
+
const reason = displayItem.status === 'closed' && displayItem.terminalReason
|
|
454
|
+
? displayItem.terminalReason
|
|
455
|
+
: 'not actionable';
|
|
456
|
+
throw new Error(`Scenario ${options.scenario} is ${reason} and cannot be verified. Choose a different scenario.`);
|
|
359
457
|
}
|
|
360
|
-
|
|
458
|
+
const actionItem = actionItemsById.get(displayItem.id);
|
|
459
|
+
if (!actionItem) {
|
|
460
|
+
throw new Error(`Scenario ${options.scenario} is not currently actionable. Try another scenario.`);
|
|
461
|
+
}
|
|
462
|
+
checklistItem = actionItem;
|
|
361
463
|
if (!taskDescription) {
|
|
362
464
|
taskDescription = checklistItem.description;
|
|
363
465
|
}
|
|
364
466
|
}
|
|
365
467
|
else {
|
|
366
|
-
// Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
|
|
367
468
|
const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
|
|
368
469
|
if (!isInteractive) {
|
|
369
|
-
// Non-TTY mode: require --
|
|
370
|
-
console.log('\nNon-interactive mode detected. The --
|
|
371
|
-
console.log('\nAvailable
|
|
372
|
-
|
|
373
|
-
const emoji = item.status === '
|
|
470
|
+
// Non-TTY mode: require --scenario flag, show available scenarios
|
|
471
|
+
console.log('\nNon-interactive mode detected. The --scenario flag is required.');
|
|
472
|
+
console.log('\nAvailable scenarios to verify:');
|
|
473
|
+
displayItems.forEach((item) => {
|
|
474
|
+
const emoji = item.status === 'closed' &&
|
|
475
|
+
item.terminalReason === 'approved'
|
|
374
476
|
? '\u2705'
|
|
375
|
-
: item.status === '
|
|
376
|
-
? '\ud83d\
|
|
377
|
-
: item.status === '
|
|
378
|
-
? '\ud83d\
|
|
379
|
-
: item.status === '
|
|
380
|
-
? '\
|
|
381
|
-
: '
|
|
477
|
+
: item.status === 'verified'
|
|
478
|
+
? '\ud83d\udfe2'
|
|
479
|
+
: item.status === 'incomplete'
|
|
480
|
+
? '\ud83d\udfe0'
|
|
481
|
+
: item.status === 'blocked'
|
|
482
|
+
? '\ud83d\uded1'
|
|
483
|
+
: item.status === 'closed'
|
|
484
|
+
? '\u26d4'
|
|
485
|
+
: item.status === 'verification_in_progress'
|
|
486
|
+
? '\u23f3'
|
|
487
|
+
: '\u2b1c';
|
|
382
488
|
const commentBadge = item.unaddressedCommentCount > 0
|
|
383
489
|
? ` [${item.unaddressedCommentCount} comments]`
|
|
384
490
|
: '';
|
|
385
|
-
|
|
491
|
+
const actionHint = item.actionable ? '' : ' [not actionable]';
|
|
492
|
+
console.log(` ${item.displayIndex + 1}. ${emoji} ${item.description}${commentBadge}${actionHint}`);
|
|
386
493
|
});
|
|
387
|
-
console.log('\nUsage: ranger
|
|
388
|
-
console.log('Example: ranger
|
|
389
|
-
throw new Error('The --
|
|
494
|
+
console.log('\nUsage: ranger go --scenario <number>');
|
|
495
|
+
console.log('Example: ranger go --scenario 1');
|
|
496
|
+
throw new Error('The --scenario flag is required in non-interactive mode. See available scenarios above.');
|
|
390
497
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
498
|
+
const selectedItem = await selectChecklistItem(displayItems);
|
|
499
|
+
if (selectedItem) {
|
|
500
|
+
const actionItem = actionItemsById.get(selectedItem.id);
|
|
501
|
+
if (!actionItem) {
|
|
502
|
+
throw new Error('Selected scenario is not currently actionable. Choose another scenario.');
|
|
503
|
+
}
|
|
504
|
+
checklistItem = actionItem;
|
|
505
|
+
if (!taskDescription) {
|
|
506
|
+
taskDescription = checklistItem.description;
|
|
507
|
+
}
|
|
395
508
|
}
|
|
396
509
|
}
|
|
397
510
|
if (!checklistItem) {
|
|
398
|
-
throw new Error('No
|
|
511
|
+
throw new Error('No scenario selected. Create scenarios when creating the feature review with -c or --scenario flags.');
|
|
399
512
|
}
|
|
400
513
|
if (checklistItem.status === 'closed') {
|
|
401
|
-
throw new Error(`Cannot verify
|
|
514
|
+
throw new Error(`Cannot verify scenario "${checklistItem.description}" — it is concluded (${checklistItem.terminalReason || 'unknown reason'}).`);
|
|
402
515
|
}
|
|
403
516
|
if (!taskDescription) {
|
|
404
|
-
throw new Error('No
|
|
517
|
+
throw new Error('No notes provided');
|
|
405
518
|
}
|
|
406
|
-
|
|
407
|
-
|
|
519
|
+
telemetry.setContext({ checklistItemId: checklistItem.id });
|
|
520
|
+
telemetry.trackPhaseEnd('scenario_select', {
|
|
521
|
+
selectionMethod: options.scenario !== undefined ? 'flag' : 'interactive',
|
|
522
|
+
itemStatus: checklistItem.status,
|
|
523
|
+
});
|
|
524
|
+
console.log(`\nVerifying scenario: ${checklistItem.description}`);
|
|
525
|
+
console.log(`Notes: ${taskDescription}`);
|
|
408
526
|
// Fetch reviewer feedback if item has parent or unaddressed comments
|
|
409
527
|
let itemFeedback = null;
|
|
410
528
|
if (checklistItem.parentItemId ||
|
|
@@ -415,8 +533,9 @@ export async function verifyFeature(options) {
|
|
|
415
533
|
console.log(`Reviewer feedback: ${itemFeedback.unaddressedComments.length} comment(s) to verify`);
|
|
416
534
|
}
|
|
417
535
|
}
|
|
418
|
-
catch {
|
|
419
|
-
// Non-fatal - continue without feedback
|
|
536
|
+
catch (err) {
|
|
537
|
+
// Non-fatal - continue without feedback, but log it
|
|
538
|
+
await telemetry.trackPhaseError('feedback_fetch', err);
|
|
420
539
|
}
|
|
421
540
|
}
|
|
422
541
|
// Start the session if it's in ready status
|
|
@@ -427,41 +546,38 @@ export async function verifyFeature(options) {
|
|
|
427
546
|
await startSession(featureId, feature.currentSessionId);
|
|
428
547
|
}
|
|
429
548
|
catch (error) {
|
|
430
|
-
// Ignore if session is already started (race condition)
|
|
431
549
|
const message = error instanceof Error ? error.message : String(error);
|
|
432
550
|
if (!message.includes('already')) {
|
|
433
551
|
throw error;
|
|
434
552
|
}
|
|
435
553
|
}
|
|
436
554
|
}
|
|
437
|
-
// Update
|
|
555
|
+
// Update scenario status to verification_in_progress
|
|
438
556
|
await updateChecklistItem(featureId, checklistItem.id, {
|
|
439
557
|
status: 'verification_in_progress',
|
|
440
558
|
});
|
|
441
|
-
//
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
559
|
+
// --- Phase: profile_resolution ---
|
|
560
|
+
telemetry.trackPhaseStart('profile_resolution');
|
|
561
|
+
let activeProfile = null;
|
|
562
|
+
if (options.profile) {
|
|
563
|
+
activeProfile = options.profile;
|
|
445
564
|
}
|
|
446
565
|
else {
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
|
|
566
|
+
activeProfile = await readActiveProfileName();
|
|
567
|
+
}
|
|
568
|
+
if (!activeProfile) {
|
|
569
|
+
throw new Error('No active profile. Run: ranger profile use <profile-name>');
|
|
452
570
|
}
|
|
453
|
-
const envDir = getEnvDir(
|
|
571
|
+
const envDir = getEnvDir(activeProfile);
|
|
454
572
|
if (!existsSync(envDir)) {
|
|
455
|
-
throw new Error(`
|
|
573
|
+
throw new Error(`Profile "${activeProfile}" not found. Run: ranger profile add ${activeProfile}`);
|
|
456
574
|
}
|
|
457
|
-
const settings = await loadSettings(
|
|
575
|
+
const settings = await loadSettings(activeProfile);
|
|
458
576
|
const resolvedSettings = resolveEnvVars(settings);
|
|
459
|
-
// Get base URL from settings
|
|
460
577
|
let url = resolvedSettings.baseUrl;
|
|
461
578
|
if (!url) {
|
|
462
|
-
throw new Error(`No baseUrl configured for
|
|
579
|
+
throw new Error(`No baseUrl configured for profile "${activeProfile}". Run: ranger profile config set ${activeProfile} baseUrl <url>`);
|
|
463
580
|
}
|
|
464
|
-
// Append startPath if provided
|
|
465
581
|
if (options.startPath) {
|
|
466
582
|
const base = url.endsWith('/') ? url.slice(0, -1) : url;
|
|
467
583
|
const path = options.startPath.startsWith('/')
|
|
@@ -469,13 +585,17 @@ export async function verifyFeature(options) {
|
|
|
469
585
|
: '/' + options.startPath;
|
|
470
586
|
url = base + path;
|
|
471
587
|
}
|
|
472
|
-
|
|
588
|
+
telemetry.trackPhaseEnd('profile_resolution', {
|
|
589
|
+
profileName: activeProfile,
|
|
590
|
+
});
|
|
591
|
+
// --- Phase: browser_session_create ---
|
|
592
|
+
telemetry.trackPhaseStart('browser_session_create');
|
|
473
593
|
const token = await getToken();
|
|
474
594
|
if (!token) {
|
|
475
|
-
throw new Error('No API token configured. Run: ranger
|
|
595
|
+
throw new Error('No API token configured. Run: ranger setup [token]');
|
|
476
596
|
}
|
|
477
597
|
const browserSession = await createBrowserSession({
|
|
478
|
-
environmentName:
|
|
598
|
+
environmentName: activeProfile,
|
|
479
599
|
settings: resolvedSettings,
|
|
480
600
|
task: taskDescription,
|
|
481
601
|
url,
|
|
@@ -483,78 +603,48 @@ export async function verifyFeature(options) {
|
|
|
483
603
|
checklistItemId: checklistItem.id,
|
|
484
604
|
});
|
|
485
605
|
console.log(`Browser session created: ${browserSession.id}`);
|
|
486
|
-
|
|
606
|
+
telemetry.setContext({ browserSessionId: browserSession.id });
|
|
607
|
+
// Link the browser session to the scenario immediately so steps
|
|
487
608
|
// are visible in the dashboard while verification is in progress
|
|
488
609
|
await updateChecklistItem(featureId, checklistItem.id, {
|
|
489
610
|
browserSessionId: browserSession.id,
|
|
490
611
|
});
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
'run-mcp-server',
|
|
497
|
-
'--config',
|
|
498
|
-
configResult.configPath,
|
|
499
|
-
],
|
|
500
|
-
};
|
|
501
|
-
const startTime = Date.now();
|
|
502
|
-
// Handle process interruption (Ctrl+C or coding agent killing the process)
|
|
503
|
-
let interrupted = false;
|
|
504
|
-
const handleInterrupt = async () => {
|
|
505
|
-
if (interrupted)
|
|
506
|
-
return;
|
|
507
|
-
interrupted = true;
|
|
508
|
-
console.log('\nVerification interrupted. Cleaning up...');
|
|
509
|
-
try {
|
|
510
|
-
await updateBrowserSession(browserSession.id, {
|
|
511
|
-
status: 'interrupted',
|
|
512
|
-
durationMs: Date.now() - startTime,
|
|
513
|
-
});
|
|
514
|
-
}
|
|
515
|
-
catch {
|
|
516
|
-
// Best effort
|
|
517
|
-
}
|
|
518
|
-
try {
|
|
519
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
520
|
-
status: 'pending',
|
|
521
|
-
});
|
|
522
|
-
}
|
|
523
|
-
catch {
|
|
524
|
-
// Best effort
|
|
525
|
-
}
|
|
526
|
-
try {
|
|
527
|
-
await cleanupTempFiles(configResult);
|
|
528
|
-
}
|
|
529
|
-
catch {
|
|
530
|
-
// Best effort
|
|
531
|
-
}
|
|
532
|
-
console.log('Checklist item reset to pending. Partial steps are preserved.');
|
|
533
|
-
process.exit(0);
|
|
534
|
-
};
|
|
535
|
-
process.on('SIGINT', handleInterrupt);
|
|
536
|
-
process.on('SIGTERM', handleInterrupt);
|
|
537
|
-
// Fetch Anthropic API key
|
|
538
|
-
let anthropicApiKey;
|
|
612
|
+
telemetry.trackPhaseEnd('browser_session_create');
|
|
613
|
+
// --- Phase: playwright_config ---
|
|
614
|
+
telemetry.trackPhaseStart('playwright_config');
|
|
615
|
+
let configResult;
|
|
616
|
+
let sessionToken;
|
|
539
617
|
try {
|
|
540
|
-
|
|
618
|
+
sessionToken = await getProxySessionToken();
|
|
541
619
|
}
|
|
542
620
|
catch (error) {
|
|
543
621
|
const message = error instanceof Error ? error.message : String(error);
|
|
544
|
-
const errorMsg = `Failed to fetch
|
|
622
|
+
const errorMsg = `Failed to fetch proxy session token: ${message}`;
|
|
545
623
|
try {
|
|
546
624
|
await updateBrowserSession(browserSession.id, {
|
|
547
625
|
status: 'failed',
|
|
548
|
-
durationMs:
|
|
626
|
+
durationMs: 0,
|
|
549
627
|
errorMessage: errorMsg,
|
|
550
628
|
});
|
|
551
629
|
}
|
|
552
|
-
catch {
|
|
553
|
-
|
|
630
|
+
catch (updateErr) {
|
|
631
|
+
await telemetry.trackPhaseError('session_error_update', updateErr);
|
|
554
632
|
}
|
|
555
633
|
throw new Error(errorMsg);
|
|
556
634
|
}
|
|
557
|
-
|
|
635
|
+
configResult = await buildPlaywrightConfig(resolvedSettings, activeProfile, browserSession?.id);
|
|
636
|
+
telemetry.trackPhaseEnd('playwright_config');
|
|
637
|
+
const startTime = Date.now();
|
|
638
|
+
const rangerBrowserMcp = {
|
|
639
|
+
command: 'npx',
|
|
640
|
+
args: [
|
|
641
|
+
'@ranger-testing/playwright',
|
|
642
|
+
'run-mcp-server',
|
|
643
|
+
'--config',
|
|
644
|
+
configResult.configPath,
|
|
645
|
+
],
|
|
646
|
+
};
|
|
647
|
+
// Build verifier prompt
|
|
558
648
|
let verifierPrompt;
|
|
559
649
|
if (isDebugMode) {
|
|
560
650
|
verifierPrompt = getDebugPrompt();
|
|
@@ -563,7 +653,6 @@ export async function verifyFeature(options) {
|
|
|
563
653
|
const notesSection = checklistItem.notes
|
|
564
654
|
? `\n\n## Additional Notes\n${checklistItem.notes}`
|
|
565
655
|
: '';
|
|
566
|
-
// Build reviewer feedback section if available
|
|
567
656
|
let feedbackSection = '';
|
|
568
657
|
if (itemFeedback && itemFeedback.unaddressedComments.length > 0) {
|
|
569
658
|
const commentLines = itemFeedback.unaddressedComments
|
|
@@ -584,9 +673,9 @@ ${commentLines}`;
|
|
|
584
673
|
canonicalFlowSection = `\n\n## Expected Flow (from previous verification)
|
|
585
674
|
${itemFeedback.canonicalFlow}`;
|
|
586
675
|
}
|
|
587
|
-
verifierPrompt = `You are a Feature Verifier. Your job is to verify a
|
|
676
|
+
verifierPrompt = `You are a Feature Review Verifier. Your job is to verify a scenario by executing a UI flow and evaluating whether it adequately completes the scenario.
|
|
588
677
|
|
|
589
|
-
##
|
|
678
|
+
## Scenario to Verify
|
|
590
679
|
${checklistItem.description}${notesSection}${feedbackSection}${canonicalFlowSection}
|
|
591
680
|
|
|
592
681
|
## Task to Execute
|
|
@@ -599,7 +688,7 @@ Your base URL is: ${url}
|
|
|
599
688
|
- DO NOT navigate to any different domain, host, or port under any circumstances
|
|
600
689
|
- IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
|
|
601
690
|
- If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
|
|
602
|
-
- The base URL above is the ONLY authorized
|
|
691
|
+
- The base URL above is the ONLY authorized profile for this verification
|
|
603
692
|
|
|
604
693
|
## Instructions
|
|
605
694
|
1. Navigate to the URL above using browser_navigate
|
|
@@ -608,7 +697,7 @@ Your base URL is: ${url}
|
|
|
608
697
|
4. Execute the task step-by-step using browser tools
|
|
609
698
|
5. **Take screenshots at key moments** (see Screenshot Guidelines below)
|
|
610
699
|
6. Document any issues found (bugs, errors, unexpected behavior)
|
|
611
|
-
7. After completing the verification, evaluate whether the result adequately verifies the
|
|
700
|
+
7. After completing the verification, evaluate whether the result adequately verifies the scenario
|
|
612
701
|
|
|
613
702
|
## Screenshot Guidelines - IMPORTANT
|
|
614
703
|
Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
|
|
@@ -624,7 +713,7 @@ Take screenshots throughout the verification flow so a human can review it for c
|
|
|
624
713
|
**Screenshot naming:**
|
|
625
714
|
- Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
|
|
626
715
|
- Number prefixes (01_, 02_, etc.) help maintain chronological order
|
|
627
|
-
- For KEY MOMENTS that prove the
|
|
716
|
+
- For KEY MOMENTS that prove the scenario is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
|
|
628
717
|
- The "key_" prefix marks screenshots as high-priority evidence for human reviewers
|
|
629
718
|
|
|
630
719
|
**Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
|
|
@@ -651,8 +740,8 @@ After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP erro
|
|
|
651
740
|
This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
|
|
652
741
|
|
|
653
742
|
## Evaluation Criteria
|
|
654
|
-
- VERIFIED: The task completed successfully and the
|
|
655
|
-
- PARTIAL: The task partially completed but some aspects of the
|
|
743
|
+
- VERIFIED: The task completed successfully and the scenario requirements are fully met
|
|
744
|
+
- PARTIAL: The task partially completed but some aspects of the scenario are not verified
|
|
656
745
|
- BLOCKED: A blocking issue (bug, error, missing feature) prevents completion
|
|
657
746
|
- FAILED: The task could not be completed due to errors
|
|
658
747
|
|
|
@@ -696,9 +785,12 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
696
785
|
},
|
|
697
786
|
required: ['success', 'summary', 'evaluation', 'evaluationReason'],
|
|
698
787
|
};
|
|
699
|
-
//
|
|
788
|
+
// --- Phase: agent_execution ---
|
|
789
|
+
telemetry.trackPhaseStart('agent_execution');
|
|
700
790
|
const traceDir = getTraceDirectory(browserSession.id);
|
|
701
|
-
const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir);
|
|
791
|
+
const screenshotHook = createScreenshotUploadHook(browserSession.id, checklistItem.id, traceDir, telemetry);
|
|
792
|
+
const toolCallHook = createToolCallTrackingHook(telemetry);
|
|
793
|
+
const toolFailureHook = createToolFailureHook(telemetry);
|
|
702
794
|
const result = query({
|
|
703
795
|
prompt: verifierPrompt,
|
|
704
796
|
options: {
|
|
@@ -708,8 +800,8 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
708
800
|
'ranger-browser': rangerBrowserMcp,
|
|
709
801
|
},
|
|
710
802
|
tools: ['mcp__ranger-browser__*'],
|
|
711
|
-
permissionMode: '
|
|
712
|
-
|
|
803
|
+
permissionMode: 'acceptEdits',
|
|
804
|
+
allowedTools: ['mcp__ranger-browser__*', 'Read', 'Glob', 'Grep'],
|
|
713
805
|
outputFormat: {
|
|
714
806
|
type: 'json_schema',
|
|
715
807
|
schema: outputSchema,
|
|
@@ -717,22 +809,28 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
717
809
|
hooks: {
|
|
718
810
|
PostToolUse: [
|
|
719
811
|
{
|
|
720
|
-
hooks: [
|
|
812
|
+
hooks: [toolCallHook.hook, screenshotHook.hook],
|
|
813
|
+
},
|
|
814
|
+
],
|
|
815
|
+
PostToolUseFailure: [
|
|
816
|
+
{
|
|
817
|
+
hooks: [toolFailureHook],
|
|
721
818
|
},
|
|
722
819
|
],
|
|
723
820
|
},
|
|
724
821
|
env: {
|
|
725
822
|
...process.env,
|
|
726
|
-
ANTHROPIC_API_KEY:
|
|
823
|
+
ANTHROPIC_API_KEY: sessionToken,
|
|
824
|
+
ANTHROPIC_BASE_URL: getAiProxyUrl(),
|
|
727
825
|
},
|
|
728
826
|
persistSession: false,
|
|
729
827
|
},
|
|
730
828
|
});
|
|
731
|
-
//
|
|
829
|
+
// Collect messages
|
|
732
830
|
let finalResult = null;
|
|
733
831
|
let agentError = null;
|
|
734
|
-
// Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
|
|
735
832
|
let lastStructuredOutputInput = null;
|
|
833
|
+
let resultMeta = {};
|
|
736
834
|
const conversationFilePath = getConversationFilePath(browserSession.id);
|
|
737
835
|
const conversationDir = dirname(conversationFilePath);
|
|
738
836
|
await mkdir(conversationDir, { recursive: true });
|
|
@@ -743,170 +841,230 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
743
841
|
}, TIMEOUT_MS);
|
|
744
842
|
});
|
|
745
843
|
try {
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
(
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
block.input
|
|
765
|
-
lastStructuredOutputInput =
|
|
766
|
-
block.input;
|
|
767
|
-
}
|
|
844
|
+
await Promise.race([
|
|
845
|
+
(async () => {
|
|
846
|
+
for await (const message of result) {
|
|
847
|
+
try {
|
|
848
|
+
const jsonLine = JSON.stringify(message) + '\n';
|
|
849
|
+
await appendFile(conversationFilePath, jsonLine, 'utf-8');
|
|
850
|
+
}
|
|
851
|
+
catch {
|
|
852
|
+
// Ignore
|
|
853
|
+
}
|
|
854
|
+
const msg = message;
|
|
855
|
+
// Capture StructuredOutput tool call input as fallback
|
|
856
|
+
if (msg.type === 'assistant' && msg.message?.content) {
|
|
857
|
+
for (const block of msg.message.content) {
|
|
858
|
+
if (block.type === 'tool_use' &&
|
|
859
|
+
block.name === 'StructuredOutput' &&
|
|
860
|
+
block.input) {
|
|
861
|
+
lastStructuredOutputInput =
|
|
862
|
+
block.input;
|
|
768
863
|
}
|
|
769
864
|
}
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
865
|
+
}
|
|
866
|
+
if (msg.error) {
|
|
867
|
+
let errorText = msg.error;
|
|
868
|
+
if (msg.message?.content &&
|
|
869
|
+
Array.isArray(msg.message.content)) {
|
|
870
|
+
const texts = msg.message.content
|
|
871
|
+
.filter((c) => c.type === 'text')
|
|
872
|
+
.map((c) => c.text || '')
|
|
873
|
+
.filter(Boolean);
|
|
874
|
+
if (texts.length > 0) {
|
|
875
|
+
errorText = texts.join(' ');
|
|
781
876
|
}
|
|
782
|
-
agentError = errorText;
|
|
783
877
|
}
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
878
|
+
agentError = errorText;
|
|
879
|
+
}
|
|
880
|
+
if (msg.type === 'result') {
|
|
881
|
+
// Capture SDK result metadata
|
|
882
|
+
resultMeta = {
|
|
883
|
+
numTurns: msg.num_turns,
|
|
884
|
+
totalCostUsd: msg.total_cost_usd,
|
|
885
|
+
durationApiMs: msg.duration_api_ms,
|
|
886
|
+
sdkDurationMs: msg.duration_ms,
|
|
887
|
+
inputTokens: msg.usage?.input_tokens,
|
|
888
|
+
outputTokens: msg.usage?.output_tokens,
|
|
889
|
+
cacheReadTokens: msg.usage?.cache_read_input_tokens,
|
|
890
|
+
cacheCreationTokens: msg.usage?.cache_creation_input_tokens,
|
|
891
|
+
};
|
|
892
|
+
if (msg.subtype === 'success' &&
|
|
893
|
+
message.structured_output) {
|
|
894
|
+
finalResult = message.structured_output;
|
|
895
|
+
}
|
|
896
|
+
else if (msg.subtype !== 'success') {
|
|
897
|
+
if (lastStructuredOutputInput &&
|
|
898
|
+
msg.errors?.length === 0) {
|
|
899
|
+
finalResult = lastStructuredOutputInput;
|
|
900
|
+
agentError = null;
|
|
789
901
|
}
|
|
790
|
-
else if (
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
if (lastStructuredOutputInput &&
|
|
794
|
-
message.errors?.length === 0) {
|
|
795
|
-
finalResult = lastStructuredOutputInput;
|
|
796
|
-
// Clear the error since we actually succeeded
|
|
797
|
-
agentError = null;
|
|
798
|
-
}
|
|
799
|
-
else if (!agentError) {
|
|
800
|
-
agentError =
|
|
801
|
-
message.errors?.join(', ') ||
|
|
802
|
-
'Unknown error';
|
|
803
|
-
}
|
|
902
|
+
else if (!agentError) {
|
|
903
|
+
agentError =
|
|
904
|
+
msg.errors?.join(', ') || 'Unknown error';
|
|
804
905
|
}
|
|
805
906
|
}
|
|
806
907
|
}
|
|
807
|
-
}
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
908
|
+
}
|
|
909
|
+
})(),
|
|
910
|
+
timeoutPromise,
|
|
911
|
+
]);
|
|
912
|
+
}
|
|
913
|
+
catch (error) {
|
|
914
|
+
agentError = error instanceof Error ? error.message : String(error);
|
|
915
|
+
}
|
|
916
|
+
const durationMs = Date.now() - startTime;
|
|
917
|
+
telemetry.trackPhaseEnd('agent_execution', {
|
|
918
|
+
...resultMeta,
|
|
919
|
+
toolCallCounts: Object.fromEntries(toolCallHook.toolCallCounts),
|
|
920
|
+
hasResult: !!finalResult,
|
|
921
|
+
hasError: !!agentError,
|
|
922
|
+
});
|
|
923
|
+
return {
|
|
924
|
+
featureId,
|
|
925
|
+
checklistItem,
|
|
926
|
+
browserSession,
|
|
927
|
+
finalResult,
|
|
928
|
+
agentError,
|
|
929
|
+
lastStructuredOutputInput,
|
|
930
|
+
screenshotHook,
|
|
931
|
+
toolCallCounts: toolCallHook.toolCallCounts,
|
|
932
|
+
configResult,
|
|
933
|
+
startTime,
|
|
934
|
+
durationMs,
|
|
935
|
+
conversationFilePath,
|
|
936
|
+
conversationDir,
|
|
937
|
+
isDebugMode,
|
|
938
|
+
debugOutcome: options.debugOutcome,
|
|
939
|
+
resultMeta,
|
|
940
|
+
telemetry,
|
|
941
|
+
};
|
|
942
|
+
}
|
|
943
|
+
/**
|
|
944
|
+
* Phase 2: Upload artifacts, evaluate result, update scenario.
|
|
945
|
+
*/
|
|
946
|
+
async function processVerificationResult(ctx) {
|
|
947
|
+
const { featureId, checklistItem, browserSession, screenshotHook, durationMs, conversationFilePath, isDebugMode, debugOutcome, telemetry, } = ctx;
|
|
948
|
+
const { finalResult, agentError } = ctx;
|
|
949
|
+
let traceDownloadUrl;
|
|
950
|
+
// --- Upload trace ---
|
|
951
|
+
try {
|
|
952
|
+
const traceDir = getTraceDirectory(browserSession.id);
|
|
953
|
+
if (existsSync(traceDir)) {
|
|
954
|
+
const files = await readdir(traceDir);
|
|
955
|
+
if (files.length > 0) {
|
|
956
|
+
telemetry.trackPhaseStart('upload_trace');
|
|
957
|
+
try {
|
|
823
958
|
const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
|
|
824
959
|
const traceBuffer = await zipDirectory(traceDir);
|
|
825
960
|
await uploadTrace(traceUrls.uploadUrl, traceBuffer);
|
|
826
961
|
traceDownloadUrl = traceUrls.downloadUrl;
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
962
|
+
telemetry.trackPhaseEnd('upload_trace', {
|
|
963
|
+
bytes: traceBuffer.length,
|
|
964
|
+
});
|
|
965
|
+
}
|
|
966
|
+
catch (err) {
|
|
967
|
+
await telemetry.trackPhaseError('upload_trace', err);
|
|
968
|
+
}
|
|
969
|
+
// --- Upload videos ---
|
|
970
|
+
const videos = await loadSessionVideos(traceDir);
|
|
971
|
+
for (const video of videos) {
|
|
972
|
+
telemetry.trackPhaseStart('upload_video', {
|
|
973
|
+
filename: video.filename,
|
|
974
|
+
});
|
|
975
|
+
try {
|
|
976
|
+
const videoBuffer = await readFile(video.path);
|
|
977
|
+
const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
|
|
978
|
+
await uploadVideo(videoUrls.uploadUrl, videoBuffer);
|
|
979
|
+
telemetry.trackPhaseEnd('upload_video', {
|
|
980
|
+
filename: video.filename,
|
|
981
|
+
bytes: videoBuffer.length,
|
|
982
|
+
});
|
|
983
|
+
}
|
|
984
|
+
catch (err) {
|
|
985
|
+
await telemetry.trackPhaseError('upload_video', err, {
|
|
986
|
+
filename: video.filename,
|
|
987
|
+
});
|
|
838
988
|
}
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
metadata: {
|
|
874
|
-
filename: pngFile,
|
|
875
|
-
timestamp: pngStat.mtime.toISOString(),
|
|
876
|
-
},
|
|
877
|
-
});
|
|
878
|
-
// Create step asset with upload URL
|
|
879
|
-
const assetResponse = await createStepAsset(browserSession.id, step.id, {
|
|
989
|
+
}
|
|
990
|
+
// --- Upload remaining screenshots ---
|
|
991
|
+
const pngFiles = files
|
|
992
|
+
.filter((f) => f.toLowerCase().endsWith('.png'))
|
|
993
|
+
.filter((f) => !screenshotHook.uploadedFiles.has(f))
|
|
994
|
+
.sort();
|
|
995
|
+
const positionOffset = screenshotHook.uploadedFiles.size + 1;
|
|
996
|
+
for (let i = 0; i < pngFiles.length; i++) {
|
|
997
|
+
const pngFile = pngFiles[i];
|
|
998
|
+
const isKeyFrame = pngFile.toLowerCase().startsWith('key_');
|
|
999
|
+
telemetry.trackPhaseStart('upload_screenshot', {
|
|
1000
|
+
filename: pngFile,
|
|
1001
|
+
isKeyFrame,
|
|
1002
|
+
});
|
|
1003
|
+
try {
|
|
1004
|
+
const pngPath = join(traceDir, pngFile);
|
|
1005
|
+
const pngBuffer = await readFile(pngPath);
|
|
1006
|
+
const pngStat = await stat(pngPath);
|
|
1007
|
+
const displayName = pngFile
|
|
1008
|
+
.replace(/\.png$/i, '')
|
|
1009
|
+
.replace(/^key_/i, '')
|
|
1010
|
+
.replace(/^\d+_/, '')
|
|
1011
|
+
.replace(/-/g, ' ');
|
|
1012
|
+
const { step } = await createVerificationStep(browserSession.id, {
|
|
1013
|
+
checklistItemId: checklistItem.id,
|
|
1014
|
+
position: positionOffset + i,
|
|
1015
|
+
stepType: 'screenshot',
|
|
1016
|
+
stepName: displayName,
|
|
1017
|
+
description: isKeyFrame
|
|
1018
|
+
? 'Key moment captured during verification'
|
|
1019
|
+
: 'Screenshot captured during verification',
|
|
1020
|
+
isKeyStep: isKeyFrame,
|
|
1021
|
+
status: 'success',
|
|
1022
|
+
metadata: {
|
|
880
1023
|
filename: pngFile,
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
1024
|
+
timestamp: pngStat.mtime.toISOString(),
|
|
1025
|
+
},
|
|
1026
|
+
});
|
|
1027
|
+
const assetResponse = await createStepAsset(browserSession.id, step.id, {
|
|
1028
|
+
filename: pngFile,
|
|
1029
|
+
assetType: 'screenshot',
|
|
1030
|
+
timing: 'after',
|
|
1031
|
+
position: 0,
|
|
1032
|
+
capturedAt: pngStat.mtime.toISOString(),
|
|
1033
|
+
metadata: {
|
|
1034
|
+
name: displayName,
|
|
1035
|
+
highPriority: isKeyFrame,
|
|
1036
|
+
},
|
|
1037
|
+
});
|
|
1038
|
+
await uploadScreenshot(assetResponse.uploadUrl, pngBuffer);
|
|
1039
|
+
telemetry.trackPhaseEnd('upload_screenshot', {
|
|
1040
|
+
filename: pngFile,
|
|
1041
|
+
bytes: pngBuffer.length,
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
1044
|
+
catch (err) {
|
|
1045
|
+
await telemetry.trackPhaseError('upload_screenshot', err, { filename: pngFile });
|
|
896
1046
|
}
|
|
897
1047
|
}
|
|
898
1048
|
}
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
1049
|
+
}
|
|
1050
|
+
// --- Upload conversation ---
|
|
1051
|
+
if (existsSync(conversationFilePath)) {
|
|
1052
|
+
telemetry.trackPhaseStart('upload_conversation');
|
|
1053
|
+
try {
|
|
1054
|
+
const conversationUrls = await getUploadUrls(browserSession.id, 'conversation.jsonl', 'jsonl');
|
|
1055
|
+
const conversationBuffer = await readFile(conversationFilePath);
|
|
1056
|
+
await uploadConversation(conversationUrls.uploadUrl, conversationBuffer);
|
|
1057
|
+
telemetry.trackPhaseEnd('upload_conversation', {
|
|
1058
|
+
bytes: conversationBuffer.length,
|
|
1059
|
+
});
|
|
1060
|
+
}
|
|
1061
|
+
catch (err) {
|
|
1062
|
+
await telemetry.trackPhaseError('upload_conversation', err);
|
|
908
1063
|
}
|
|
909
|
-
|
|
1064
|
+
}
|
|
1065
|
+
// --- Update browser session ---
|
|
1066
|
+
telemetry.trackPhaseStart('update_session');
|
|
1067
|
+
try {
|
|
910
1068
|
const typedResult = finalResult;
|
|
911
1069
|
const updateData = {
|
|
912
1070
|
status: (agentError ? 'failed' : 'completed'),
|
|
@@ -923,105 +1081,218 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
923
1081
|
typedResult.durationMs = durationMs;
|
|
924
1082
|
typedResult.checklistItemId = checklistItem.id;
|
|
925
1083
|
}
|
|
1084
|
+
telemetry.trackPhaseEnd('update_session');
|
|
926
1085
|
}
|
|
927
|
-
catch {
|
|
928
|
-
|
|
1086
|
+
catch (err) {
|
|
1087
|
+
await telemetry.trackPhaseError('update_session', err);
|
|
929
1088
|
}
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
1089
|
+
}
|
|
1090
|
+
catch {
|
|
1091
|
+
// Ignore upload errors
|
|
1092
|
+
}
|
|
1093
|
+
// --- Phase: evaluation ---
|
|
1094
|
+
telemetry.trackPhaseStart('evaluation');
|
|
1095
|
+
let resultForEval;
|
|
1096
|
+
if (isDebugMode && debugOutcome) {
|
|
1097
|
+
const mockEval = getMockEvaluation(debugOutcome);
|
|
1098
|
+
resultForEval = {
|
|
1099
|
+
...mockEval,
|
|
1100
|
+
sessionId: browserSession.id,
|
|
1101
|
+
sessionDir: getTraceDirectory(browserSession.id),
|
|
1102
|
+
durationMs,
|
|
1103
|
+
traceViewerUrl: traceDownloadUrl
|
|
1104
|
+
? buildTraceViewerUrl(traceDownloadUrl)
|
|
1105
|
+
: undefined,
|
|
1106
|
+
checklistItemId: checklistItem.id,
|
|
1107
|
+
};
|
|
1108
|
+
console.log(`\n[DEBUG MODE] Using mock evaluation: ${debugOutcome}`);
|
|
1109
|
+
}
|
|
1110
|
+
else {
|
|
1111
|
+
const typedResult = finalResult;
|
|
1112
|
+
if (agentError && !typedResult) {
|
|
1113
|
+
throw new Error(`Verification failed: ${agentError}`);
|
|
946
1114
|
}
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
1115
|
+
if (!typedResult) {
|
|
1116
|
+
throw new Error('No result received from agent');
|
|
1117
|
+
}
|
|
1118
|
+
resultForEval = typedResult;
|
|
1119
|
+
}
|
|
1120
|
+
telemetry.trackPhaseEnd('evaluation', {
|
|
1121
|
+
evaluation: resultForEval.evaluation,
|
|
1122
|
+
issueCount: resultForEval.issues?.length ?? 0,
|
|
1123
|
+
});
|
|
1124
|
+
// --- Phase: scenario_update ---
|
|
1125
|
+
telemetry.trackPhaseStart('scenario_update');
|
|
1126
|
+
const evaluation = resultForEval.evaluation;
|
|
1127
|
+
if (evaluation === 'verified') {
|
|
1128
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1129
|
+
status: 'verified',
|
|
1130
|
+
browserSessionId: browserSession.id,
|
|
1131
|
+
});
|
|
1132
|
+
console.log(`\n\u2705 Scenario verified!`);
|
|
1133
|
+
}
|
|
1134
|
+
else if (evaluation === 'blocked') {
|
|
1135
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1136
|
+
status: 'blocked',
|
|
1137
|
+
browserSessionId: browserSession.id,
|
|
1138
|
+
blockedReason: resultForEval.evaluationReason,
|
|
1139
|
+
});
|
|
1140
|
+
// Enhanced output for Claude Code
|
|
1141
|
+
console.log(`\n${'='.repeat(60)}`);
|
|
1142
|
+
console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
|
|
1143
|
+
console.log(`${'='.repeat(60)}`);
|
|
1144
|
+
console.log(`\nIssue: ${resultForEval.evaluationReason}`);
|
|
1145
|
+
if (resultForEval.issues?.length) {
|
|
1146
|
+
console.log(`\nDetails:`);
|
|
1147
|
+
for (const issue of resultForEval.issues) {
|
|
1148
|
+
const typeStr = issue.type ? ` (${issue.type})` : '';
|
|
1149
|
+
console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
|
|
954
1150
|
}
|
|
955
|
-
resultForEval = typedResult;
|
|
956
1151
|
}
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
if (evaluation === 'verified') {
|
|
960
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
961
|
-
status: 'verified',
|
|
962
|
-
browserSessionId: browserSession.id,
|
|
963
|
-
});
|
|
964
|
-
console.log(`\n\u2705 Checklist item verified!`);
|
|
1152
|
+
if (resultForEval.traceViewerUrl) {
|
|
1153
|
+
console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
|
|
965
1154
|
}
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
1155
|
+
console.log(`\nSuggested action: Debug this issue in your code, then run go again.`);
|
|
1156
|
+
console.log(`${'='.repeat(60)}\n`);
|
|
1157
|
+
}
|
|
1158
|
+
else if (evaluation === 'partial' ||
|
|
1159
|
+
evaluation === 'failed' ||
|
|
1160
|
+
evaluation === 'incomplete') {
|
|
1161
|
+
// Mark as incomplete - verification happened but requirements not fully met
|
|
1162
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
1163
|
+
status: 'incomplete',
|
|
1164
|
+
browserSessionId: browserSession.id,
|
|
1165
|
+
incompleteReason: resultForEval.evaluationReason,
|
|
1166
|
+
});
|
|
1167
|
+
// Check if other items are terminal and prompt user
|
|
1168
|
+
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
1169
|
+
}
|
|
1170
|
+
telemetry.trackPhaseEnd('scenario_update', { newStatus: evaluation });
|
|
1171
|
+
return resultForEval;
|
|
1172
|
+
}
|
|
1173
|
+
/**
|
|
1174
|
+
* Verify a scenario in the browser.
|
|
1175
|
+
* Orchestrates runVerification -> processVerificationResult with telemetry.
|
|
1176
|
+
*/
|
|
1177
|
+
export async function verifyFeature(options) {
|
|
1178
|
+
const telemetry = createTelemetryCollector('go');
|
|
1179
|
+
await telemetry.trackCommandStart({
|
|
1180
|
+
hasProfile: !!options.profile,
|
|
1181
|
+
hasScenario: options.scenario !== undefined,
|
|
1182
|
+
hasNotes: !!options.notes,
|
|
1183
|
+
isDebugMode: !!options.debugOutcome,
|
|
1184
|
+
});
|
|
1185
|
+
const { envNames } = await getEnvNames();
|
|
1186
|
+
if (envNames.length === 0) {
|
|
1187
|
+
throw new Error(formatProfileRequiredMessage((text) => bold(text)));
|
|
1188
|
+
}
|
|
1189
|
+
let ctx;
|
|
1190
|
+
let interrupted = false;
|
|
1191
|
+
let checklistItemResolved = false;
|
|
1192
|
+
const handleInterrupt = async () => {
|
|
1193
|
+
if (interrupted)
|
|
1194
|
+
return;
|
|
1195
|
+
interrupted = true;
|
|
1196
|
+
console.log('\nVerification interrupted. Cleaning up...');
|
|
1197
|
+
await telemetry.trackCommandEnd('interrupted', {
|
|
1198
|
+
durationMs: ctx ? Date.now() - ctx.startTime : 0,
|
|
1199
|
+
});
|
|
1200
|
+
if (ctx) {
|
|
1201
|
+
try {
|
|
1202
|
+
await updateBrowserSession(ctx.browserSession.id, {
|
|
1203
|
+
status: 'interrupted',
|
|
1204
|
+
durationMs: Date.now() - ctx.startTime,
|
|
1205
|
+
});
|
|
983
1206
|
}
|
|
984
|
-
|
|
985
|
-
|
|
1207
|
+
catch {
|
|
1208
|
+
// Best effort
|
|
1209
|
+
}
|
|
1210
|
+
try {
|
|
1211
|
+
await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
|
|
1212
|
+
status: 'pending',
|
|
1213
|
+
});
|
|
1214
|
+
}
|
|
1215
|
+
catch {
|
|
1216
|
+
// Best effort
|
|
1217
|
+
}
|
|
1218
|
+
if (ctx.configResult) {
|
|
1219
|
+
try {
|
|
1220
|
+
await cleanupTempFiles(ctx.configResult);
|
|
1221
|
+
}
|
|
1222
|
+
catch {
|
|
1223
|
+
// Best effort
|
|
1224
|
+
}
|
|
986
1225
|
}
|
|
987
|
-
console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
|
|
988
|
-
console.log(`${'='.repeat(60)}\n`);
|
|
989
|
-
}
|
|
990
|
-
else if (evaluation === 'partial' || evaluation === 'failed') {
|
|
991
|
-
// Mark as incomplete - verification happened but requirements not fully met
|
|
992
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
993
|
-
status: 'incomplete',
|
|
994
|
-
browserSessionId: browserSession.id,
|
|
995
|
-
incompleteReason: resultForEval.evaluationReason,
|
|
996
|
-
});
|
|
997
|
-
// Check if other items are terminal and prompt user
|
|
998
|
-
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
999
1226
|
}
|
|
1227
|
+
console.log('Scenario reset to pending. Partial steps are preserved.');
|
|
1228
|
+
process.exit(0);
|
|
1229
|
+
};
|
|
1230
|
+
process.on('SIGINT', handleInterrupt);
|
|
1231
|
+
process.on('SIGTERM', handleInterrupt);
|
|
1232
|
+
try {
|
|
1233
|
+
// Phase 1: Setup + agent execution
|
|
1234
|
+
ctx = await runVerification(options, telemetry);
|
|
1235
|
+
// Boundary flush — all agent execution telemetry is now safe
|
|
1236
|
+
await telemetry.flush();
|
|
1237
|
+
// Phase 2: Uploads + evaluation + status update
|
|
1238
|
+
const resultForEval = await processVerificationResult(ctx);
|
|
1239
|
+
checklistItemResolved = true;
|
|
1240
|
+
await telemetry.trackCommandEnd('success', {
|
|
1241
|
+
evaluation: resultForEval.evaluation,
|
|
1242
|
+
durationMs: ctx.durationMs,
|
|
1243
|
+
...ctx.resultMeta,
|
|
1244
|
+
});
|
|
1000
1245
|
return resultForEval;
|
|
1001
1246
|
}
|
|
1247
|
+
catch (error) {
|
|
1248
|
+
await telemetry.trackCommandError(error);
|
|
1249
|
+
throw error;
|
|
1250
|
+
}
|
|
1002
1251
|
finally {
|
|
1003
|
-
// Remove interrupt handlers to avoid double-firing after normal completion
|
|
1004
1252
|
process.removeListener('SIGINT', handleInterrupt);
|
|
1005
1253
|
process.removeListener('SIGTERM', handleInterrupt);
|
|
1006
|
-
//
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1254
|
+
// If the scenario was never resolved (agent error, throw, etc.),
|
|
1255
|
+
// reset it to pending so it doesn't stay stuck in verification_in_progress.
|
|
1256
|
+
if (ctx && !interrupted && !checklistItemResolved) {
|
|
1257
|
+
try {
|
|
1258
|
+
await updateChecklistItem(ctx.featureId, ctx.checklistItem.id, {
|
|
1259
|
+
status: 'pending',
|
|
1260
|
+
});
|
|
1261
|
+
console.log('Scenario reset to pending after unexpected error.');
|
|
1012
1262
|
}
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
// Ignore
|
|
1016
|
-
}
|
|
1017
|
-
try {
|
|
1018
|
-
if (existsSync(conversationDir)) {
|
|
1019
|
-
await rm(conversationDir, { recursive: true, force: true });
|
|
1263
|
+
catch (resetErr) {
|
|
1264
|
+
await telemetry.trackPhaseError('scenario_reset', resetErr);
|
|
1020
1265
|
}
|
|
1021
1266
|
}
|
|
1022
|
-
|
|
1023
|
-
|
|
1267
|
+
// --- Phase: cleanup ---
|
|
1268
|
+
if (ctx) {
|
|
1269
|
+
telemetry.trackPhaseStart('cleanup');
|
|
1270
|
+
if (ctx.configResult) {
|
|
1271
|
+
await cleanupTempFiles(ctx.configResult);
|
|
1272
|
+
}
|
|
1273
|
+
try {
|
|
1274
|
+
const traceDir = getTraceDirectory(ctx.browserSession.id);
|
|
1275
|
+
if (existsSync(traceDir)) {
|
|
1276
|
+
await rm(traceDir, { recursive: true, force: true });
|
|
1277
|
+
}
|
|
1278
|
+
}
|
|
1279
|
+
catch {
|
|
1280
|
+
// Ignore
|
|
1281
|
+
}
|
|
1282
|
+
try {
|
|
1283
|
+
if (ctx.conversationDir && existsSync(ctx.conversationDir)) {
|
|
1284
|
+
await rm(ctx.conversationDir, {
|
|
1285
|
+
recursive: true,
|
|
1286
|
+
force: true,
|
|
1287
|
+
});
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
catch {
|
|
1291
|
+
// Ignore
|
|
1292
|
+
}
|
|
1293
|
+
telemetry.trackPhaseEnd('cleanup');
|
|
1024
1294
|
}
|
|
1295
|
+
await telemetry.flush();
|
|
1025
1296
|
}
|
|
1026
1297
|
}
|
|
1027
1298
|
//# sourceMappingURL=verifyFeature.js.map
|