@ranger-testing/ranger-cli 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -65
- package/build/cli.js +105 -102
- package/build/cli.js.map +1 -1
- package/build/commands/addEnv.js +1 -1
- package/build/commands/addEnv.js.map +1 -1
- package/build/commands/authEncrypt.js +7 -6
- package/build/commands/authEncrypt.js.map +1 -1
- package/build/commands/clean.js +1 -1
- package/build/commands/clean.js.map +1 -1
- package/build/commands/config.js +5 -4
- package/build/commands/config.js.map +1 -1
- package/build/commands/dataMcpServer.js +1 -1
- package/build/commands/dataMcpServer.js.map +1 -1
- package/build/commands/env.js +17 -10
- package/build/commands/env.js.map +1 -1
- package/build/commands/feature.js +208 -273
- package/build/commands/feature.js.map +1 -1
- package/build/commands/index.js +3 -0
- package/build/commands/index.js.map +1 -1
- package/build/commands/postEditHook.js +25 -0
- package/build/commands/postEditHook.js.map +1 -0
- package/build/commands/preCompactHook.js +85 -0
- package/build/commands/preCompactHook.js.map +1 -0
- package/build/commands/sessionStartHook.js +64 -0
- package/build/commands/sessionStartHook.js.map +1 -0
- package/build/commands/skillup.js +21 -21
- package/build/commands/skillup.js.map +1 -1
- package/build/commands/start.js +1 -1
- package/build/commands/start.js.map +1 -1
- package/build/commands/status.js +30 -44
- package/build/commands/status.js.map +1 -1
- package/build/commands/update.js +32 -40
- package/build/commands/update.js.map +1 -1
- package/build/commands/updateEnv.js +1 -1
- package/build/commands/updateEnv.js.map +1 -1
- package/build/commands/useEnv.js +1 -1
- package/build/commands/useEnv.js.map +1 -1
- package/build/commands/utils/browserSessionsApi.js +1 -1
- package/build/commands/utils/browserSessionsApi.js.map +1 -1
- package/build/commands/utils/cliSecret.js +1 -1
- package/build/commands/utils/environment.js +0 -6
- package/build/commands/utils/environment.js.map +1 -1
- package/build/commands/utils/featureApi.js +68 -24
- package/build/commands/utils/featureApi.js.map +1 -1
- package/build/commands/utils/featureReportGenerator.js +37 -3
- package/build/commands/utils/featureReportGenerator.js.map +1 -1
- package/build/commands/utils/keychain.js +1 -1
- package/build/commands/utils/keychain.js.map +1 -1
- package/build/commands/utils/localAgentInstallationsApi.js +1 -1
- package/build/commands/utils/mcpConfig.js +1 -1
- package/build/commands/utils/rangerRoot.js +30 -0
- package/build/commands/utils/rangerRoot.js.map +1 -0
- package/build/commands/utils/settings.js +7 -5
- package/build/commands/utils/settings.js.map +1 -1
- package/build/commands/utils/skillContent.js +28 -0
- package/build/commands/utils/skillContent.js.map +1 -0
- package/build/commands/utils/skills.js +1 -1
- package/build/commands/utils/skills.js.map +1 -1
- package/build/commands/utils/userApi.js +32 -0
- package/build/commands/utils/userApi.js.map +1 -0
- package/build/commands/verifyFeature.js +429 -104
- package/build/commands/verifyFeature.js.map +1 -1
- package/build/commands/verifyInBrowser.js +1 -1
- package/build/commands/verifyInBrowser.js.map +1 -1
- package/build/skills/bug-bash.md +31 -10
- package/build/skills/feature-tracker/SKILL.md +8 -30
- package/build/skills/feature-tracker/create.md +47 -38
- package/build/skills/feature-tracker/start.md +4 -4
- package/build/skills/feature-tracker/verify.md +10 -14
- package/package.json +5 -3
- package/scripts/postinstall.js +18 -0
- package/build/skills/feature-tracker/manage.md +0 -145
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
2
2
|
import { join, dirname } from 'path';
|
|
3
|
-
import { readFile, readdir, appendFile, mkdir, rm } from 'fs/promises';
|
|
3
|
+
import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
|
|
4
4
|
import { existsSync } from 'fs';
|
|
5
5
|
import { execSync } from 'child_process';
|
|
6
6
|
import { tmpdir } from 'os';
|
|
7
7
|
import inquirer from 'inquirer';
|
|
8
8
|
import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, } from './utils/settings.js';
|
|
9
|
-
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
|
|
9
|
+
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
|
|
10
10
|
import { getToken } from './utils/keychain.js';
|
|
11
11
|
import { getActiveFeatureId } from './feature.js';
|
|
12
|
-
import { getFeature,
|
|
12
|
+
import { getFeature, updateChecklistItem, startSession, } from './utils/featureApi.js';
|
|
13
|
+
import { getRangerDir } from './utils/rangerRoot.js';
|
|
13
14
|
/**
|
|
14
15
|
* Zip a directory and return the buffer
|
|
15
16
|
*/
|
|
@@ -26,7 +27,7 @@ async function zipDirectory(dirPath) {
|
|
|
26
27
|
* Find the trace directory for a session
|
|
27
28
|
*/
|
|
28
29
|
function getTraceDirectory(sessionId) {
    // Per-session data lives at <ranger dir>/sessions/<sessionId>; the ranger
    // directory itself is resolved by getRangerDir().
    const sessionsRoot = join(getRangerDir(), 'sessions');
    return join(sessionsRoot, sessionId);
}
|
|
31
32
|
/**
|
|
32
33
|
* Get the conversation file path for a session
|
|
@@ -34,24 +35,132 @@ function getTraceDirectory(sessionId) {
|
|
|
34
35
|
function getConversationFilePath(sessionId) {
    // The conversation log is kept per session under the OS temp directory:
    // <tmpdir>/ranger-browser-sessions/<sessionId>/conversation.jsonl
    const sessionTempDir = join(tmpdir(), 'ranger-browser-sessions', sessionId);
    return join(sessionTempDir, 'conversation.jsonl');
}
|
|
38
|
+
/**
 * Load videos from a session's videos directory.
 *
 * Reads `<sessionDir>/videos` and returns one descriptor per entry whose name
 * ends in `.webm` (case-insensitive). Sub-directories are skipped, and the
 * result is sorted by filename so callers process videos in a stable order
 * regardless of the order the filesystem reports them in.
 *
 * @param {string} sessionDir - Session directory that may contain a `videos` sub-directory.
 * @returns {Promise<Array<{ filename: string, path: string }>>} Video descriptors;
 *   an empty array when the videos directory is missing or is not a directory.
 * @throws Any other filesystem error raised while reading the directory.
 */
async function loadSessionVideos(sessionDir) {
    const videosDir = join(sessionDir, 'videos');
    let entries;
    try {
        // Read directly instead of existsSync() + readdir(): a separate
        // existence check can race with the directory being removed.
        entries = await readdir(videosDir, { withFileTypes: true });
    }
    catch (error) {
        // No videos directory simply means nothing was recorded.
        if (error?.code === 'ENOENT' || error?.code === 'ENOTDIR') {
            return [];
        }
        throw error;
    }
    return entries
        // A directory that happens to be named "*.webm" is not a video.
        .filter((entry) => !entry.isDirectory() && entry.name.toLowerCase().endsWith('.webm'))
        .map((entry) => entry.name)
        .sort()
        .map((filename) => ({
            filename,
            path: join(videosDir, filename),
        }));
}
|
|
53
|
+
/**
 * Get mock evaluation data for debug mode.
 *
 * Builds a fresh mock result object for the requested outcome so callers can
 * exercise the post-verification handling without a real agent result.
 *
 * @param {string} outcome - One of 'verified', 'partial', 'incomplete',
 *   'blocked' or 'failed'.
 * @returns {object|undefined} The mock result (`success`, `summary`,
 *   `evaluation`, `evaluationReason` and, for non-verified outcomes, `issues`),
 *   or `undefined` when `outcome` is not one of the known outcomes.
 */
function getMockEvaluation(outcome) {
    const mockEvaluations = {
        verified: {
            success: true,
            summary: '[DEBUG] Mock verification completed successfully.',
            evaluation: 'verified',
            evaluationReason: 'All checklist requirements were met.',
        },
        partial: {
            success: false,
            summary: '[DEBUG] Mock partial verification.',
            evaluation: 'partial',
            evaluationReason: 'Some requirements were not fully verified.',
            issues: [
                {
                    severity: 'MINOR',
                    type: 'OTHER',
                    description: 'Secondary feature not fully implemented',
                },
            ],
        },
        // Note: the 'incomplete' outcome is reported with evaluation 'partial'.
        incomplete: {
            success: false,
            summary: '[DEBUG] Mock incomplete verification.',
            evaluation: 'partial',
            evaluationReason: 'Implementation is incomplete and needs additional work.',
            issues: [
                {
                    severity: 'MAJOR',
                    type: 'OTHER',
                    description: 'Feature is partially implemented but missing key functionality',
                },
                {
                    severity: 'MINOR',
                    type: 'OTHER',
                    description: 'UI elements present but not fully functional',
                },
            ],
        },
        blocked: {
            success: false,
            summary: '[DEBUG] Mock blocked verification.',
            evaluation: 'blocked',
            evaluationReason: 'HTTP 404 - Page not found.',
            issues: [
                {
                    severity: 'BLOCKER',
                    type: 'HTTP_404',
                    description: 'Target page returns 404 Not Found',
                },
                {
                    severity: 'MAJOR',
                    type: 'NAVIGATION_ERROR',
                    description: 'Unable to proceed due to missing page',
                },
            ],
        },
        failed: {
            success: false,
            summary: '[DEBUG] Mock failed verification.',
            evaluation: 'failed',
            evaluationReason: 'Browser automation failed with timeout error.',
            issues: [
                {
                    severity: 'BLOCKER',
                    type: 'OTHER',
                    description: 'Timeout waiting for element',
                },
            ],
        },
    };
    // Only look at the table's own keys: a bare `mockEvaluations[outcome]`
    // would walk the prototype chain and hand back inherited members (e.g. the
    // Object constructor for "constructor") instead of undefined.
    if (!Object.prototype.hasOwnProperty.call(mockEvaluations, outcome)) {
        return undefined;
    }
    return mockEvaluations[outcome];
}
|
|
129
|
+
/**
 * Build the prompt used in debug mode.
 *
 * The prompt asks the agent for a minimal browser interaction: open
 * https://www.mozilla.org, take a snapshot, take one named screenshot, and
 * return the structured output.
 *
 * @returns {string} The multi-line debug prompt.
 */
function getDebugPrompt() {
    const promptLines = [
        'You are testing browser automation. Your task is simple:',
        '',
        '1. Navigate to https://www.mozilla.org using browser_navigate',
        '2. Take a snapshot with browser_snapshot to see the page',
        '3. Take a screenshot named "01_mozilla-homepage.png" using browser_take_screenshot',
        '4. Return immediately with the structured output',
        '',
        'Return your findings in the structured output format.',
    ];
    return promptLines.join('\n');
}
|
|
37
142
|
/**
|
|
38
143
|
* Prompt user to select a checklist item
|
|
39
144
|
*/
|
|
40
145
|
async function selectChecklistItem(items) {
|
|
146
|
+
if (items.length === 0) {
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
41
149
|
const choices = items.map((item, i) => {
|
|
42
150
|
const emoji = item.status === 'verified'
|
|
43
151
|
? '\u2705'
|
|
44
|
-
: item.status === '
|
|
45
|
-
? '\ud83d\
|
|
46
|
-
: item.status === '
|
|
47
|
-
? '\
|
|
48
|
-
: '
|
|
152
|
+
: item.status === 'incomplete'
|
|
153
|
+
? '\ud83d\udfe0' // orange circle
|
|
154
|
+
: item.status === 'blocked'
|
|
155
|
+
? '\ud83d\uded1'
|
|
156
|
+
: item.status === 'canceled'
|
|
157
|
+
? '\u26d4'
|
|
158
|
+
: '\u2b1c';
|
|
49
159
|
return {
|
|
50
160
|
name: `${i + 1}. ${emoji} ${item.description}`,
|
|
51
161
|
value: item.id,
|
|
52
162
|
};
|
|
53
163
|
});
|
|
54
|
-
choices.push({ name: '+ Add new item', value: '__new__' });
|
|
55
164
|
const { selected } = await inquirer.prompt([
|
|
56
165
|
{
|
|
57
166
|
type: 'list',
|
|
@@ -60,18 +169,49 @@ async function selectChecklistItem(items) {
|
|
|
60
169
|
choices,
|
|
61
170
|
},
|
|
62
171
|
]);
|
|
63
|
-
|
|
64
|
-
|
|
172
|
+
return items.find((i) => i.id === selected) || null;
|
|
173
|
+
}
|
|
174
|
+
/**
 * Report an incomplete verification to the user.
 *
 * Prints the issues from `result` (or, when there are none, its
 * `evaluationReason`) followed by suggested next steps. It also re-fetches the
 * feature and looks at the other checklist items whose `sessionId` matches the
 * feature's `currentSessionId`: when there is at least one and all of them are
 * 'verified', 'blocked', 'canceled' or 'incomplete', it additionally suggests
 * concluding the session.
 *
 * @param {string} featureId - Id of the feature being verified.
 * @param {{ id: string }} incompleteItem - The checklist item that was just marked incomplete.
 * @param {{ issues?: Array<{ description: string }>, evaluationReason?: string }} result
 *   The evaluation result to report.
 * @returns {Promise<void>}
 */
async function handleIncompleteItem(featureId, incompleteItem, result) {
    const terminalStatuses = ['verified', 'blocked', 'canceled', 'incomplete'];
    const divider = '='.repeat(60);
    const feature = await getFeature(featureId);
    // Every item of the current session except the one being reported.
    const otherItems = feature.checklistItems
        .filter((item) => item.sessionId === feature.currentSessionId)
        .filter((item) => item.id !== incompleteItem.id);
    const allOthersTerminal = otherItems.every((item) => terminalStatuses.includes(item.status));
    console.log(`\n${divider}`);
    console.log(`INCOMPLETE - Verification found issues`);
    console.log(divider);
    // Prefer the structured issue list; fall back to the free-text reason.
    const hasIssues = Boolean(result.issues && result.issues.length > 0);
    if (hasIssues) {
        console.log(`\nIssues found:`);
        for (const issue of result.issues) {
            console.log(` • ${issue.description}`);
        }
    }
    else if (result.evaluationReason) {
        console.log(`\nReason: ${result.evaluationReason}`);
    }
    console.log(`\nNext steps:`);
    console.log(` 1. Fix the issues above in your code`);
    console.log(` 2. Run 'ranger verify-feature' again to re-verify`);
    // Only suggest concluding when there actually are other items and none of
    // them is still pending.
    if (otherItems.length > 0 && allOthersTerminal) {
        console.log(`\nAll other checklist items are complete.`);
        console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
    }
    console.log(`${divider}\n`);
}
|
|
71
207
|
/**
|
|
72
208
|
* Verify a checklist item in the browser
|
|
73
209
|
*/
|
|
74
|
-
export async function verifyFeature(
|
|
210
|
+
export async function verifyFeature(options) {
|
|
211
|
+
const isDebugMode = !!options.debugOutcome;
|
|
212
|
+
if (isDebugMode) {
|
|
213
|
+
console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
|
|
214
|
+
}
|
|
75
215
|
// 1. Check for active feature
|
|
76
216
|
const featureId = await getActiveFeatureId();
|
|
77
217
|
if (!featureId) {
|
|
@@ -80,77 +220,104 @@ export async function verifyFeature(url, options) {
|
|
|
80
220
|
// Load feature details
|
|
81
221
|
const feature = await getFeature(featureId);
|
|
82
222
|
console.log(`\nActive feature: ${feature.name} (${featureId})`);
|
|
223
|
+
// Filter to only items in the current session
|
|
224
|
+
const currentSessionId = feature.currentSessionId;
|
|
225
|
+
const currentSessionItems = currentSessionId
|
|
226
|
+
? feature.checklistItems.filter((item) => item.sessionId === currentSessionId)
|
|
227
|
+
: feature.checklistItems;
|
|
83
228
|
// 2. Determine which checklist item we're verifying
|
|
84
229
|
let checklistItem = null;
|
|
85
230
|
let taskDescription = options.task;
|
|
86
|
-
if (options.
|
|
87
|
-
//
|
|
88
|
-
checklistItem = await addChecklistItem(featureId, {
|
|
89
|
-
description: options.newItem,
|
|
90
|
-
});
|
|
91
|
-
console.log(`Created new checklist item: ${checklistItem.description}`);
|
|
92
|
-
if (!taskDescription) {
|
|
93
|
-
taskDescription = options.newItem;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
else if (options.item !== undefined) {
|
|
97
|
-
// Use specified item index
|
|
231
|
+
if (options.item !== undefined) {
|
|
232
|
+
// Use specified item index (1-based, relative to current session items)
|
|
98
233
|
const itemIndex = options.item - 1; // 1-based to 0-based
|
|
99
|
-
if (itemIndex < 0 || itemIndex >=
|
|
100
|
-
throw new Error(`Invalid item index: ${options.item}.
|
|
234
|
+
if (itemIndex < 0 || itemIndex >= currentSessionItems.length) {
|
|
235
|
+
throw new Error(`Invalid item index: ${options.item}. Current session has ${currentSessionItems.length} items.`);
|
|
101
236
|
}
|
|
102
|
-
checklistItem =
|
|
237
|
+
checklistItem = currentSessionItems[itemIndex];
|
|
103
238
|
if (!taskDescription) {
|
|
104
239
|
taskDescription = checklistItem.description;
|
|
105
240
|
}
|
|
106
241
|
}
|
|
107
242
|
else {
|
|
108
|
-
//
|
|
109
|
-
const
|
|
110
|
-
if (
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
243
|
+
// Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
|
|
244
|
+
const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
|
|
245
|
+
if (!isInteractive) {
|
|
246
|
+
// Non-TTY mode: require --item flag, show available items
|
|
247
|
+
console.log('\nNon-interactive mode detected. The --item flag is required.');
|
|
248
|
+
console.log('\nAvailable checklist items for current session:');
|
|
249
|
+
currentSessionItems.forEach((item, i) => {
|
|
250
|
+
const emoji = item.status === 'verified'
|
|
251
|
+
? '\u2705'
|
|
252
|
+
: item.status === 'incomplete'
|
|
253
|
+
? '\ud83d\udfe0' // orange circle
|
|
254
|
+
: item.status === 'blocked'
|
|
255
|
+
? '\ud83d\uded1'
|
|
256
|
+
: item.status === 'canceled'
|
|
257
|
+
? '\u26d4'
|
|
258
|
+
: '\u2b1c';
|
|
259
|
+
console.log(` ${i + 1}. ${emoji} ${item.description}`);
|
|
121
260
|
});
|
|
122
|
-
console.log(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
261
|
+
console.log('\nUsage: ranger verify-feature --item <number>');
|
|
262
|
+
console.log('Example: ranger verify-feature --item 1');
|
|
263
|
+
throw new Error('The --item flag is required in non-interactive mode. See available items above.');
|
|
126
264
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}
|
|
265
|
+
// Interactive selection (show only current session items)
|
|
266
|
+
checklistItem = await selectChecklistItem(currentSessionItems);
|
|
267
|
+
if (!taskDescription && checklistItem) {
|
|
268
|
+
taskDescription = checklistItem.description;
|
|
132
269
|
}
|
|
133
270
|
}
|
|
134
271
|
if (!checklistItem) {
|
|
135
|
-
throw new Error('No checklist item selected');
|
|
272
|
+
throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
|
|
136
273
|
}
|
|
137
274
|
if (!taskDescription) {
|
|
138
275
|
throw new Error('No task description provided');
|
|
139
276
|
}
|
|
140
277
|
console.log(`\nVerifying: ${checklistItem.description}`);
|
|
141
278
|
console.log(`Task: ${taskDescription}`);
|
|
142
|
-
//
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
279
|
+
// Start the session if it's in ready status
|
|
280
|
+
if (feature.currentSession &&
|
|
281
|
+
feature.currentSession.status === 'ready' &&
|
|
282
|
+
feature.currentSessionId) {
|
|
283
|
+
try {
|
|
284
|
+
await startSession(featureId, feature.currentSessionId);
|
|
285
|
+
}
|
|
286
|
+
catch (error) {
|
|
287
|
+
// Ignore if session is already started (race condition)
|
|
288
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
289
|
+
if (!message.includes('already')) {
|
|
290
|
+
throw error;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
146
293
|
}
|
|
147
|
-
|
|
148
|
-
|
|
294
|
+
// Update checklist item status to verification_in_progress
|
|
295
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
296
|
+
status: 'verification_in_progress',
|
|
297
|
+
});
|
|
298
|
+
// 3. Determine which environment to use (same pattern as verifyInBrowser)
|
|
299
|
+
let activeEnv;
|
|
300
|
+
if (options.env) {
|
|
301
|
+
activeEnv = options.env;
|
|
302
|
+
}
|
|
303
|
+
else {
|
|
304
|
+
const activeEnvPath = join(getRangerDir(), 'active-env.txt');
|
|
305
|
+
if (!existsSync(activeEnvPath)) {
|
|
306
|
+
throw new Error('No active environment. Run: ranger use <env-name>');
|
|
307
|
+
}
|
|
308
|
+
activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
|
|
309
|
+
}
|
|
310
|
+
const envDir = join(getRangerDir(), activeEnv);
|
|
149
311
|
if (!existsSync(envDir)) {
|
|
150
312
|
throw new Error(`Environment not found at ${envDir}. Run: ranger add env ${activeEnv}`);
|
|
151
313
|
}
|
|
152
314
|
const settings = await loadSettings(activeEnv);
|
|
153
315
|
const resolvedSettings = resolveEnvVars(settings);
|
|
316
|
+
// Get base URL from settings
|
|
317
|
+
const url = resolvedSettings.baseUrl;
|
|
318
|
+
if (!url) {
|
|
319
|
+
throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
|
|
320
|
+
}
|
|
154
321
|
// 4. Create browser session
|
|
155
322
|
const token = await getToken();
|
|
156
323
|
if (!token) {
|
|
@@ -161,6 +328,8 @@ export async function verifyFeature(url, options) {
|
|
|
161
328
|
settings: resolvedSettings,
|
|
162
329
|
task: taskDescription,
|
|
163
330
|
url,
|
|
331
|
+
featureId,
|
|
332
|
+
checklistItemId: checklistItem.id,
|
|
164
333
|
});
|
|
165
334
|
console.log(`Browser session created: ${browserSession.id}`);
|
|
166
335
|
const configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
|
|
@@ -195,23 +364,79 @@ export async function verifyFeature(url, options) {
|
|
|
195
364
|
throw new Error(errorMsg);
|
|
196
365
|
}
|
|
197
366
|
// 5. UI Verifier + Evaluation Agent prompt
|
|
198
|
-
|
|
367
|
+
let verifierPrompt;
|
|
368
|
+
if (isDebugMode) {
|
|
369
|
+
verifierPrompt = getDebugPrompt();
|
|
370
|
+
}
|
|
371
|
+
else {
|
|
372
|
+
const notesSection = checklistItem.notes
|
|
373
|
+
? `\n\n## Additional Notes\n${checklistItem.notes}`
|
|
374
|
+
: '';
|
|
375
|
+
verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
|
|
199
376
|
|
|
200
377
|
## Checklist Item to Verify
|
|
201
|
-
${checklistItem.description}
|
|
378
|
+
${checklistItem.description}${notesSection}
|
|
202
379
|
|
|
203
380
|
## Task to Execute
|
|
204
381
|
${taskDescription}
|
|
205
382
|
|
|
206
|
-
|
|
207
|
-
${url}
|
|
383
|
+
CRITICAL URL REQUIREMENT:
|
|
384
|
+
Your base URL is: ${url}
|
|
385
|
+
- You may ONLY navigate to paths under this base URL (same protocol, host, and port)
|
|
386
|
+
- For example, if the base URL is "http://localhost:3000", you can navigate to "http://localhost:3000/home", "http://localhost:3000/settings", etc.
|
|
387
|
+
- DO NOT navigate to any different domain, host, or port under any circumstances
|
|
388
|
+
- IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
|
|
389
|
+
- If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
|
|
390
|
+
- The base URL above is the ONLY authorized environment for this verification
|
|
208
391
|
|
|
209
392
|
## Instructions
|
|
210
|
-
1. Navigate to the URL using browser_navigate
|
|
393
|
+
1. Navigate to the URL above using browser_navigate
|
|
211
394
|
2. Take a snapshot with browser_snapshot to see the page
|
|
212
|
-
3.
|
|
213
|
-
4.
|
|
214
|
-
5.
|
|
395
|
+
3. **IMMEDIATELY check for blocking HTTP errors before proceeding**
|
|
396
|
+
4. Execute the task step-by-step using browser tools
|
|
397
|
+
5. **Take screenshots at key moments** (see Screenshot Guidelines below)
|
|
398
|
+
6. Document any issues found (bugs, errors, unexpected behavior)
|
|
399
|
+
7. After completing the verification, evaluate whether the result adequately verifies the checklist item
|
|
400
|
+
|
|
401
|
+
## Screenshot Guidelines - IMPORTANT
|
|
402
|
+
Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
|
|
403
|
+
|
|
404
|
+
**When to take screenshots (use browser_take_screenshot):**
|
|
405
|
+
- After initial page load (capture starting state)
|
|
406
|
+
- Before and after clicking buttons or submitting forms
|
|
407
|
+
- When important UI elements appear (modals, notifications, loading states)
|
|
408
|
+
- After navigation to new pages
|
|
409
|
+
- When verifying specific elements exist
|
|
410
|
+
- At the final state showing the completed action
|
|
411
|
+
|
|
412
|
+
**Screenshot naming:**
|
|
413
|
+
- Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
|
|
414
|
+
- Number prefixes (01_, 02_, etc.) help maintain chronological order
|
|
415
|
+
- For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
|
|
416
|
+
- The "key_" prefix marks screenshots as high-priority evidence for human reviewers
|
|
417
|
+
|
|
418
|
+
**Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
|
|
419
|
+
|
|
420
|
+
## Critical: Early Error Detection
|
|
421
|
+
After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP errors:
|
|
422
|
+
|
|
423
|
+
**Blocking errors to detect:**
|
|
424
|
+
- HTTP 404: "404", "Not Found", "Page not found", "does not exist"
|
|
425
|
+
- HTTP 500: "500", "Internal Server Error", "Server Error", "Something went wrong"
|
|
426
|
+
- HTTP 400: "400", "Bad Request", "Invalid request"
|
|
427
|
+
|
|
428
|
+
**Also check for:**
|
|
429
|
+
- Framework error pages (Next.js error boundary, React error page, "Application error")
|
|
430
|
+
- Completely blank/empty pages with no content
|
|
431
|
+
- "Cannot GET /path" messages
|
|
432
|
+
|
|
433
|
+
**If ANY blocking error is detected:**
|
|
434
|
+
1. DO NOT continue with the task
|
|
435
|
+
2. Return IMMEDIATELY with evaluation: "blocked"
|
|
436
|
+
3. Set evaluationReason to describe the specific error (e.g., "HTTP 404 - Page not found at /dashboard")
|
|
437
|
+
4. Include the error in issues array with severity: "BLOCKER" and appropriate type (HTTP_404, HTTP_500, HTTP_400, or NAVIGATION_ERROR)
|
|
438
|
+
|
|
439
|
+
This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
|
|
215
440
|
|
|
216
441
|
## Evaluation Criteria
|
|
217
442
|
- VERIFIED: The task completed successfully and the checklist item requirements are fully met
|
|
@@ -220,6 +445,7 @@ ${url}
|
|
|
220
445
|
- FAILED: The task could not be completed due to errors
|
|
221
446
|
|
|
222
447
|
Return your findings in the structured output format with your evaluation.`;
|
|
448
|
+
}
|
|
223
449
|
const outputSchema = {
|
|
224
450
|
type: 'object',
|
|
225
451
|
properties: {
|
|
@@ -239,6 +465,16 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
239
465
|
type: 'string',
|
|
240
466
|
enum: ['BLOCKER', 'MAJOR', 'MINOR'],
|
|
241
467
|
},
|
|
468
|
+
type: {
|
|
469
|
+
type: 'string',
|
|
470
|
+
enum: [
|
|
471
|
+
'HTTP_404',
|
|
472
|
+
'HTTP_500',
|
|
473
|
+
'HTTP_400',
|
|
474
|
+
'NAVIGATION_ERROR',
|
|
475
|
+
'OTHER',
|
|
476
|
+
],
|
|
477
|
+
},
|
|
242
478
|
description: { type: 'string' },
|
|
243
479
|
screenshot: { type: 'string' },
|
|
244
480
|
},
|
|
@@ -264,7 +500,6 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
264
500
|
type: 'json_schema',
|
|
265
501
|
schema: outputSchema,
|
|
266
502
|
},
|
|
267
|
-
maxTurns: 25,
|
|
268
503
|
env: {
|
|
269
504
|
...process.env,
|
|
270
505
|
ANTHROPIC_API_KEY: anthropicApiKey,
|
|
@@ -275,6 +510,8 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
275
510
|
// 7. Collect messages
|
|
276
511
|
let finalResult = null;
|
|
277
512
|
let agentError = null;
|
|
513
|
+
// Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
|
|
514
|
+
let lastStructuredOutputInput = null;
|
|
278
515
|
const conversationFilePath = getConversationFilePath(browserSession.id);
|
|
279
516
|
const conversationDir = dirname(conversationFilePath);
|
|
280
517
|
await mkdir(conversationDir, { recursive: true });
|
|
@@ -297,13 +534,25 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
297
534
|
// Ignore
|
|
298
535
|
}
|
|
299
536
|
const msg = message;
|
|
537
|
+
// Capture StructuredOutput tool call input as fallback
|
|
538
|
+
// This handles SDK bug where structured_output is not populated in result
|
|
539
|
+
if (msg.type === 'assistant' && msg.message?.content) {
|
|
540
|
+
for (const block of msg.message.content) {
|
|
541
|
+
if (block.type === 'tool_use' &&
|
|
542
|
+
block.name === 'StructuredOutput' &&
|
|
543
|
+
block.input) {
|
|
544
|
+
lastStructuredOutputInput =
|
|
545
|
+
block.input;
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
}
|
|
300
549
|
if (msg.error) {
|
|
301
550
|
let errorText = msg.error;
|
|
302
551
|
if (msg.message?.content &&
|
|
303
552
|
Array.isArray(msg.message.content)) {
|
|
304
553
|
const texts = msg.message.content
|
|
305
554
|
.filter((c) => c.type === 'text')
|
|
306
|
-
.map((c) => c.text)
|
|
555
|
+
.map((c) => c.text || '')
|
|
307
556
|
.filter(Boolean);
|
|
308
557
|
if (texts.length > 0) {
|
|
309
558
|
errorText = texts.join(' ');
|
|
@@ -318,7 +567,15 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
318
567
|
message.structured_output;
|
|
319
568
|
}
|
|
320
569
|
else if (message.subtype !== 'success') {
|
|
321
|
-
|
|
570
|
+
// SDK bug workaround: If we got error_during_execution but have
|
|
571
|
+
// a StructuredOutput tool call, use that instead
|
|
572
|
+
if (lastStructuredOutputInput &&
|
|
573
|
+
message.errors?.length === 0) {
|
|
574
|
+
finalResult = lastStructuredOutputInput;
|
|
575
|
+
// Clear the error since we actually succeeded
|
|
576
|
+
agentError = null;
|
|
577
|
+
}
|
|
578
|
+
else if (!agentError) {
|
|
322
579
|
agentError =
|
|
323
580
|
message.errors?.join(', ') ||
|
|
324
581
|
'Unknown error';
|
|
@@ -334,27 +591,60 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
334
591
|
agentError = error instanceof Error ? error.message : String(error);
|
|
335
592
|
}
|
|
336
593
|
const durationMs = Date.now() - startTime;
|
|
337
|
-
// 8. Upload trace and update session
|
|
594
|
+
// 8. Upload trace, videos, screenshots with metadata, and update session
|
|
338
595
|
let traceDownloadUrl;
|
|
339
596
|
try {
|
|
340
597
|
const traceDir = getTraceDirectory(browserSession.id);
|
|
341
598
|
if (existsSync(traceDir)) {
|
|
342
599
|
const files = await readdir(traceDir);
|
|
343
600
|
if (files.length > 0) {
|
|
601
|
+
// Upload trace zip
|
|
344
602
|
const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
|
|
345
603
|
const traceBuffer = await zipDirectory(traceDir);
|
|
346
604
|
await uploadTrace(traceUrls.uploadUrl, traceBuffer);
|
|
347
605
|
traceDownloadUrl = traceUrls.downloadUrl;
|
|
606
|
+
// Upload videos from videos/ subdirectory
|
|
607
|
+
const videos = await loadSessionVideos(traceDir);
|
|
608
|
+
for (const video of videos) {
|
|
609
|
+
try {
|
|
610
|
+
const videoBuffer = await readFile(video.path);
|
|
611
|
+
const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
|
|
612
|
+
await uploadVideo(videoUrls.uploadUrl, videoBuffer);
|
|
613
|
+
}
|
|
614
|
+
catch {
|
|
615
|
+
// Ignore individual video upload errors
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
// Upload screenshots (same approach as main, with metadata)
|
|
348
619
|
const pngFiles = files.filter((f) => f.toLowerCase().endsWith('.png'));
|
|
349
620
|
for (const pngFile of pngFiles) {
|
|
350
621
|
try {
|
|
351
622
|
const pngPath = join(traceDir, pngFile);
|
|
352
623
|
const pngBuffer = await readFile(pngPath);
|
|
353
|
-
const
|
|
624
|
+
const pngStat = await stat(pngPath);
|
|
625
|
+
// Detect "key_" prefix for high-priority screenshots
|
|
626
|
+
const isKeyFrame = pngFile
|
|
627
|
+
.toLowerCase()
|
|
628
|
+
.startsWith('key_');
|
|
629
|
+
const displayName = pngFile
|
|
630
|
+
.replace(/\.png$/i, '')
|
|
631
|
+
.replace(/^key_/i, '')
|
|
632
|
+
.replace(/^\d+_/, '')
|
|
633
|
+
.replace(/-/g, ' ');
|
|
634
|
+
const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png', {
|
|
635
|
+
metadata: {
|
|
636
|
+
name: displayName,
|
|
637
|
+
description: isKeyFrame
|
|
638
|
+
? 'Key moment captured during verification'
|
|
639
|
+
: 'Screenshot captured during verification',
|
|
640
|
+
highPriority: isKeyFrame,
|
|
641
|
+
timestamp: pngStat.mtime.toISOString(),
|
|
642
|
+
},
|
|
643
|
+
});
|
|
354
644
|
await uploadScreenshot(pngUrls.uploadUrl, pngBuffer);
|
|
355
645
|
}
|
|
356
646
|
catch {
|
|
357
|
-
// Ignore
|
|
647
|
+
// Ignore individual screenshot upload errors
|
|
358
648
|
}
|
|
359
649
|
}
|
|
360
650
|
}
|
|
@@ -390,39 +680,74 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
390
680
|
catch {
|
|
391
681
|
// Ignore upload errors
|
|
392
682
|
}
|
|
393
|
-
// 9.
|
|
394
|
-
//
|
|
395
|
-
|
|
396
|
-
if (
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
683
|
+
// 9. Determine the result to use for evaluation
|
|
684
|
+
// In debug mode, use mock evaluation; otherwise use agent result
|
|
685
|
+
let resultForEval;
|
|
686
|
+
if (isDebugMode && options.debugOutcome) {
|
|
687
|
+
const mockEval = getMockEvaluation(options.debugOutcome);
|
|
688
|
+
resultForEval = {
|
|
689
|
+
...mockEval,
|
|
690
|
+
sessionId: browserSession.id,
|
|
691
|
+
sessionDir: getTraceDirectory(browserSession.id),
|
|
692
|
+
durationMs,
|
|
693
|
+
traceViewerUrl: traceDownloadUrl
|
|
694
|
+
? buildTraceViewerUrl(traceDownloadUrl)
|
|
695
|
+
: undefined,
|
|
696
|
+
checklistItemId: checklistItem.id,
|
|
697
|
+
};
|
|
698
|
+
console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
|
|
699
|
+
}
|
|
700
|
+
else {
|
|
701
|
+
const typedResult = finalResult;
|
|
702
|
+
if (agentError && !typedResult) {
|
|
703
|
+
throw new Error(`Verification failed: ${agentError}`);
|
|
412
704
|
}
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
416
|
-
browserSessionId: browserSession.id,
|
|
417
|
-
});
|
|
418
|
-
console.log(`\n\u26a0\ufe0f ${evaluation === 'partial' ? 'Partial verification' : 'Verification failed'}: ${resultForEval.evaluationReason}`);
|
|
705
|
+
if (!typedResult) {
|
|
706
|
+
throw new Error('No result received from agent');
|
|
419
707
|
}
|
|
708
|
+
resultForEval = typedResult;
|
|
709
|
+
}
|
|
710
|
+
// 10. Update checklist item based on evaluation
|
|
711
|
+
const evaluation = resultForEval.evaluation;
|
|
712
|
+
if (evaluation === 'verified') {
|
|
713
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
714
|
+
status: 'verified',
|
|
715
|
+
browserSessionId: browserSession.id,
|
|
716
|
+
});
|
|
717
|
+
console.log(`\n\u2705 Checklist item verified!`);
|
|
420
718
|
}
|
|
421
|
-
if (
|
|
422
|
-
|
|
719
|
+
else if (evaluation === 'blocked') {
|
|
720
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
721
|
+
status: 'blocked',
|
|
722
|
+
browserSessionId: browserSession.id,
|
|
723
|
+
blockedReason: resultForEval.evaluationReason,
|
|
724
|
+
});
|
|
725
|
+
// Enhanced output for Claude Code
|
|
726
|
+
console.log(`\n${'='.repeat(60)}`);
|
|
727
|
+
console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
|
|
728
|
+
console.log(`${'='.repeat(60)}`);
|
|
729
|
+
console.log(`\nIssue: ${resultForEval.evaluationReason}`);
|
|
730
|
+
if (resultForEval.issues?.length) {
|
|
731
|
+
console.log(`\nDetails:`);
|
|
732
|
+
for (const issue of resultForEval.issues) {
|
|
733
|
+
const typeStr = issue.type ? ` (${issue.type})` : '';
|
|
734
|
+
console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
if (resultForEval.traceViewerUrl) {
|
|
738
|
+
console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
|
|
739
|
+
}
|
|
740
|
+
console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
|
|
741
|
+
console.log(`${'='.repeat(60)}\n`);
|
|
423
742
|
}
|
|
424
|
-
if (
|
|
425
|
-
|
|
743
|
+
else if (evaluation === 'partial' || evaluation === 'failed') {
|
|
744
|
+
// Mark as incomplete - verification happened but requirements not fully met
|
|
745
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
746
|
+
status: 'incomplete',
|
|
747
|
+
browserSessionId: browserSession.id,
|
|
748
|
+
});
|
|
749
|
+
// Check if other items are terminal and prompt user
|
|
750
|
+
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
426
751
|
}
|
|
427
752
|
return resultForEval;
|
|
428
753
|
}
|