@ranger-testing/ranger-cli 1.0.13 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -183
- package/build/cli.js +219 -278
- package/build/cli.js.map +1 -1
- package/build/commands/addEnv.js +1 -1
- package/build/commands/addEnv.js.map +1 -1
- package/build/commands/authEncrypt.js +7 -6
- package/build/commands/authEncrypt.js.map +1 -1
- package/build/commands/clean.js +1 -1
- package/build/commands/clean.js.map +1 -1
- package/build/commands/config.js +5 -4
- package/build/commands/config.js.map +1 -1
- package/build/commands/dataMcpServer.js +1 -1
- package/build/commands/dataMcpServer.js.map +1 -1
- package/build/commands/env.js +17 -10
- package/build/commands/env.js.map +1 -1
- package/build/commands/feature.js +277 -285
- package/build/commands/feature.js.map +1 -1
- package/build/commands/hook.js +27 -0
- package/build/commands/hook.js.map +1 -0
- package/build/commands/hooks/disable.js +25 -0
- package/build/commands/hooks/disable.js.map +1 -0
- package/build/commands/hooks/enable.js +44 -0
- package/build/commands/hooks/enable.js.map +1 -0
- package/build/commands/hooks/exitPlanMode.js +35 -0
- package/build/commands/hooks/exitPlanMode.js.map +1 -0
- package/build/commands/hooks/index.js +10 -0
- package/build/commands/hooks/index.js.map +1 -0
- package/build/commands/hooks/output.js +53 -0
- package/build/commands/hooks/output.js.map +1 -0
- package/build/commands/hooks/planReminder.js +46 -0
- package/build/commands/hooks/planReminder.js.map +1 -0
- package/build/commands/hooks/planStart.js +30 -0
- package/build/commands/hooks/planStart.js.map +1 -0
- package/build/commands/hooks/postEdit.js +41 -0
- package/build/commands/hooks/postEdit.js.map +1 -0
- package/build/commands/hooks/preCompact.js +30 -0
- package/build/commands/hooks/preCompact.js.map +1 -0
- package/build/commands/hooks/sessionStart.js +35 -0
- package/build/commands/hooks/sessionStart.js.map +1 -0
- package/build/commands/hooks/stopHook.js +54 -0
- package/build/commands/hooks/stopHook.js.map +1 -0
- package/build/commands/index.js +1 -0
- package/build/commands/index.js.map +1 -1
- package/build/commands/skillup.js +41 -77
- package/build/commands/skillup.js.map +1 -1
- package/build/commands/start.js +1 -1
- package/build/commands/start.js.map +1 -1
- package/build/commands/status.js +47 -65
- package/build/commands/status.js.map +1 -1
- package/build/commands/update.js +32 -40
- package/build/commands/update.js.map +1 -1
- package/build/commands/updateEnv.js +1 -1
- package/build/commands/updateEnv.js.map +1 -1
- package/build/commands/useEnv.js +1 -1
- package/build/commands/useEnv.js.map +1 -1
- package/build/commands/utils/browserSessionsApi.js +1 -1
- package/build/commands/utils/browserSessionsApi.js.map +1 -1
- package/build/commands/utils/claudePlugin.js +85 -0
- package/build/commands/utils/claudePlugin.js.map +1 -0
- package/build/commands/utils/cliSecret.js +1 -1
- package/build/commands/utils/environment.js +0 -6
- package/build/commands/utils/environment.js.map +1 -1
- package/build/commands/utils/featureApi.js +82 -15
- package/build/commands/utils/featureApi.js.map +1 -1
- package/build/commands/utils/featureReportGenerator.js +37 -3
- package/build/commands/utils/featureReportGenerator.js.map +1 -1
- package/build/commands/utils/git.js +44 -0
- package/build/commands/utils/git.js.map +1 -0
- package/build/commands/utils/keychain.js +1 -1
- package/build/commands/utils/keychain.js.map +1 -1
- package/build/commands/utils/localAgentInstallationsApi.js +1 -1
- package/build/commands/utils/rangerRoot.js +30 -0
- package/build/commands/utils/rangerRoot.js.map +1 -0
- package/build/commands/utils/sessionCache.js +133 -0
- package/build/commands/utils/sessionCache.js.map +1 -0
- package/build/commands/utils/settings.js +7 -5
- package/build/commands/utils/settings.js.map +1 -1
- package/build/commands/utils/skillContent.js +28 -0
- package/build/commands/utils/skillContent.js.map +1 -0
- package/build/commands/utils/skills.js +1 -1
- package/build/commands/utils/skills.js.map +1 -1
- package/build/commands/utils/userApi.js +32 -0
- package/build/commands/utils/userApi.js.map +1 -0
- package/build/commands/verifyFeature.js +450 -105
- package/build/commands/verifyFeature.js.map +1 -1
- package/build/commands/verifyInBrowser.js +1 -1
- package/build/commands/verifyInBrowser.js.map +1 -1
- package/build/skills/bug-bash.md +31 -10
- package/build/skills/ranger/SKILL.md +164 -0
- package/build/skills/ranger/create.md +151 -0
- package/build/skills/ranger/start.md +122 -0
- package/build/skills/{feature-tracker → ranger}/verify.md +43 -17
- package/package.json +5 -3
- package/scripts/postinstall.js +18 -0
- package/build/commands/utils/mcpConfig.js +0 -1
- package/build/commands/utils/mcpConfig.js.map +0 -1
- package/build/skills/feature-tracker/SKILL.md +0 -185
- package/build/skills/feature-tracker/create.md +0 -105
- package/build/skills/feature-tracker/manage.md +0 -145
- package/build/skills/feature-tracker/report.md +0 -159
- package/build/skills/feature-tracker/start.md +0 -93
|
@@ -1,15 +1,30 @@
|
|
|
1
1
|
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
2
2
|
import { join, dirname } from 'path';
|
|
3
|
-
import { readFile, readdir, appendFile, mkdir, rm } from 'fs/promises';
|
|
3
|
+
import { readFile, readdir, appendFile, mkdir, rm, stat } from 'fs/promises';
|
|
4
4
|
import { existsSync } from 'fs';
|
|
5
5
|
import { execSync } from 'child_process';
|
|
6
6
|
import { tmpdir } from 'os';
|
|
7
7
|
import inquirer from 'inquirer';
|
|
8
8
|
import { loadSettings, resolveEnvVars, buildPlaywrightConfig, cleanupTempFiles, } from './utils/settings.js';
|
|
9
|
-
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
|
|
9
|
+
import { createBrowserSession, updateBrowserSession, getUploadUrls, uploadTrace, uploadConversation, uploadScreenshot, uploadVideo, buildTraceViewerUrl, getAnthropicApiKey, } from './utils/browserSessionsApi.js';
|
|
10
10
|
import { getToken } from './utils/keychain.js';
|
|
11
11
|
import { getActiveFeatureId } from './feature.js';
|
|
12
|
-
import { getFeature,
|
|
12
|
+
import { getFeature, updateFeature, updateChecklistItem, startSession, } from './utils/featureApi.js';
|
|
13
|
+
import { getRangerDir } from './utils/rangerRoot.js';
|
|
14
|
+
/**
|
|
15
|
+
* Get the current git branch
|
|
16
|
+
*/
|
|
17
|
+
function getGitBranch() {
|
|
18
|
+
try {
|
|
19
|
+
return execSync('git rev-parse --abbrev-ref HEAD', {
|
|
20
|
+
encoding: 'utf-8',
|
|
21
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
22
|
+
}).trim();
|
|
23
|
+
}
|
|
24
|
+
catch {
|
|
25
|
+
return undefined;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
13
28
|
/**
|
|
14
29
|
* Zip a directory and return the buffer
|
|
15
30
|
*/
|
|
@@ -26,7 +41,7 @@ async function zipDirectory(dirPath) {
|
|
|
26
41
|
* Find the trace directory for a session
|
|
27
42
|
*/
|
|
28
43
|
function getTraceDirectory(sessionId) {
|
|
29
|
-
return join(
|
|
44
|
+
return join(getRangerDir(), 'sessions', sessionId);
|
|
30
45
|
}
|
|
31
46
|
/**
|
|
32
47
|
* Get the conversation file path for a session
|
|
@@ -34,24 +49,132 @@ function getTraceDirectory(sessionId) {
|
|
|
34
49
|
function getConversationFilePath(sessionId) {
|
|
35
50
|
return join(tmpdir(), 'ranger-browser-sessions', sessionId, 'conversation.jsonl');
|
|
36
51
|
}
|
|
52
|
+
/**
|
|
53
|
+
* Load videos from a session's videos directory
|
|
54
|
+
*/
|
|
55
|
+
async function loadSessionVideos(sessionDir) {
|
|
56
|
+
const videosDir = join(sessionDir, 'videos');
|
|
57
|
+
if (!existsSync(videosDir)) {
|
|
58
|
+
return [];
|
|
59
|
+
}
|
|
60
|
+
const files = await readdir(videosDir);
|
|
61
|
+
const videoFiles = files.filter((f) => f.toLowerCase().endsWith('.webm'));
|
|
62
|
+
return videoFiles.map((filename) => ({
|
|
63
|
+
filename,
|
|
64
|
+
path: join(videosDir, filename),
|
|
65
|
+
}));
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Get mock evaluation data for debug mode
|
|
69
|
+
*/
|
|
70
|
+
function getMockEvaluation(outcome) {
|
|
71
|
+
const mockEvaluations = {
|
|
72
|
+
verified: {
|
|
73
|
+
success: true,
|
|
74
|
+
summary: '[DEBUG] Mock verification completed successfully.',
|
|
75
|
+
evaluation: 'verified',
|
|
76
|
+
evaluationReason: 'All checklist requirements were met.',
|
|
77
|
+
},
|
|
78
|
+
partial: {
|
|
79
|
+
success: false,
|
|
80
|
+
summary: '[DEBUG] Mock partial verification.',
|
|
81
|
+
evaluation: 'partial',
|
|
82
|
+
evaluationReason: 'Some requirements were not fully verified.',
|
|
83
|
+
issues: [
|
|
84
|
+
{
|
|
85
|
+
severity: 'MINOR',
|
|
86
|
+
type: 'OTHER',
|
|
87
|
+
description: 'Secondary feature not fully implemented',
|
|
88
|
+
},
|
|
89
|
+
],
|
|
90
|
+
},
|
|
91
|
+
incomplete: {
|
|
92
|
+
success: false,
|
|
93
|
+
summary: '[DEBUG] Mock incomplete verification.',
|
|
94
|
+
evaluation: 'partial',
|
|
95
|
+
evaluationReason: 'Implementation is incomplete and needs additional work.',
|
|
96
|
+
issues: [
|
|
97
|
+
{
|
|
98
|
+
severity: 'MAJOR',
|
|
99
|
+
type: 'OTHER',
|
|
100
|
+
description: 'Feature is partially implemented but missing key functionality',
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
severity: 'MINOR',
|
|
104
|
+
type: 'OTHER',
|
|
105
|
+
description: 'UI elements present but not fully functional',
|
|
106
|
+
},
|
|
107
|
+
],
|
|
108
|
+
},
|
|
109
|
+
blocked: {
|
|
110
|
+
success: false,
|
|
111
|
+
summary: '[DEBUG] Mock blocked verification.',
|
|
112
|
+
evaluation: 'blocked',
|
|
113
|
+
evaluationReason: 'HTTP 404 - Page not found.',
|
|
114
|
+
issues: [
|
|
115
|
+
{
|
|
116
|
+
severity: 'BLOCKER',
|
|
117
|
+
type: 'HTTP_404',
|
|
118
|
+
description: 'Target page returns 404 Not Found',
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
severity: 'MAJOR',
|
|
122
|
+
type: 'NAVIGATION_ERROR',
|
|
123
|
+
description: 'Unable to proceed due to missing page',
|
|
124
|
+
},
|
|
125
|
+
],
|
|
126
|
+
},
|
|
127
|
+
failed: {
|
|
128
|
+
success: false,
|
|
129
|
+
summary: '[DEBUG] Mock failed verification.',
|
|
130
|
+
evaluation: 'failed',
|
|
131
|
+
evaluationReason: 'Browser automation failed with timeout error.',
|
|
132
|
+
issues: [
|
|
133
|
+
{
|
|
134
|
+
severity: 'BLOCKER',
|
|
135
|
+
type: 'OTHER',
|
|
136
|
+
description: 'Timeout waiting for element',
|
|
137
|
+
},
|
|
138
|
+
],
|
|
139
|
+
},
|
|
140
|
+
};
|
|
141
|
+
return mockEvaluations[outcome];
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Get the debug mode prompt for minimal browser interaction
|
|
145
|
+
*/
|
|
146
|
+
function getDebugPrompt() {
|
|
147
|
+
return `You are testing browser automation. Your task is simple:
|
|
148
|
+
|
|
149
|
+
1. Navigate to https://www.mozilla.org using browser_navigate
|
|
150
|
+
2. Take a snapshot with browser_snapshot to see the page
|
|
151
|
+
3. Take a screenshot named "01_mozilla-homepage.png" using browser_take_screenshot
|
|
152
|
+
4. Return immediately with the structured output
|
|
153
|
+
|
|
154
|
+
Return your findings in the structured output format.`;
|
|
155
|
+
}
|
|
37
156
|
/**
|
|
38
157
|
* Prompt user to select a checklist item
|
|
39
158
|
*/
|
|
40
159
|
async function selectChecklistItem(items) {
|
|
160
|
+
if (items.length === 0) {
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
41
163
|
const choices = items.map((item, i) => {
|
|
42
164
|
const emoji = item.status === 'verified'
|
|
43
165
|
? '\u2705'
|
|
44
|
-
: item.status === '
|
|
45
|
-
? '\ud83d\
|
|
46
|
-
: item.status === '
|
|
47
|
-
? '\
|
|
48
|
-
: '
|
|
166
|
+
: item.status === 'incomplete'
|
|
167
|
+
? '\ud83d\udfe0' // orange circle
|
|
168
|
+
: item.status === 'blocked'
|
|
169
|
+
? '\ud83d\uded1'
|
|
170
|
+
: item.status === 'canceled'
|
|
171
|
+
? '\u26d4'
|
|
172
|
+
: '\u2b1c';
|
|
49
173
|
return {
|
|
50
174
|
name: `${i + 1}. ${emoji} ${item.description}`,
|
|
51
175
|
value: item.id,
|
|
52
176
|
};
|
|
53
177
|
});
|
|
54
|
-
choices.push({ name: '+ Add new item', value: '__new__' });
|
|
55
178
|
const { selected } = await inquirer.prompt([
|
|
56
179
|
{
|
|
57
180
|
type: 'list',
|
|
@@ -60,97 +183,161 @@ async function selectChecklistItem(items) {
|
|
|
60
183
|
choices,
|
|
61
184
|
},
|
|
62
185
|
]);
|
|
63
|
-
|
|
64
|
-
|
|
186
|
+
return items.find((i) => i.id === selected) || null;
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* Handle incomplete verification - check if all other items are terminal and prompt user
|
|
190
|
+
*/
|
|
191
|
+
async function handleIncompleteItem(featureId, incompleteItem, result) {
|
|
192
|
+
const feature = await getFeature(featureId);
|
|
193
|
+
const sessionItems = feature.checklistItems.filter((i) => i.sessionId === feature.currentSessionId);
|
|
194
|
+
const otherItems = sessionItems.filter((i) => i.id !== incompleteItem.id);
|
|
195
|
+
const allOthersTerminal = otherItems.every((i) => i.status === 'verified' ||
|
|
196
|
+
i.status === 'blocked' ||
|
|
197
|
+
i.status === 'canceled' ||
|
|
198
|
+
i.status === 'incomplete');
|
|
199
|
+
console.log(`\n${'='.repeat(60)}`);
|
|
200
|
+
console.log(`INCOMPLETE - Verification found issues`);
|
|
201
|
+
console.log(`${'='.repeat(60)}`);
|
|
202
|
+
// Display structured list of issues
|
|
203
|
+
if (result.issues && result.issues.length > 0) {
|
|
204
|
+
console.log(`\nIssues found:`);
|
|
205
|
+
for (const issue of result.issues) {
|
|
206
|
+
console.log(` • ${issue.description}`);
|
|
207
|
+
}
|
|
65
208
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
209
|
+
else if (result.evaluationReason) {
|
|
210
|
+
console.log(`\nReason: ${result.evaluationReason}`);
|
|
211
|
+
}
|
|
212
|
+
console.log(`\nNext steps:`);
|
|
213
|
+
console.log(` 1. Fix the issues above in your code`);
|
|
214
|
+
console.log(` 2. Run 'ranger verify-feature' again to re-verify`);
|
|
215
|
+
if (allOthersTerminal && otherItems.length > 0) {
|
|
216
|
+
console.log(`\nAll other checklist items are complete.`);
|
|
217
|
+
console.log(`If you're done for now, run 'ranger feature conclude-session' to end this session.`);
|
|
218
|
+
}
|
|
219
|
+
console.log(`${'='.repeat(60)}\n`);
|
|
70
220
|
}
|
|
71
221
|
/**
|
|
72
222
|
* Verify a checklist item in the browser
|
|
73
223
|
*/
|
|
74
|
-
export async function verifyFeature(
|
|
224
|
+
export async function verifyFeature(options) {
|
|
225
|
+
const isDebugMode = !!options.debugOutcome;
|
|
226
|
+
if (isDebugMode) {
|
|
227
|
+
console.log(`\n[DEBUG MODE] Running minimal browser test with outcome: ${options.debugOutcome}`);
|
|
228
|
+
}
|
|
75
229
|
// 1. Check for active feature
|
|
76
230
|
const featureId = await getActiveFeatureId();
|
|
77
231
|
if (!featureId) {
|
|
78
|
-
throw new Error('No active feature. Run: ranger feature
|
|
232
|
+
throw new Error('No active feature. Run: ranger feature resume <id> or ranger feature create');
|
|
79
233
|
}
|
|
80
234
|
// Load feature details
|
|
81
235
|
const feature = await getFeature(featureId);
|
|
236
|
+
// Update the feature's gitBranch to the current branch
|
|
237
|
+
const currentBranch = getGitBranch();
|
|
238
|
+
if (currentBranch && currentBranch !== feature.gitBranch) {
|
|
239
|
+
await updateFeature(featureId, { gitBranch: currentBranch });
|
|
240
|
+
console.log(` Updated branch to: ${currentBranch}`);
|
|
241
|
+
}
|
|
82
242
|
console.log(`\nActive feature: ${feature.name} (${featureId})`);
|
|
243
|
+
// Filter to only items in the current session
|
|
244
|
+
const currentSessionId = feature.currentSessionId;
|
|
245
|
+
const currentSessionItems = currentSessionId
|
|
246
|
+
? feature.checklistItems.filter((item) => item.sessionId === currentSessionId)
|
|
247
|
+
: feature.checklistItems;
|
|
83
248
|
// 2. Determine which checklist item we're verifying
|
|
84
249
|
let checklistItem = null;
|
|
85
250
|
let taskDescription = options.task;
|
|
86
|
-
if (options.
|
|
87
|
-
//
|
|
88
|
-
checklistItem = await addChecklistItem(featureId, {
|
|
89
|
-
description: options.newItem,
|
|
90
|
-
});
|
|
91
|
-
console.log(`Created new checklist item: ${checklistItem.description}`);
|
|
92
|
-
if (!taskDescription) {
|
|
93
|
-
taskDescription = options.newItem;
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
else if (options.item !== undefined) {
|
|
97
|
-
// Use specified item index
|
|
251
|
+
if (options.item !== undefined) {
|
|
252
|
+
// Use specified item index (1-based, relative to current session items)
|
|
98
253
|
const itemIndex = options.item - 1; // 1-based to 0-based
|
|
99
|
-
if (itemIndex < 0 || itemIndex >=
|
|
100
|
-
throw new Error(`Invalid item index: ${options.item}.
|
|
254
|
+
if (itemIndex < 0 || itemIndex >= currentSessionItems.length) {
|
|
255
|
+
throw new Error(`Invalid item index: ${options.item}. Current session has ${currentSessionItems.length} items.`);
|
|
101
256
|
}
|
|
102
|
-
checklistItem =
|
|
257
|
+
checklistItem = currentSessionItems[itemIndex];
|
|
103
258
|
if (!taskDescription) {
|
|
104
259
|
taskDescription = checklistItem.description;
|
|
105
260
|
}
|
|
106
261
|
}
|
|
107
262
|
else {
|
|
108
|
-
//
|
|
109
|
-
const
|
|
110
|
-
if (
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
263
|
+
// Check if running in non-TTY environment (CI, scripts, Claude Code, etc.)
|
|
264
|
+
const isInteractive = process.stdin.isTTY && process.stdout.isTTY;
|
|
265
|
+
if (!isInteractive) {
|
|
266
|
+
// Non-TTY mode: require --item flag, show available items
|
|
267
|
+
console.log('\nNon-interactive mode detected. The --item flag is required.');
|
|
268
|
+
console.log('\nAvailable checklist items for current session:');
|
|
269
|
+
currentSessionItems.forEach((item, i) => {
|
|
270
|
+
const emoji = item.status === 'verified'
|
|
271
|
+
? '\u2705'
|
|
272
|
+
: item.status === 'incomplete'
|
|
273
|
+
? '\ud83d\udfe0' // orange circle
|
|
274
|
+
: item.status === 'blocked'
|
|
275
|
+
? '\ud83d\uded1'
|
|
276
|
+
: item.status === 'canceled'
|
|
277
|
+
? '\u26d4'
|
|
278
|
+
: '\u2b1c';
|
|
279
|
+
console.log(` ${i + 1}. ${emoji} ${item.description}`);
|
|
121
280
|
});
|
|
122
|
-
console.log(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
281
|
+
console.log('\nUsage: ranger verify-feature --item <number>');
|
|
282
|
+
console.log('Example: ranger verify-feature --item 1');
|
|
283
|
+
throw new Error('The --item flag is required in non-interactive mode. See available items above.');
|
|
126
284
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}
|
|
285
|
+
// Interactive selection (show only current session items)
|
|
286
|
+
checklistItem = await selectChecklistItem(currentSessionItems);
|
|
287
|
+
if (!taskDescription && checklistItem) {
|
|
288
|
+
taskDescription = checklistItem.description;
|
|
132
289
|
}
|
|
133
290
|
}
|
|
134
291
|
if (!checklistItem) {
|
|
135
|
-
throw new Error('No checklist item selected');
|
|
292
|
+
throw new Error('No checklist item selected. Create items when creating the feature with -c flag.');
|
|
136
293
|
}
|
|
137
294
|
if (!taskDescription) {
|
|
138
295
|
throw new Error('No task description provided');
|
|
139
296
|
}
|
|
140
297
|
console.log(`\nVerifying: ${checklistItem.description}`);
|
|
141
298
|
console.log(`Task: ${taskDescription}`);
|
|
142
|
-
//
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
299
|
+
// Start the session if it's in ready status
|
|
300
|
+
if (feature.currentSession &&
|
|
301
|
+
feature.currentSession.status === 'ready' &&
|
|
302
|
+
feature.currentSessionId) {
|
|
303
|
+
try {
|
|
304
|
+
await startSession(featureId, feature.currentSessionId);
|
|
305
|
+
}
|
|
306
|
+
catch (error) {
|
|
307
|
+
// Ignore if session is already started (race condition)
|
|
308
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
309
|
+
if (!message.includes('already')) {
|
|
310
|
+
throw error;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
146
313
|
}
|
|
147
|
-
|
|
148
|
-
|
|
314
|
+
// Update checklist item status to verification_in_progress
|
|
315
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
316
|
+
status: 'verification_in_progress',
|
|
317
|
+
});
|
|
318
|
+
// 3. Determine which environment to use (same pattern as verifyInBrowser)
|
|
319
|
+
let activeEnv;
|
|
320
|
+
if (options.env) {
|
|
321
|
+
activeEnv = options.env;
|
|
322
|
+
}
|
|
323
|
+
else {
|
|
324
|
+
const activeEnvPath = join(getRangerDir(), 'active-env.txt');
|
|
325
|
+
if (!existsSync(activeEnvPath)) {
|
|
326
|
+
throw new Error('No active environment. Run: ranger use <env-name>');
|
|
327
|
+
}
|
|
328
|
+
activeEnv = await readFile(activeEnvPath, 'utf-8').then((s) => s.trim());
|
|
329
|
+
}
|
|
330
|
+
const envDir = join(getRangerDir(), activeEnv);
|
|
149
331
|
if (!existsSync(envDir)) {
|
|
150
332
|
throw new Error(`Environment not found at ${envDir}. Run: ranger add env ${activeEnv}`);
|
|
151
333
|
}
|
|
152
334
|
const settings = await loadSettings(activeEnv);
|
|
153
335
|
const resolvedSettings = resolveEnvVars(settings);
|
|
336
|
+
// Get base URL from settings
|
|
337
|
+
const url = resolvedSettings.baseUrl;
|
|
338
|
+
if (!url) {
|
|
339
|
+
throw new Error(`No baseUrl configured for environment "${activeEnv}". Run: ranger config set ${activeEnv} baseUrl <url>`);
|
|
340
|
+
}
|
|
154
341
|
// 4. Create browser session
|
|
155
342
|
const token = await getToken();
|
|
156
343
|
if (!token) {
|
|
@@ -161,6 +348,8 @@ export async function verifyFeature(url, options) {
|
|
|
161
348
|
settings: resolvedSettings,
|
|
162
349
|
task: taskDescription,
|
|
163
350
|
url,
|
|
351
|
+
featureId,
|
|
352
|
+
checklistItemId: checklistItem.id,
|
|
164
353
|
});
|
|
165
354
|
console.log(`Browser session created: ${browserSession.id}`);
|
|
166
355
|
const configResult = await buildPlaywrightConfig(resolvedSettings, activeEnv, browserSession?.id);
|
|
@@ -195,23 +384,79 @@ export async function verifyFeature(url, options) {
|
|
|
195
384
|
throw new Error(errorMsg);
|
|
196
385
|
}
|
|
197
386
|
// 5. UI Verifier + Evaluation Agent prompt
|
|
198
|
-
|
|
387
|
+
let verifierPrompt;
|
|
388
|
+
if (isDebugMode) {
|
|
389
|
+
verifierPrompt = getDebugPrompt();
|
|
390
|
+
}
|
|
391
|
+
else {
|
|
392
|
+
const notesSection = checklistItem.notes
|
|
393
|
+
? `\n\n## Additional Notes\n${checklistItem.notes}`
|
|
394
|
+
: '';
|
|
395
|
+
verifierPrompt = `You are a Feature Verifier. Your job is to verify a checklist item by executing a UI flow and evaluating whether it adequately completes the checklist item.
|
|
199
396
|
|
|
200
397
|
## Checklist Item to Verify
|
|
201
|
-
${checklistItem.description}
|
|
398
|
+
${checklistItem.description}${notesSection}
|
|
202
399
|
|
|
203
400
|
## Task to Execute
|
|
204
401
|
${taskDescription}
|
|
205
402
|
|
|
206
|
-
|
|
207
|
-
${url}
|
|
403
|
+
CRITICAL URL REQUIREMENT:
|
|
404
|
+
Your base URL is: ${url}
|
|
405
|
+
- You may ONLY navigate to paths under this base URL (same protocol, host, and port)
|
|
406
|
+
- For example, if the base URL is "http://localhost:3000", you can navigate to "http://localhost:3000/home", "http://localhost:3000/settings", etc.
|
|
407
|
+
- DO NOT navigate to any different domain, host, or port under any circumstances
|
|
408
|
+
- IGNORE any URLs from product documentation (mcp__ranger__get_product_docs) that have a different base URL
|
|
409
|
+
- If documentation or code diffs suggest a path exists (e.g., "/dashboard"), you may navigate to that path ONLY under the base URL above
|
|
410
|
+
- The base URL above is the ONLY authorized environment for this verification
|
|
208
411
|
|
|
209
412
|
## Instructions
|
|
210
|
-
1. Navigate to the URL using browser_navigate
|
|
413
|
+
1. Navigate to the URL above using browser_navigate
|
|
211
414
|
2. Take a snapshot with browser_snapshot to see the page
|
|
212
|
-
3.
|
|
213
|
-
4.
|
|
214
|
-
5.
|
|
415
|
+
3. **IMMEDIATELY check for blocking HTTP errors before proceeding**
|
|
416
|
+
4. Execute the task step-by-step using browser tools
|
|
417
|
+
5. **Take screenshots at key moments** (see Screenshot Guidelines below)
|
|
418
|
+
6. Document any issues found (bugs, errors, unexpected behavior)
|
|
419
|
+
7. After completing the verification, evaluate whether the result adequately verifies the checklist item
|
|
420
|
+
|
|
421
|
+
## Screenshot Guidelines - IMPORTANT
|
|
422
|
+
Take screenshots throughout the verification flow so a human can review it for completeness. Screenshots are your evidence trail.
|
|
423
|
+
|
|
424
|
+
**When to take screenshots (use browser_take_screenshot):**
|
|
425
|
+
- After initial page load (capture starting state)
|
|
426
|
+
- Before and after clicking buttons or submitting forms
|
|
427
|
+
- When important UI elements appear (modals, notifications, loading states)
|
|
428
|
+
- After navigation to new pages
|
|
429
|
+
- When verifying specific elements exist
|
|
430
|
+
- At the final state showing the completed action
|
|
431
|
+
|
|
432
|
+
**Screenshot naming:**
|
|
433
|
+
- Use descriptive filenames: "01_login-page-loaded.png", "02_form-filled.png", "03_dashboard-visible.png"
|
|
434
|
+
- Number prefixes (01_, 02_, etc.) help maintain chronological order
|
|
435
|
+
- For KEY MOMENTS that prove the checklist item is complete, prefix with "key_": "key_04_success-message.png", "key_05_final-state.png"
|
|
436
|
+
- The "key_" prefix marks screenshots as high-priority evidence for human reviewers
|
|
437
|
+
|
|
438
|
+
**Aim for 3-6 screenshots per verification** to document the complete flow. Mark 1-2 of the most important ones with the "key_" prefix.
|
|
439
|
+
|
|
440
|
+
## Critical: Early Error Detection
|
|
441
|
+
After step 2 (taking initial snapshot), IMMEDIATELY check for blocking HTTP errors:
|
|
442
|
+
|
|
443
|
+
**Blocking errors to detect:**
|
|
444
|
+
- HTTP 404: "404", "Not Found", "Page not found", "does not exist"
|
|
445
|
+
- HTTP 500: "500", "Internal Server Error", "Server Error", "Something went wrong"
|
|
446
|
+
- HTTP 400: "400", "Bad Request", "Invalid request"
|
|
447
|
+
|
|
448
|
+
**Also check for:**
|
|
449
|
+
- Framework error pages (Next.js error boundary, React error page, "Application error")
|
|
450
|
+
- Completely blank/empty pages with no content
|
|
451
|
+
- "Cannot GET /path" messages
|
|
452
|
+
|
|
453
|
+
**If ANY blocking error is detected:**
|
|
454
|
+
1. DO NOT continue with the task
|
|
455
|
+
2. Return IMMEDIATELY with evaluation: "blocked"
|
|
456
|
+
3. Set evaluationReason to describe the specific error (e.g., "HTTP 404 - Page not found at /dashboard")
|
|
457
|
+
4. Include the error in issues array with severity: "BLOCKER" and appropriate type (HTTP_404, HTTP_500, HTTP_400, or NAVIGATION_ERROR)
|
|
458
|
+
|
|
459
|
+
This early exit prevents wasting time on tasks that cannot succeed due to fundamental errors.
|
|
215
460
|
|
|
216
461
|
## Evaluation Criteria
|
|
217
462
|
- VERIFIED: The task completed successfully and the checklist item requirements are fully met
|
|
@@ -220,6 +465,7 @@ ${url}
|
|
|
220
465
|
- FAILED: The task could not be completed due to errors
|
|
221
466
|
|
|
222
467
|
Return your findings in the structured output format with your evaluation.`;
|
|
468
|
+
}
|
|
223
469
|
const outputSchema = {
|
|
224
470
|
type: 'object',
|
|
225
471
|
properties: {
|
|
@@ -239,6 +485,16 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
239
485
|
type: 'string',
|
|
240
486
|
enum: ['BLOCKER', 'MAJOR', 'MINOR'],
|
|
241
487
|
},
|
|
488
|
+
type: {
|
|
489
|
+
type: 'string',
|
|
490
|
+
enum: [
|
|
491
|
+
'HTTP_404',
|
|
492
|
+
'HTTP_500',
|
|
493
|
+
'HTTP_400',
|
|
494
|
+
'NAVIGATION_ERROR',
|
|
495
|
+
'OTHER',
|
|
496
|
+
],
|
|
497
|
+
},
|
|
242
498
|
description: { type: 'string' },
|
|
243
499
|
screenshot: { type: 'string' },
|
|
244
500
|
},
|
|
@@ -264,7 +520,6 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
264
520
|
type: 'json_schema',
|
|
265
521
|
schema: outputSchema,
|
|
266
522
|
},
|
|
267
|
-
maxTurns: 25,
|
|
268
523
|
env: {
|
|
269
524
|
...process.env,
|
|
270
525
|
ANTHROPIC_API_KEY: anthropicApiKey,
|
|
@@ -275,6 +530,8 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
275
530
|
// 7. Collect messages
|
|
276
531
|
let finalResult = null;
|
|
277
532
|
let agentError = null;
|
|
533
|
+
// Fallback: capture StructuredOutput tool call input in case SDK fails to populate structured_output
|
|
534
|
+
let lastStructuredOutputInput = null;
|
|
278
535
|
const conversationFilePath = getConversationFilePath(browserSession.id);
|
|
279
536
|
const conversationDir = dirname(conversationFilePath);
|
|
280
537
|
await mkdir(conversationDir, { recursive: true });
|
|
@@ -297,13 +554,25 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
297
554
|
// Ignore
|
|
298
555
|
}
|
|
299
556
|
const msg = message;
|
|
557
|
+
// Capture StructuredOutput tool call input as fallback
|
|
558
|
+
// This handles SDK bug where structured_output is not populated in result
|
|
559
|
+
if (msg.type === 'assistant' && msg.message?.content) {
|
|
560
|
+
for (const block of msg.message.content) {
|
|
561
|
+
if (block.type === 'tool_use' &&
|
|
562
|
+
block.name === 'StructuredOutput' &&
|
|
563
|
+
block.input) {
|
|
564
|
+
lastStructuredOutputInput =
|
|
565
|
+
block.input;
|
|
566
|
+
}
|
|
567
|
+
}
|
|
568
|
+
}
|
|
300
569
|
if (msg.error) {
|
|
301
570
|
let errorText = msg.error;
|
|
302
571
|
if (msg.message?.content &&
|
|
303
572
|
Array.isArray(msg.message.content)) {
|
|
304
573
|
const texts = msg.message.content
|
|
305
574
|
.filter((c) => c.type === 'text')
|
|
306
|
-
.map((c) => c.text)
|
|
575
|
+
.map((c) => c.text || '')
|
|
307
576
|
.filter(Boolean);
|
|
308
577
|
if (texts.length > 0) {
|
|
309
578
|
errorText = texts.join(' ');
|
|
@@ -318,7 +587,15 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
318
587
|
message.structured_output;
|
|
319
588
|
}
|
|
320
589
|
else if (message.subtype !== 'success') {
|
|
321
|
-
|
|
590
|
+
// SDK bug workaround: If we got error_during_execution but have
|
|
591
|
+
// a StructuredOutput tool call, use that instead
|
|
592
|
+
if (lastStructuredOutputInput &&
|
|
593
|
+
message.errors?.length === 0) {
|
|
594
|
+
finalResult = lastStructuredOutputInput;
|
|
595
|
+
// Clear the error since we actually succeeded
|
|
596
|
+
agentError = null;
|
|
597
|
+
}
|
|
598
|
+
else if (!agentError) {
|
|
322
599
|
agentError =
|
|
323
600
|
message.errors?.join(', ') ||
|
|
324
601
|
'Unknown error';
|
|
@@ -334,27 +611,60 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
334
611
|
agentError = error instanceof Error ? error.message : String(error);
|
|
335
612
|
}
|
|
336
613
|
const durationMs = Date.now() - startTime;
|
|
337
|
-
// 8. Upload trace and update session
|
|
614
|
+
// 8. Upload trace, videos, screenshots with metadata, and update session
|
|
338
615
|
let traceDownloadUrl;
|
|
339
616
|
try {
|
|
340
617
|
const traceDir = getTraceDirectory(browserSession.id);
|
|
341
618
|
if (existsSync(traceDir)) {
|
|
342
619
|
const files = await readdir(traceDir);
|
|
343
620
|
if (files.length > 0) {
|
|
621
|
+
// Upload trace zip
|
|
344
622
|
const traceUrls = await getUploadUrls(browserSession.id, 'trace.zip', 'zip');
|
|
345
623
|
const traceBuffer = await zipDirectory(traceDir);
|
|
346
624
|
await uploadTrace(traceUrls.uploadUrl, traceBuffer);
|
|
347
625
|
traceDownloadUrl = traceUrls.downloadUrl;
|
|
626
|
+
// Upload videos from videos/ subdirectory
|
|
627
|
+
const videos = await loadSessionVideos(traceDir);
|
|
628
|
+
for (const video of videos) {
|
|
629
|
+
try {
|
|
630
|
+
const videoBuffer = await readFile(video.path);
|
|
631
|
+
const videoUrls = await getUploadUrls(browserSession.id, video.filename, 'webm');
|
|
632
|
+
await uploadVideo(videoUrls.uploadUrl, videoBuffer);
|
|
633
|
+
}
|
|
634
|
+
catch {
|
|
635
|
+
// Ignore individual video upload errors
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
// Upload screenshots (same approach as main, with metadata)
|
|
348
639
|
const pngFiles = files.filter((f) => f.toLowerCase().endsWith('.png'));
|
|
349
640
|
for (const pngFile of pngFiles) {
|
|
350
641
|
try {
|
|
351
642
|
const pngPath = join(traceDir, pngFile);
|
|
352
643
|
const pngBuffer = await readFile(pngPath);
|
|
353
|
-
const
|
|
644
|
+
const pngStat = await stat(pngPath);
|
|
645
|
+
// Detect "key_" prefix for high-priority screenshots
|
|
646
|
+
const isKeyFrame = pngFile
|
|
647
|
+
.toLowerCase()
|
|
648
|
+
.startsWith('key_');
|
|
649
|
+
const displayName = pngFile
|
|
650
|
+
.replace(/\.png$/i, '')
|
|
651
|
+
.replace(/^key_/i, '')
|
|
652
|
+
.replace(/^\d+_/, '')
|
|
653
|
+
.replace(/-/g, ' ');
|
|
654
|
+
const pngUrls = await getUploadUrls(browserSession.id, pngFile, 'png', {
|
|
655
|
+
metadata: {
|
|
656
|
+
name: displayName,
|
|
657
|
+
description: isKeyFrame
|
|
658
|
+
? 'Key moment captured during verification'
|
|
659
|
+
: 'Screenshot captured during verification',
|
|
660
|
+
highPriority: isKeyFrame,
|
|
661
|
+
timestamp: pngStat.mtime.toISOString(),
|
|
662
|
+
},
|
|
663
|
+
});
|
|
354
664
|
await uploadScreenshot(pngUrls.uploadUrl, pngBuffer);
|
|
355
665
|
}
|
|
356
666
|
catch {
|
|
357
|
-
// Ignore
|
|
667
|
+
// Ignore individual screenshot upload errors
|
|
358
668
|
}
|
|
359
669
|
}
|
|
360
670
|
}
|
|
@@ -390,39 +700,74 @@ Return your findings in the structured output format with your evaluation.`;
|
|
|
390
700
|
catch {
|
|
391
701
|
// Ignore upload errors
|
|
392
702
|
}
|
|
393
|
-
// 9.
|
|
394
|
-
//
|
|
395
|
-
|
|
396
|
-
if (
|
|
397
|
-
const
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
703
|
+
// 9. Determine the result to use for evaluation
|
|
704
|
+
// In debug mode, use mock evaluation; otherwise use agent result
|
|
705
|
+
let resultForEval;
|
|
706
|
+
if (isDebugMode && options.debugOutcome) {
|
|
707
|
+
const mockEval = getMockEvaluation(options.debugOutcome);
|
|
708
|
+
resultForEval = {
|
|
709
|
+
...mockEval,
|
|
710
|
+
sessionId: browserSession.id,
|
|
711
|
+
sessionDir: getTraceDirectory(browserSession.id),
|
|
712
|
+
durationMs,
|
|
713
|
+
traceViewerUrl: traceDownloadUrl
|
|
714
|
+
? buildTraceViewerUrl(traceDownloadUrl)
|
|
715
|
+
: undefined,
|
|
716
|
+
checklistItemId: checklistItem.id,
|
|
717
|
+
};
|
|
718
|
+
console.log(`\n[DEBUG MODE] Using mock evaluation: ${options.debugOutcome}`);
|
|
719
|
+
}
|
|
720
|
+
else {
|
|
721
|
+
const typedResult = finalResult;
|
|
722
|
+
if (agentError && !typedResult) {
|
|
723
|
+
throw new Error(`Verification failed: ${agentError}`);
|
|
412
724
|
}
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
await updateChecklistItem(featureId, checklistItem.id, {
|
|
416
|
-
browserSessionId: browserSession.id,
|
|
417
|
-
});
|
|
418
|
-
console.log(`\n\u26a0\ufe0f ${evaluation === 'partial' ? 'Partial verification' : 'Verification failed'}: ${resultForEval.evaluationReason}`);
|
|
725
|
+
if (!typedResult) {
|
|
726
|
+
throw new Error('No result received from agent');
|
|
419
727
|
}
|
|
728
|
+
resultForEval = typedResult;
|
|
420
729
|
}
|
|
421
|
-
|
|
422
|
-
|
|
730
|
+
// 10. Update checklist item based on evaluation
|
|
731
|
+
const evaluation = resultForEval.evaluation;
|
|
732
|
+
if (evaluation === 'verified') {
|
|
733
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
734
|
+
status: 'verified',
|
|
735
|
+
browserSessionId: browserSession.id,
|
|
736
|
+
});
|
|
737
|
+
console.log(`\n\u2705 Checklist item verified!`);
|
|
423
738
|
}
|
|
424
|
-
if (
|
|
425
|
-
|
|
739
|
+
else if (evaluation === 'blocked') {
|
|
740
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
741
|
+
status: 'blocked',
|
|
742
|
+
browserSessionId: browserSession.id,
|
|
743
|
+
blockedReason: resultForEval.evaluationReason,
|
|
744
|
+
});
|
|
745
|
+
// Enhanced output for Claude Code
|
|
746
|
+
console.log(`\n${'='.repeat(60)}`);
|
|
747
|
+
console.log(`BLOCKING ISSUE DETECTED - Debug Required`);
|
|
748
|
+
console.log(`${'='.repeat(60)}`);
|
|
749
|
+
console.log(`\nIssue: ${resultForEval.evaluationReason}`);
|
|
750
|
+
if (resultForEval.issues?.length) {
|
|
751
|
+
console.log(`\nDetails:`);
|
|
752
|
+
for (const issue of resultForEval.issues) {
|
|
753
|
+
const typeStr = issue.type ? ` (${issue.type})` : '';
|
|
754
|
+
console.log(` - [${issue.severity}]${typeStr} ${issue.description}`);
|
|
755
|
+
}
|
|
756
|
+
}
|
|
757
|
+
if (resultForEval.traceViewerUrl) {
|
|
758
|
+
console.log(`\nTrace: ${resultForEval.traceViewerUrl}`);
|
|
759
|
+
}
|
|
760
|
+
console.log(`\nSuggested action: Debug this issue in your code, then run verify-feature again.`);
|
|
761
|
+
console.log(`${'='.repeat(60)}\n`);
|
|
762
|
+
}
|
|
763
|
+
else if (evaluation === 'partial' || evaluation === 'failed') {
|
|
764
|
+
// Mark as incomplete - verification happened but requirements not fully met
|
|
765
|
+
await updateChecklistItem(featureId, checklistItem.id, {
|
|
766
|
+
status: 'incomplete',
|
|
767
|
+
browserSessionId: browserSession.id,
|
|
768
|
+
});
|
|
769
|
+
// Check if other items are terminal and prompt user
|
|
770
|
+
await handleIncompleteItem(featureId, checklistItem, resultForEval);
|
|
426
771
|
}
|
|
427
772
|
return resultForEval;
|
|
428
773
|
}
|