@weldr/runr 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +216 -0
- package/LICENSE +190 -0
- package/NOTICE +4 -0
- package/README.md +200 -0
- package/dist/cli.js +464 -0
- package/dist/commands/__tests__/report.test.js +202 -0
- package/dist/commands/compare.js +168 -0
- package/dist/commands/doctor.js +124 -0
- package/dist/commands/follow.js +251 -0
- package/dist/commands/gc.js +161 -0
- package/dist/commands/guards-only.js +89 -0
- package/dist/commands/metrics.js +441 -0
- package/dist/commands/orchestrate.js +800 -0
- package/dist/commands/paths.js +31 -0
- package/dist/commands/preflight.js +152 -0
- package/dist/commands/report.js +478 -0
- package/dist/commands/resume.js +149 -0
- package/dist/commands/run.js +538 -0
- package/dist/commands/status.js +189 -0
- package/dist/commands/summarize.js +220 -0
- package/dist/commands/version.js +82 -0
- package/dist/commands/wait.js +170 -0
- package/dist/config/__tests__/presets.test.js +104 -0
- package/dist/config/load.js +66 -0
- package/dist/config/schema.js +160 -0
- package/dist/context/__tests__/artifact.test.js +130 -0
- package/dist/context/__tests__/pack.test.js +191 -0
- package/dist/context/artifact.js +67 -0
- package/dist/context/index.js +2 -0
- package/dist/context/pack.js +273 -0
- package/dist/diagnosis/analyzer.js +678 -0
- package/dist/diagnosis/formatter.js +136 -0
- package/dist/diagnosis/index.js +6 -0
- package/dist/diagnosis/types.js +7 -0
- package/dist/env/__tests__/fingerprint.test.js +116 -0
- package/dist/env/fingerprint.js +111 -0
- package/dist/orchestrator/__tests__/policy.test.js +185 -0
- package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
- package/dist/orchestrator/artifacts.js +405 -0
- package/dist/orchestrator/state-machine.js +646 -0
- package/dist/orchestrator/types.js +88 -0
- package/dist/ownership/normalize.js +45 -0
- package/dist/repo/context.js +90 -0
- package/dist/repo/git.js +13 -0
- package/dist/repo/worktree.js +239 -0
- package/dist/store/run-store.js +107 -0
- package/dist/store/run-utils.js +69 -0
- package/dist/store/runs-root.js +126 -0
- package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
- package/dist/supervisor/__tests__/ownership.test.js +103 -0
- package/dist/supervisor/__tests__/state-machine.test.js +290 -0
- package/dist/supervisor/collision.js +240 -0
- package/dist/supervisor/evidence-gate.js +98 -0
- package/dist/supervisor/planner.js +18 -0
- package/dist/supervisor/runner.js +1562 -0
- package/dist/supervisor/scope-guard.js +55 -0
- package/dist/supervisor/state-machine.js +121 -0
- package/dist/supervisor/verification-policy.js +64 -0
- package/dist/tasks/task-metadata.js +72 -0
- package/dist/types/schemas.js +1 -0
- package/dist/verification/engine.js +49 -0
- package/dist/workers/__tests__/claude.test.js +88 -0
- package/dist/workers/__tests__/codex.test.js +81 -0
- package/dist/workers/claude.js +119 -0
- package/dist/workers/codex.js +162 -0
- package/dist/workers/json.js +22 -0
- package/dist/workers/mock.js +193 -0
- package/dist/workers/prompts.js +98 -0
- package/dist/workers/schemas.js +39 -0
- package/package.json +47 -0
- package/templates/prompts/implementer.md +70 -0
- package/templates/prompts/planner.md +62 -0
- package/templates/prompts/reviewer.md +77 -0
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diagnosis analyzer for auto-diagnosing run stop reasons.
|
|
3
|
+
*
|
|
4
|
+
* Reads timeline events, state, and logs to determine why a run stopped
|
|
5
|
+
* and what action to take next.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'node:fs';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
/**
 * Translate a diagnosis category into its stop-reason family.
 *
 * @param {string} category - Diagnosis category such as 'auth_expired'.
 * @returns {string} The family name, or 'unknown' when the category is not listed.
 */
function categoryToFamily(category) {
    // Each family lists the categories that roll up into it.
    const familyMembers = [
        ['auth', ['auth_expired']],
        ['guard', [
            'verification_cwd_mismatch',
            'scope_violation',
            'lockfile_restricted',
            'guard_violation_dirty'
        ]],
        ['verification', ['verification_failure']],
        ['worker', ['worker_parse_failure']],
        ['stall', ['stall_timeout']],
        ['budget', ['max_ticks_reached', 'time_budget_exceeded']]
    ];
    for (const [family, members] of familyMembers) {
        if (members.includes(category)) {
            return family;
        }
    }
    return 'unknown';
}
|
|
34
|
+
/**
 * Build a resume command for stop categories that a larger budget can address.
 *
 * @param {string} runId - Identifier interpolated into the command.
 * @param {string} category - Diagnosis category.
 * @param {Array<object>} events - Timeline events; searched for the current limits.
 * @returns {string|undefined} A shell command, or undefined for any other category.
 */
function generateResumeCommand(runId, category, events) {
    // Suggested limits are 1.5x the current one, rounded up.
    const scaleUp = (value) => Math.ceil(value * 1.5);
    switch (category) {
        case 'max_ticks_reached': {
            const limitEvent = events.find((evt) => evt.type === 'max_ticks_reached');
            const ticks = scaleUp(limitEvent?.payload?.max_ticks ?? 50);
            return `node dist/cli.js resume ${runId} --max-ticks ${ticks}`;
        }
        case 'time_budget_exceeded': {
            const startEvent = events.find((evt) => evt.type === 'run_started');
            const minutes = scaleUp(startEvent?.payload?.time_budget_minutes ?? 60);
            return `node dist/cli.js resume ${runId} --time ${minutes}`;
        }
        case 'stall_timeout':
            return `WORKER_TIMEOUT_MINUTES=45 node dist/cli.js resume ${runId}`;
        default:
            return undefined;
    }
}
|
|
55
|
+
/**
 * Main diagnosis entry point.
 *
 * Runs every diagnostic rule against the context, keeps the results whose
 * confidence is above zero, and reports the one with the highest confidence.
 * When no rule matches, an 'unknown' diagnosis with confidence 0.5 is reported.
 *
 * @param {object} context - Provides `runId`, `runDir`, `state` and `events`.
 * @returns {object} Diagnosis record: outcome, primary diagnosis, signals,
 *   next actions, related artifacts and a `diagnosed_at` timestamp.
 */
export function diagnoseStop(context) {
    const { runId, runDir, state, events } = context;
    // Rules are evaluated in this order.
    const rules = [
        diagnoseAuthExpired,
        diagnoseVerificationCwdMismatch,
        diagnoseScopeViolation,
        diagnoseLockfileRestricted,
        diagnoseVerificationFailure,
        diagnoseWorkerParseFailure,
        diagnoseStallTimeout,
        diagnoseMaxTicksReached,
        diagnoseTimeBudgetExceeded,
        diagnoseGuardViolationDirty
    ];
    const ranked = rules
        .map((rule) => rule(context))
        .filter((candidate) => candidate.confidence > 0);
    // Highest confidence first.
    ranked.sort((left, right) => right.confidence - left.confidence);
    const [topCandidate] = ranked;
    const best = topCandidate ?? {
        category: 'unknown',
        confidence: 0.5,
        signals: [],
        nextActions: [
            {
                title: 'Review timeline',
                command: `cat runs/${runId}/timeline.jsonl | tail -20`,
                why: 'Inspect recent events to understand what happened'
            }
        ]
    };
    // Outcome: 'complete' wins; otherwise anything not in the STOPPED phase is still running.
    let outcome;
    if (state.stop_reason === 'complete') {
        outcome = 'complete';
    }
    else if (state.phase === 'STOPPED') {
        outcome = 'stopped';
    }
    else {
        outcome = 'running';
    }
    const family = categoryToFamily(best.category);
    const resumeCommand = generateResumeCommand(runId, best.category, events);
    const relatedArtifacts = {
        report: `node dist/cli.js report ${runId} --tail 120`,
        timeline: `runs/${runId}/timeline.jsonl`,
        verify_logs: findVerifyLogs(runDir),
        worker_output: findWorkerOutput(runDir)
    };
    return {
        run_id: runId,
        outcome,
        stop_reason: state.stop_reason ?? null,
        stop_reason_family: family,
        primary_diagnosis: best.category,
        confidence: best.confidence,
        resume_command: resumeCommand,
        signals: best.signals,
        next_actions: best.nextActions,
        related_artifacts: relatedArtifacts,
        diagnosed_at: new Date().toISOString()
    };
}
|
|
119
|
+
// ============================================================================
|
|
120
|
+
// Diagnostic Rules
|
|
121
|
+
// ============================================================================
|
|
122
|
+
/**
 * Rule 1: Auth expired / login required.
 *
 * Looks at failed ping results on the first 'preflight' event and at every
 * 'worker_error' event for authentication-related keywords.
 *
 * @param {object} ctx - Diagnosis context; only `ctx.events` is read.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseAuthExpired(ctx) {
    const pingKeywords = ['login', '401', 'token', 'oauth', 'unauthorized'];
    const workerKeywords = ['login', '401', 'unauthorized'];
    const mentionsAny = (text, keywords) => keywords.some((keyword) => text.includes(keyword));
    const signals = [];
    let confidence = 0;
    // Failed preflight pings: keyword match on the message, or an explicit 'auth' category.
    const pingResults = ctx.events.find((evt) => evt.type === 'preflight')?.payload?.ping?.results;
    if (pingResults) {
        for (const result of pingResults) {
            if (result.ok) {
                continue;
            }
            const message = String(result.message ?? '').toLowerCase();
            const category = String(result.category ?? '');
            if (!mentionsAny(message, pingKeywords) && category !== 'auth') {
                continue;
            }
            signals.push({
                source: 'preflight.ping',
                pattern: 'auth_error',
                snippet: `${result.worker}: ${result.message}`
            });
            confidence = 0.95;
        }
    }
    // Worker errors raised during the run.
    for (const evt of ctx.events) {
        if (evt.type !== 'worker_error') {
            continue;
        }
        const error = String(evt.payload?.error ?? '').toLowerCase();
        if (!mentionsAny(error, workerKeywords)) {
            continue;
        }
        signals.push({
            source: 'event.worker_error',
            pattern: 'auth_error',
            snippet: error.slice(0, 200)
        });
        confidence = Math.max(confidence, 0.9);
    }
    const nextActions = [];
    if (confidence > 0) {
        nextActions.push({
            title: 'Re-authenticate workers',
            command: 'codex login && claude login',
            why: 'Worker authentication expired or invalid'
        }, {
            title: 'Run doctor checks',
            command: 'node dist/cli.js doctor',
            why: 'Verify all workers are authenticated and reachable'
        });
    }
    return {
        category: 'auth_expired',
        confidence,
        signals,
        nextActions
    };
}
|
|
194
|
+
/**
 * Rule 2: Verification CWD mismatch.
 *
 * Signals, in the order they are collected:
 *  1. a guard reason starting with 'verification_cwd_missing' on any
 *     'guard_violation' or 'preflight' event (confidence 0.95);
 *  2. path-style errors (ENOENT, missing package.json, ...) in the output of
 *     'verification' / 'verify_failure' events (confidence 0.85);
 *  3. a path-related `state.last_error` when the run stopped with
 *     'implement_blocked' (confidence 0.8).
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseVerificationCwdMismatch(ctx) {
    const cwdReasonPrefix = 'verification_cwd_missing';
    const signals = [];
    let confidence = 0;
    const isCwdReason = (reason) => typeof reason === 'string' && reason.startsWith(cwdReasonPrefix);
    // Scan every guard/preflight event: the first such event does not necessarily
    // carry the cwd reason, so stopping at it would miss a later violation.
    for (const event of ctx.events) {
        if (event.type !== 'guard_violation' && event.type !== 'preflight') {
            continue;
        }
        const reasons = event.payload?.guard?.reasons;
        const cwdReason = Array.isArray(reasons) ? reasons.find(isCwdReason) : undefined;
        if (cwdReason !== undefined) {
            signals.push({
                source: 'guard.reasons',
                pattern: 'verification_cwd_missing',
                snippet: cwdReason
            });
            confidence = 0.95;
            break; // one guard signal is enough
        }
    }
    // Check verification events for path errors
    for (const event of ctx.events) {
        if (event.type !== 'verification' && event.type !== 'verify_failure') {
            continue;
        }
        const payload = event.payload;
        const output = String(payload?.output ?? payload?.error ?? '');
        const looksLikePathError = output.includes('ENOENT') ||
            (output.includes('package.json') && output.includes('not found')) ||
            output.includes('No such file or directory') ||
            output.includes('Cannot find module');
        if (looksLikePathError) {
            signals.push({
                source: `event.${event.type}`,
                pattern: 'path_error',
                snippet: output.slice(0, 200)
            });
            confidence = Math.max(confidence, 0.85);
        }
    }
    // Check for implement_blocked with path issues
    if (ctx.state.stop_reason === 'implement_blocked') {
        // Coerce so a non-string last_error cannot throw on .includes().
        const lastError = String(ctx.state.last_error ?? '');
        if (lastError.includes('directory') ||
            lastError.includes('path') ||
            lastError.includes('ENOENT')) {
            signals.push({
                source: 'state.last_error',
                pattern: 'path_error',
                snippet: lastError.slice(0, 200)
            });
            confidence = Math.max(confidence, 0.8);
        }
    }
    // Extract the missing cwd: everything after the FIRST colon, so paths that
    // themselves contain ':' (e.g. drive letters) survive; trimmed, and falling
    // back to a placeholder when nothing usable follows the colon.
    const cwdSignal = signals.find((s) => s.pattern === 'verification_cwd_missing');
    const colonAt = cwdSignal ? cwdSignal.snippet.indexOf(':') : -1;
    const extractedPath = colonAt >= 0 ? cwdSignal.snippet.slice(colonAt + 1).trim() : '';
    const missingPath = extractedPath || 'apps/subdir';
    return {
        category: 'verification_cwd_mismatch',
        confidence,
        signals,
        nextActions: confidence > 0
            ? [
                {
                    title: 'Set verification.cwd in config',
                    command: `jq '.verification.cwd = "${missingPath}"' agent.config.json > tmp.json && mv tmp.json agent.config.json`,
                    why: 'Verification commands running in wrong directory'
                },
                {
                    title: 'Verify config and retry',
                    command: `cat agent.config.json | jq '.verification' && node dist/cli.js resume ${ctx.runId}`,
                    why: 'Check cwd is set correctly then resume the run'
                }
            ]
            : []
    };
}
|
|
272
|
+
/**
 * Rule 3: Scope violation.
 *
 * Reads the first 'guard_violation' event: a non-empty `guard.scope_violations`
 * list yields confidence 0.95, a 'scope_violation' guard reason yields at least
 * 0.9, and a 'guard_violation' stop reason alone yields at least 0.7.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseScopeViolation(ctx) {
    const signals = [];
    let confidence = 0;
    // Kept separately from the joined snippet so a path containing ',' is not truncated.
    let firstViolation;
    // Check guard violations
    const guardEvent = ctx.events.find((e) => e.type === 'guard_violation');
    if (guardEvent?.payload) {
        const guard = guardEvent.payload.guard;
        const scopeViolations = guard?.scope_violations;
        if (Array.isArray(scopeViolations) && scopeViolations.length > 0) {
            firstViolation = String(scopeViolations[0]);
            signals.push({
                source: 'guard.scope_violations',
                pattern: 'files_outside_allowlist',
                snippet: scopeViolations.slice(0, 5).join(', ')
            });
            confidence = 0.95;
        }
        const reasons = guard?.reasons;
        if (reasons?.includes('scope_violation')) {
            confidence = Math.max(confidence, 0.9);
        }
    }
    // Check stop reason
    if (ctx.state.stop_reason === 'guard_violation') {
        confidence = Math.max(confidence, 0.7);
    }
    // Without a known violating path, give generic guidance instead of a
    // sentence that trails off after "include:".
    const resumeWhy = firstViolation
        ? `After updating allowlist to include: ${firstViolation}`
        : 'After updating the allowlist to cover the files the run needs to modify';
    return {
        category: 'scope_violation',
        confidence,
        signals,
        nextActions: confidence > 0
            ? [
                {
                    title: 'View current scope config',
                    command: `cat agent.config.json | jq '.scope'`,
                    why: 'See current allowlist to understand what needs updating'
                },
                {
                    title: 'Resume with expanded scope',
                    command: `node dist/cli.js resume ${ctx.runId}`,
                    why: resumeWhy
                }
            ]
            : []
    };
}
|
|
324
|
+
/**
 * Rule 4: Lockfile restricted.
 *
 * Matches when the first 'guard_violation' event reports a non-empty
 * `guard.lockfile_violations` list.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events` and `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseLockfileRestricted(ctx) {
    const violation = ctx.events.find((evt) => evt.type === 'guard_violation');
    const touchedLockfiles = violation?.payload?.guard?.lockfile_violations;
    const matched = Boolean(touchedLockfiles) && touchedLockfiles.length > 0;
    const signals = [];
    if (matched) {
        signals.push({
            source: 'guard.lockfile_violations',
            pattern: 'lockfile_modified',
            snippet: touchedLockfiles.join(', ')
        });
    }
    const confidence = matched ? 0.95 : 0;
    const nextActions = [];
    if (confidence > 0) {
        nextActions.push({
            title: 'Resume with --allow-deps',
            command: `node dist/cli.js resume ${ctx.runId} --allow-deps`,
            why: 'Task requires installing dependencies'
        }, {
            title: 'Check which lockfiles changed',
            command: `git diff --name-only | grep -E 'package-lock|yarn.lock|pnpm-lock'`,
            why: 'See exactly which dependency files were modified'
        });
    }
    return {
        category: 'lockfile_restricted',
        confidence,
        signals,
        nextActions
    };
}
|
|
366
|
+
/**
 * Rule 5: Verification failure.
 *
 * Collects a signal for every failed 'verification' event that lists failing
 * commands (confidence 0.85) and for every 'verify_failure' event (0.9); a
 * 'verification_failed_max_retries' stop reason raises confidence to 0.95.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseVerificationFailure(ctx) {
    const signals = [];
    let confidence = 0;
    // Check for verification failure events
    for (const event of ctx.events) {
        if (event.type === 'verification') {
            const payload = event.payload;
            if (payload?.ok === false) {
                const tier = payload.tier;
                const commands = payload.commands;
                const failedCommands = commands?.filter((c) => !c.ok) ?? [];
                if (failedCommands.length > 0) {
                    signals.push({
                        source: `verification.${tier}`,
                        pattern: 'command_failed',
                        snippet: failedCommands.map((c) => c.command).join('; ')
                    });
                    confidence = Math.max(confidence, 0.85);
                }
            }
        }
        if (event.type === 'verify_failure') {
            signals.push({
                source: 'event.verify_failure',
                pattern: 'verification_failed',
                snippet: String(event.payload?.reason ?? '')
            });
            confidence = Math.max(confidence, 0.9);
        }
    }
    // Check stop reason
    if (ctx.state.stop_reason === 'verification_failed_max_retries') {
        confidence = Math.max(confidence, 0.95);
    }
    // Only 'command_failed' signals carry a command list; a 'verify_failure'
    // signal holds a free-text reason and must not be presented as the command
    // to re-run. `||` (not `??`) so an empty snippet also falls back.
    const commandSignal = signals.find((s) => s.pattern === 'command_failed');
    const failingCommand = commandSignal?.snippet.split(';')[0].trim() || 'npm test';
    return {
        category: 'verification_failure',
        confidence,
        signals,
        nextActions: confidence > 0
            ? [
                {
                    title: 'View verification logs',
                    command: `cat runs/${ctx.runId}/artifacts/tests_tier0.log 2>/dev/null || cat runs/${ctx.runId}/artifacts/verify.log`,
                    why: 'See full error output from failing tests/lint'
                },
                {
                    title: 'Resume to retry verification',
                    command: `node dist/cli.js resume ${ctx.runId}`,
                    why: `Will re-run: ${failingCommand}`
                }
            ]
            : []
    };
}
|
|
426
|
+
/**
 * Rule 6: Worker parse failure / fallback occurred.
 *
 * A parse-failure stop reason gives confidence 0.95; each 'worker_fallback'
 * event adds a signal and raises confidence to at least 0.8.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseWorkerParseFailure(ctx) {
    const parseStopReasons = ['plan_parse_failed', 'implement_parse_failed', 'review_parse_failed'];
    const signals = [];
    let confidence = 0;
    if (parseStopReasons.includes(ctx.state.stop_reason)) {
        signals.push({
            source: 'state.stop_reason',
            pattern: 'parse_failed',
            snippet: ctx.state.stop_reason
        });
        confidence = 0.95;
    }
    // Every fallback event contributes its own signal.
    for (const evt of ctx.events) {
        if (evt.type !== 'worker_fallback') {
            continue;
        }
        const details = evt.payload;
        signals.push({
            source: 'event.worker_fallback',
            pattern: 'fallback_triggered',
            snippet: `${details?.from} -> ${details?.to}: ${details?.reason}`
        });
        confidence = Math.max(confidence, 0.8);
    }
    const nextActions = [];
    if (confidence > 0) {
        nextActions.push({
            title: 'View raw worker response',
            command: `cat runs/${ctx.runId}/artifacts/last_worker_response.txt 2>/dev/null | head -100`,
            why: 'See what the worker actually returned'
        }, {
            title: 'Resume to retry with worker',
            command: `node dist/cli.js resume ${ctx.runId}`,
            why: 'Will retry the failed worker call'
        });
    }
    return {
        category: 'worker_parse_failure',
        confidence,
        signals,
        nextActions
    };
}
|
|
476
|
+
/**
|
|
477
|
+
* Rule 7: Stall timeout
|
|
478
|
+
* Detect: stalled_timeout with worker_in_flight true/false
|
|
479
|
+
*/
|
|
480
|
+
function diagnoseStallTimeout(ctx) {
|
|
481
|
+
const signals = [];
|
|
482
|
+
let confidence = 0;
|
|
483
|
+
if (ctx.state.stop_reason === 'stalled_timeout') {
|
|
484
|
+
confidence = 0.95;
|
|
485
|
+
// Check if worker was in flight
|
|
486
|
+
const stopEvent = ctx.events.find((e) => e.type === 'stop' && e.payload?.reason === 'stalled_timeout');
|
|
487
|
+
if (stopEvent?.payload) {
|
|
488
|
+
const payload = stopEvent.payload;
|
|
489
|
+
const workerInFlight = payload.worker_in_flight;
|
|
490
|
+
signals.push({
|
|
491
|
+
source: 'stop.stalled_timeout',
|
|
492
|
+
pattern: workerInFlight ? 'worker_in_flight' : 'no_activity',
|
|
493
|
+
snippet: `worker_in_flight: ${workerInFlight}`
|
|
494
|
+
});
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
const workerInFlight = signals.some((s) => s.pattern === 'worker_in_flight');
|
|
498
|
+
return {
|
|
499
|
+
category: 'stall_timeout',
|
|
500
|
+
confidence,
|
|
501
|
+
signals,
|
|
502
|
+
nextActions: confidence > 0
|
|
503
|
+
? workerInFlight
|
|
504
|
+
? [
|
|
505
|
+
{
|
|
506
|
+
title: 'Increase worker timeout',
|
|
507
|
+
command: 'WORKER_TIMEOUT_MINUTES=45 node dist/cli.js resume ...',
|
|
508
|
+
why: 'Worker call took longer than expected'
|
|
509
|
+
},
|
|
510
|
+
{
|
|
511
|
+
title: 'Check worker status',
|
|
512
|
+
command: 'node dist/cli.js doctor',
|
|
513
|
+
why: 'Verify workers are responsive'
|
|
514
|
+
}
|
|
515
|
+
]
|
|
516
|
+
: [
|
|
517
|
+
{
|
|
518
|
+
title: 'Inspect last progress',
|
|
519
|
+
command: `node dist/cli.js report ${ctx.runId} --tail 50`,
|
|
520
|
+
why: 'See what happened before the stall'
|
|
521
|
+
},
|
|
522
|
+
{
|
|
523
|
+
title: 'Resume with follow',
|
|
524
|
+
command: `node dist/cli.js resume ${ctx.runId} & node dist/cli.js follow ${ctx.runId}`,
|
|
525
|
+
why: 'Monitor progress in real-time'
|
|
526
|
+
}
|
|
527
|
+
]
|
|
528
|
+
: []
|
|
529
|
+
};
|
|
530
|
+
}
|
|
531
|
+
/**
 * Rule 8: Max ticks reached.
 *
 * Matches when `state.stop_reason` is 'max_ticks_reached' (confidence 0.95) and
 * suggests resuming with 1.5x the tick limit found on the 'max_ticks_reached'
 * event (50 is assumed when the event carries no limit).
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseMaxTicksReached(ctx) {
    const hitLimit = ctx.state.stop_reason === 'max_ticks_reached';
    // One lookup serves both the signal and the suggested limit.
    const limitEvent = ctx.events.find((evt) => evt.type === 'max_ticks_reached');
    const info = limitEvent?.payload;
    const signals = [];
    if (hitLimit && info) {
        signals.push({
            source: 'event.max_ticks_reached',
            pattern: 'tick_limit',
            snippet: `${info.ticks_used}/${info.max_ticks} ticks, milestone ${info.milestone_index + 1}/${info.milestones_total}`
        });
    }
    const confidence = hitLimit ? 0.95 : 0;
    // Suggest ~50% more ticks
    const suggestedTicks = Math.ceil((info?.max_ticks ?? 50) * 1.5);
    const nextActions = [];
    if (confidence > 0) {
        nextActions.push({
            title: 'Resume with more ticks',
            command: `node dist/cli.js resume ${ctx.runId} --max-ticks ${suggestedTicks}`,
            why: 'Run made progress but hit tick limit'
        }, {
            title: 'Check for oscillation',
            command: `cat runs/${ctx.runId}/timeline.jsonl | grep phase_start`,
            why: 'Look for repeated phase transitions'
        });
    }
    return {
        category: 'max_ticks_reached',
        confidence,
        signals,
        nextActions
    };
}
|
|
575
|
+
/**
 * Rule 9: Time budget exceeded.
 *
 * Matches when `state.stop_reason` is 'time_budget_exceeded' (confidence 0.95).
 * The suggested `--time` value is 1.5x the budget recorded on the 'run_started'
 * event, the same scaling generateResumeCommand applies, with 60 minutes
 * assumed when no usable budget is recorded.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events`, `ctx.state`, `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseTimeBudgetExceeded(ctx) {
    const signals = [];
    let confidence = 0;
    if (ctx.state.stop_reason === 'time_budget_exceeded') {
        confidence = 0.95;
        const event = ctx.events.find((e) => e.type === 'stop' && e.payload?.reason === 'time_budget_exceeded');
        if (event?.payload) {
            const payload = event.payload;
            signals.push({
                source: 'stop.time_budget_exceeded',
                pattern: 'time_limit',
                snippet: `ticks_used: ${payload.ticks_used}`
            });
        }
    }
    // Derive the suggestion from the actual budget: a fixed value could be
    // lower than what the run already had.
    const runStarted = ctx.events.find((e) => e.type === 'run_started');
    const recordedBudget = Number(runStarted?.payload?.time_budget_minutes ?? 60);
    const currentBudget = Number.isFinite(recordedBudget) && recordedBudget > 0 ? recordedBudget : 60;
    const suggestedTime = Math.ceil(currentBudget * 1.5);
    return {
        category: 'time_budget_exceeded',
        confidence,
        signals,
        nextActions: confidence > 0
            ? [
                {
                    title: 'Resume with more time',
                    command: `node dist/cli.js resume ${ctx.runId} --time ${suggestedTime}`,
                    why: 'Run made progress but hit time limit'
                }
            ]
            : []
    };
}
|
|
608
|
+
/**
 * Rule 10: Guard violation (dirty worktree).
 *
 * Matches when the first 'guard_violation' event has `guard.dirty === true`
 * and lists 'dirty_worktree' among its guard reasons.
 *
 * @param {object} ctx - Diagnosis context; reads `ctx.events` and `ctx.runId`.
 * @returns {object} Rule result with `category`, `confidence`, `signals`, `nextActions`.
 */
function diagnoseGuardViolationDirty(ctx) {
    const violation = ctx.events.find((evt) => evt.type === 'guard_violation');
    const guard = violation?.payload?.guard;
    const dirtyWorktree = guard?.dirty === true && Boolean(guard.reasons?.includes('dirty_worktree'));
    const signals = [];
    if (dirtyWorktree) {
        signals.push({
            source: 'guard.reasons',
            pattern: 'dirty_worktree',
            snippet: 'Uncommitted changes in working directory'
        });
    }
    const confidence = dirtyWorktree ? 0.95 : 0;
    const nextActions = [];
    if (confidence > 0) {
        nextActions.push({
            title: 'See uncommitted changes',
            command: `git status --short`,
            why: 'Check what changes are blocking the run'
        }, {
            title: 'Stash and retry with worktree',
            command: `git stash -u && node dist/cli.js resume ${ctx.runId} --worktree`,
            why: 'Isolates agent work from your changes'
        });
    }
    return {
        category: 'guard_violation_dirty',
        confidence,
        signals,
        nextActions
    };
}
|
|
650
|
+
// ============================================================================
|
|
651
|
+
// Helpers
|
|
652
|
+
// ============================================================================
|
|
653
|
+
/**
 * Locate a verification log under `<runDir>/artifacts`.
 *
 * Candidates are checked in order: tests_tier0.log, tests_tier1.log, verify.log.
 *
 * @param {string} runDir - Run directory.
 * @returns {string|undefined} Path of the first candidate that exists, or
 *   undefined when the artifacts directory or all candidates are missing.
 */
function findVerifyLogs(runDir) {
    const artifactsDir = path.join(runDir, 'artifacts');
    if (!fs.existsSync(artifactsDir)) {
        return undefined;
    }
    return ['tests_tier0.log', 'tests_tier1.log', 'verify.log']
        .map((fileName) => path.join(artifactsDir, fileName))
        .find((candidate) => fs.existsSync(candidate));
}
|
|
666
|
+
/**
 * Locate captured worker output under `<runDir>/artifacts`.
 *
 * Candidates are checked in order: last_worker_response.txt, worker_output.log.
 *
 * @param {string} runDir - Run directory.
 * @returns {string|undefined} Path of the first candidate that exists, or
 *   undefined when the artifacts directory or all candidates are missing.
 */
function findWorkerOutput(runDir) {
    const artifactsDir = path.join(runDir, 'artifacts');
    if (!fs.existsSync(artifactsDir)) {
        return undefined;
    }
    return ['last_worker_response.txt', 'worker_output.log']
        .map((fileName) => path.join(artifactsDir, fileName))
        .find((candidate) => fs.existsSync(candidate));
}
|