@weldr/runr 0.3.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -1
- package/README.md +124 -111
- package/dist/audit/classifier.js +331 -0
- package/dist/cli.js +593 -282
- package/dist/commands/audit.js +259 -0
- package/dist/commands/bundle.js +180 -0
- package/dist/commands/continue.js +276 -0
- package/dist/commands/doctor.js +430 -45
- package/dist/commands/hooks.js +352 -0
- package/dist/commands/init.js +368 -8
- package/dist/commands/intervene.js +109 -0
- package/dist/commands/journal.js +167 -0
- package/dist/commands/meta.js +245 -0
- package/dist/commands/mode.js +157 -0
- package/dist/commands/orchestrate.js +29 -0
- package/dist/commands/packs.js +47 -0
- package/dist/commands/preflight.js +8 -5
- package/dist/commands/resume.js +421 -3
- package/dist/commands/run.js +63 -4
- package/dist/commands/status.js +47 -0
- package/dist/commands/submit.js +374 -0
- package/dist/config/schema.js +61 -1
- package/dist/diagnosis/analyzer.js +86 -1
- package/dist/diagnosis/formatter.js +3 -0
- package/dist/diagnosis/index.js +1 -0
- package/dist/diagnosis/stop-explainer.js +267 -0
- package/dist/diagnostics/stop-explainer.js +267 -0
- package/dist/guards/checkpoint.js +119 -0
- package/dist/journal/builder.js +497 -0
- package/dist/journal/redactor.js +68 -0
- package/dist/journal/renderer.js +220 -0
- package/dist/journal/types.js +7 -0
- package/dist/orchestrator/artifacts.js +17 -2
- package/dist/orchestrator/receipt.js +304 -0
- package/dist/output/stop-footer.js +185 -0
- package/dist/packs/actions.js +176 -0
- package/dist/packs/loader.js +200 -0
- package/dist/packs/renderer.js +46 -0
- package/dist/receipt/intervention.js +465 -0
- package/dist/receipt/writer.js +296 -0
- package/dist/redaction/redactor.js +95 -0
- package/dist/repo/context.js +147 -20
- package/dist/review/check-parser.js +211 -0
- package/dist/store/checkpoint-metadata.js +111 -0
- package/dist/store/run-store.js +21 -0
- package/dist/supervisor/runner.js +161 -10
- package/dist/tasks/task-metadata.js +74 -1
- package/dist/ux/brain.js +528 -0
- package/dist/ux/render.js +123 -0
- package/dist/ux/safe-commands.js +133 -0
- package/dist/ux/state.js +193 -0
- package/dist/ux/telemetry.js +110 -0
- package/package.json +5 -1
- package/packs/pr/pack.json +50 -0
- package/packs/pr/templates/AGENTS.md.tmpl +120 -0
- package/packs/pr/templates/CLAUDE.md.tmpl +101 -0
- package/packs/pr/templates/bundle.md.tmpl +27 -0
- package/packs/solo/pack.json +82 -0
- package/packs/solo/templates/AGENTS.md.tmpl +80 -0
- package/packs/solo/templates/CLAUDE.md.tmpl +126 -0
- package/packs/solo/templates/bundle.md.tmpl +27 -0
- package/packs/solo/templates/claude-cmd-bundle.md.tmpl +40 -0
- package/packs/solo/templates/claude-cmd-resume.md.tmpl +43 -0
- package/packs/solo/templates/claude-cmd-submit.md.tmpl +51 -0
- package/packs/solo/templates/claude-skill.md.tmpl +96 -0
- package/packs/trunk/pack.json +50 -0
- package/packs/trunk/templates/AGENTS.md.tmpl +87 -0
- package/packs/trunk/templates/CLAUDE.md.tmpl +126 -0
- package/packs/trunk/templates/bundle.md.tmpl +27 -0
- package/dist/commands/__tests__/report.test.js +0 -202
- package/dist/config/__tests__/presets.test.js +0 -104
- package/dist/context/__tests__/artifact.test.js +0 -130
- package/dist/context/__tests__/pack.test.js +0 -191
- package/dist/env/__tests__/fingerprint.test.js +0 -116
- package/dist/orchestrator/__tests__/policy.test.js +0 -185
- package/dist/orchestrator/__tests__/schema-version.test.js +0 -65
- package/dist/supervisor/__tests__/evidence-gate.test.js +0 -111
- package/dist/supervisor/__tests__/ownership.test.js +0 -103
- package/dist/supervisor/__tests__/state-machine.test.js +0 -290
- package/dist/workers/__tests__/claude.test.js +0 -88
- package/dist/workers/__tests__/codex.test.js +0 -81
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stop Diagnostics - Explains why a run stopped and suggests fixes.
|
|
3
|
+
*
|
|
4
|
+
* When review_loop_detected or other STOPPED states occur, this module
|
|
5
|
+
* analyzes the timeline and provides actionable guidance.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'node:fs';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
/**
 * Parse a timeline.jsonl file into an array of event objects.
 *
 * Every non-blank line is parsed as JSON. Lines that are malformed, or that
 * parse to something other than a plain object (`null`, numbers, strings,
 * arrays), are skipped so that callers can read properties such as
 * `event_type` on every returned entry without a null check.
 *
 * @param {string} timelinePath - Path to the timeline.jsonl file.
 * @returns {object[]} Parsed events in file order; empty when the file does not exist.
 */
export function parseTimeline(timelinePath) {
    if (!fs.existsSync(timelinePath)) {
        return [];
    }
    const content = fs.readFileSync(timelinePath, 'utf-8');
    const events = [];
    for (const line of content.split('\n')) {
        if (!line.trim()) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Skip malformed lines
            continue;
        }
        // Valid JSON is not necessarily an event: `null` or a bare number
        // would make later property access throw or be meaningless.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            events.push(parsed);
        }
    }
    return events;
}
|
|
30
|
+
/**
 * Extract review loop context from timeline events.
 *
 * Only `worker_response` events are considered. The last three responses of
 * the `review` phase are scanned for phrases that look like evidence
 * requests, and the last three responses of the `implement` phase are
 * scanned for keywords indicating which evidence was supplied.
 *
 * @param {object[]} events - Timeline events (see parseTimeline).
 * @returns {{loopCount: number, reviewRequests: string[], evidenceProvided: string[]}}
 *   `loopCount` is the number of review responses (never less than 1),
 *   `reviewRequests` holds at most five de-duplicated matched phrases, and
 *   `evidenceProvided` is a de-duplicated subset of 'typecheck', 'tests', 'build'.
 */
export function extractReviewLoopContext(events) {
    // Phrases that typically introduce a request for evidence in review feedback.
    const requestPatterns = [
        /include (.+?) (output|evidence|in evidence)/gi,
        /run (.+?) (and provide|and show|to verify)/gi,
        /provide (.+?) (evidence|output)/gi,
        /missing (.+?) (output|evidence)/gi,
    ];
    // Payloads are not guaranteed to be strings; serialise structured payloads
    // so that the text scans below cannot throw on `.match` / `.includes`.
    const textOf = (event) => {
        const raw = event.response || event.content || '';
        if (typeof raw === 'string') {
            return raw;
        }
        try {
            return JSON.stringify(raw) ?? '';
        }
        catch {
            return String(raw);
        }
    };
    const reviewEvents = events.filter(e => e.event_type === 'worker_response' && e.phase === 'review');
    const implementEvents = events.filter(e => e.event_type === 'worker_response' && e.phase === 'implement');
    // Count review rounds
    const loopCount = Math.max(1, reviewEvents.length);
    // Extract review requests (at most two matches per pattern per event)
    const reviewRequests = [];
    for (const event of reviewEvents.slice(-3)) {
        const content = textOf(event);
        for (const pattern of requestPatterns) {
            const matches = content.match(pattern);
            if (matches) {
                reviewRequests.push(...matches.slice(0, 2));
            }
        }
    }
    // Extract evidence provided
    const evidenceProvided = [];
    for (const event of implementEvents.slice(-3)) {
        const content = textOf(event);
        if (content.includes('typecheck')) {
            evidenceProvided.push('typecheck');
        }
        if (content.includes('test') && content.includes('pass')) {
            evidenceProvided.push('tests');
        }
        if (content.includes('build')) {
            evidenceProvided.push('build');
        }
    }
    return {
        loopCount,
        reviewRequests: [...new Set(reviewRequests)].slice(0, 5),
        evidenceProvided: [...new Set(evidenceProvided)]
    };
}
|
|
74
|
+
/**
 * Generate unmet checks based on review context.
 *
 * A check is unmet when a reviewer request mentions its keyword
 * (case-insensitively) and no matching evidence label was recorded. When
 * requests exist but none maps to a specific unmet check, the generic
 * 'evidence_incomplete' is returned instead.
 *
 * @param {string[]} reviewRequests - Phrases extracted from reviewer feedback.
 * @param {string[]} evidenceProvided - Evidence labels, e.g. 'typecheck', 'tests', 'build'.
 * @returns {string[]} Identifiers of the unmet checks, in table order.
 */
export function generateUnmetChecks(reviewRequests, evidenceProvided) {
    const unmet = [];
    // `evidence` lists every label that satisfies the check. Test evidence is
    // recorded under the plural label 'tests', so both spellings are accepted;
    // matching on the bare keyword alone never recognised it.
    const checkPatterns = [
        { keyword: 'typecheck', evidence: ['typecheck'], check: 'typecheck_output_missing' },
        { keyword: 'test', evidence: ['test', 'tests'], check: 'test_output_missing' },
        { keyword: 'build', evidence: ['build'], check: 'build_output_missing' },
        { keyword: 'lint', evidence: ['lint'], check: 'lint_output_missing' },
        { keyword: 'coverage', evidence: ['coverage'], check: 'test_coverage_not_reported' },
    ];
    for (const { keyword, evidence, check } of checkPatterns) {
        const requested = reviewRequests.some(r => r.toLowerCase().includes(keyword));
        const provided = evidence.some(label => evidenceProvided.includes(label));
        if (requested && !provided) {
            unmet.push(check);
        }
    }
    // If nothing specific found but we have review requests, add generic
    if (unmet.length === 0 && reviewRequests.length > 0) {
        unmet.push('evidence_incomplete');
    }
    return unmet;
}
|
|
100
|
+
/**
 * Build the list of follow-up actions to show for a stopped run.
 *
 * @param {string} stopReason - Why the run stopped. Actions are produced for
 *   'review_loop_detected', 'stalled_timeout' and 'verification_failed'.
 * @param {string} runId - Run identifier interpolated into the commands.
 * @param {Iterable<string>} unmetChecks - Unmet check identifiers; only
 *   consulted for 'review_loop_detected'.
 * @returns {{command: string, description: string}[]} Actions in display
 *   order; empty for any other stop reason.
 */
export function generateSuggestedActions(stopReason, runId, unmetChecks) {
    // Per-check capture commands; checks without an entry add no action.
    const captureActions = new Map([
        ['typecheck_output_missing', {
            command: `npm run typecheck 2>&1 | tee .runr/runs/${runId}/typecheck.log`,
            description: 'Run typecheck and capture output'
        }],
        ['test_output_missing', {
            command: `npm test 2>&1 | tee .runr/runs/${runId}/test.log`,
            description: 'Run tests and capture output'
        }],
        ['build_output_missing', {
            command: `npm run build 2>&1 | tee .runr/runs/${runId}/build.log`,
            description: 'Run build and capture output'
        }],
        ['lint_output_missing', {
            command: `npm run lint 2>&1 | tee .runr/runs/${runId}/lint.log`,
            description: 'Run lint and capture output'
        }],
    ]);
    const resumeAction = (description) => ({ command: `runr resume ${runId}`, description });
    switch (stopReason) {
        case 'review_loop_detected': {
            const suggestions = [];
            for (const check of unmetChecks) {
                const capture = captureActions.get(check);
                if (capture !== undefined) {
                    suggestions.push(capture);
                }
            }
            // Resume and intervene are always offered after the capture commands.
            suggestions.push(resumeAction('Resume the run after fixing issues'));
            suggestions.push({
                command: `runr intervene ${runId} --reason review_loop --note "Fixed manually" --cmd "npm run build"`,
                description: 'Record manual intervention and continue'
            });
            return suggestions;
        }
        case 'stalled_timeout':
            return [
                resumeAction('Resume the run (may have recovered)'),
                {
                    command: `runr intervene ${runId} --reason stalled_timeout --note "Completed manually"`,
                    description: 'Record manual completion'
                },
            ];
        case 'verification_failed':
            return [
                {
                    command: `npm run build && npm test`,
                    description: 'Fix failing verification commands'
                },
                resumeAction('Resume after fixing'),
            ];
        default:
            return [];
    }
}
|
|
167
|
+
/**
 * Generate stop diagnostics for a run from its timeline.
 *
 * Reads `timeline.jsonl` from the run store directory and assembles a
 * diagnostics object. Review-loop stops additionally get loop context and
 * unmet checks; stalled stops get last-activity information.
 *
 * @param {string} runStorePath - Directory containing the run's timeline.jsonl.
 * @param {string} runId - Run identifier used in the suggested commands.
 * @param {string} stopReason - Reason the run stopped.
 * @returns {object} Diagnostics with `stop_reason`, `explanation`,
 *   `suggested_actions` and, depending on the stop reason, `loop_count`,
 *   `last_review_requests`, `last_evidence_provided`, `unmet_checks`,
 *   `last_activity_at` and `time_since_activity_ms`.
 */
export function generateStopDiagnostics(runStorePath, runId, stopReason) {
    const timelinePath = path.join(runStorePath, 'timeline.jsonl');
    const events = parseTimeline(timelinePath);
    // Base diagnostics
    const diagnostics = {
        stop_reason: stopReason,
        explanation: getExplanation(stopReason),
        suggested_actions: []
    };
    if (stopReason === 'review_loop_detected') {
        const context = extractReviewLoopContext(events);
        diagnostics.loop_count = context.loopCount;
        diagnostics.last_review_requests = context.reviewRequests;
        diagnostics.last_evidence_provided = context.evidenceProvided;
        diagnostics.unmet_checks = generateUnmetChecks(context.reviewRequests, context.evidenceProvided);
    }
    else if (stopReason === 'stalled_timeout') {
        const lastEvent = events[events.length - 1];
        if (lastEvent) {
            diagnostics.last_activity_at = lastEvent.timestamp;
            const lastActivityMs = new Date(lastEvent.timestamp).getTime();
            // A missing or unparseable timestamp yields NaN, which would be
            // serialised as `null`; leave the field out instead.
            if (Number.isFinite(lastActivityMs)) {
                diagnostics.time_since_activity_ms = Date.now() - lastActivityMs;
            }
        }
    }
    diagnostics.suggested_actions = generateSuggestedActions(stopReason, runId, diagnostics.unmet_checks || []);
    return diagnostics;
}
|
|
196
|
+
/**
 * Map a stop reason to a human-readable explanation.
 *
 * @param {string} stopReason - Stop reason identifier.
 * @returns {string} A fixed explanation for known reasons, otherwise a
 *   generic sentence that echoes the reason.
 */
function getExplanation(stopReason) {
    // A Map (rather than an object literal) keeps keys such as 'constructor'
    // from resolving to inherited properties.
    const knownExplanations = new Map([
        ['review_loop_detected',
            'The run exceeded the maximum review rounds without passing all checks. ' +
                'The reviewer kept requesting changes that were not fully addressed.'],
        ['stalled_timeout',
            'The run timed out waiting for a response from the worker. ' +
                'The worker may have hung or encountered an unrecoverable error.'],
        ['verification_failed',
            'The verification commands failed. The implementation may have errors ' +
                'that need to be fixed before the run can continue.'],
        ['scope_violation',
            'The implementation attempted to modify files outside the allowed scope. ' +
                'Update the task scope or intervene to record the necessary changes.'],
    ]);
    const explanation = knownExplanations.get(stopReason);
    if (explanation !== undefined) {
        return explanation;
    }
    return `The run stopped with reason: ${stopReason}`;
}
|
|
217
|
+
/**
 * Persist diagnostics as pretty-printed JSON in the run store.
 *
 * @param {string} runStorePath - Directory to write into.
 * @param {object} diagnostics - Diagnostics object to serialise.
 * @returns {string} Path of the written `stop_diagnostics.json` file.
 */
export function writeStopDiagnostics(runStorePath, diagnostics) {
    const outputFile = path.join(runStorePath, 'stop_diagnostics.json');
    const serialized = JSON.stringify(diagnostics, null, 2);
    fs.writeFileSync(outputFile, serialized);
    return outputFile;
}
|
|
225
|
+
/**
 * Print a human-readable summary of stop diagnostics to the console.
 *
 * Each section (loop count, reviewer requests, unmet checks, time since last
 * activity, suggested actions) is printed only when the corresponding field
 * is present and non-empty, so a partially populated object is accepted.
 *
 * @param {string} runId - Identifier of the stopped run, shown in the header.
 * @param {object} diagnostics - Diagnostics object; only `stop_reason` is
 *   printed unconditionally.
 * @returns {void}
 */
export function printStopDiagnostics(runId, diagnostics) {
    // Every other field is treated as optional; do the same for
    // suggested_actions instead of throwing on a missing array.
    const suggestedActions = Array.isArray(diagnostics.suggested_actions)
        ? diagnostics.suggested_actions
        : [];
    console.log('');
    console.log(`Run ${runId} STOPPED: ${diagnostics.stop_reason}`);
    console.log('');
    console.log('Diagnostics:');
    if (diagnostics.loop_count) {
        console.log(` Loop count: ${diagnostics.loop_count}`);
    }
    if (diagnostics.last_review_requests && diagnostics.last_review_requests.length > 0) {
        console.log(` Last reviewer requests:`);
        for (const req of diagnostics.last_review_requests) {
            console.log(` - "${req}"`);
        }
    }
    if (diagnostics.unmet_checks && diagnostics.unmet_checks.length > 0) {
        console.log('');
        console.log(' Unmet checks:');
        for (const check of diagnostics.unmet_checks) {
            console.log(` - ${check}`);
        }
    }
    if (diagnostics.time_since_activity_ms) {
        const mins = Math.round(diagnostics.time_since_activity_ms / 60000);
        console.log(` Time since last activity: ${mins} minutes`);
    }
    if (suggestedActions.length > 0) {
        console.log('');
        console.log(' Suggested actions:');
        suggestedActions.forEach((action, i) => {
            console.log(` ${i + 1}. ${action.description}`);
            if (action.command) {
                console.log(` Run: ${action.command}`);
            }
            if (action.edit) {
                console.log(` Edit: ${action.edit}`);
            }
        });
    }
    console.log('');
}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stop Diagnostics - Explains why a run stopped and suggests fixes.
|
|
3
|
+
*
|
|
4
|
+
* When review_loop_detected or other STOPPED states occur, this module
|
|
5
|
+
* analyzes the timeline and provides actionable guidance.
|
|
6
|
+
*/
|
|
7
|
+
import fs from 'node:fs';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
/**
 * Parse a timeline.jsonl file into an array of event objects.
 *
 * Every non-blank line is parsed as JSON. Lines that are malformed, or that
 * parse to something other than a plain object (`null`, numbers, strings,
 * arrays), are skipped so that callers can read properties such as
 * `event_type` on every returned entry without a null check.
 *
 * @param {string} timelinePath - Path to the timeline.jsonl file.
 * @returns {object[]} Parsed events in file order; empty when the file does not exist.
 */
export function parseTimeline(timelinePath) {
    if (!fs.existsSync(timelinePath)) {
        return [];
    }
    const content = fs.readFileSync(timelinePath, 'utf-8');
    const events = [];
    for (const line of content.split('\n')) {
        if (!line.trim()) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Skip malformed lines
            continue;
        }
        // Valid JSON is not necessarily an event: `null` or a bare number
        // would make later property access throw or be meaningless.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
            events.push(parsed);
        }
    }
    return events;
}
|
|
30
|
+
/**
 * Extract review loop context from timeline events.
 *
 * Only `worker_response` events are considered. The last three responses of
 * the `review` phase are scanned for phrases that look like evidence
 * requests, and the last three responses of the `implement` phase are
 * scanned for keywords indicating which evidence was supplied.
 *
 * @param {object[]} events - Timeline events (see parseTimeline).
 * @returns {{loopCount: number, reviewRequests: string[], evidenceProvided: string[]}}
 *   `loopCount` is the number of review responses (never less than 1),
 *   `reviewRequests` holds at most five de-duplicated matched phrases, and
 *   `evidenceProvided` is a de-duplicated subset of 'typecheck', 'tests', 'build'.
 */
export function extractReviewLoopContext(events) {
    // Phrases that typically introduce a request for evidence in review feedback.
    const requestPatterns = [
        /include (.+?) (output|evidence|in evidence)/gi,
        /run (.+?) (and provide|and show|to verify)/gi,
        /provide (.+?) (evidence|output)/gi,
        /missing (.+?) (output|evidence)/gi,
    ];
    // Payloads are not guaranteed to be strings; serialise structured payloads
    // so that the text scans below cannot throw on `.match` / `.includes`.
    const textOf = (event) => {
        const raw = event.response || event.content || '';
        if (typeof raw === 'string') {
            return raw;
        }
        try {
            return JSON.stringify(raw) ?? '';
        }
        catch {
            return String(raw);
        }
    };
    const reviewEvents = events.filter(e => e.event_type === 'worker_response' && e.phase === 'review');
    const implementEvents = events.filter(e => e.event_type === 'worker_response' && e.phase === 'implement');
    // Count review rounds
    const loopCount = Math.max(1, reviewEvents.length);
    // Extract review requests (at most two matches per pattern per event)
    const reviewRequests = [];
    for (const event of reviewEvents.slice(-3)) {
        const content = textOf(event);
        for (const pattern of requestPatterns) {
            const matches = content.match(pattern);
            if (matches) {
                reviewRequests.push(...matches.slice(0, 2));
            }
        }
    }
    // Extract evidence provided
    const evidenceProvided = [];
    for (const event of implementEvents.slice(-3)) {
        const content = textOf(event);
        if (content.includes('typecheck')) {
            evidenceProvided.push('typecheck');
        }
        if (content.includes('test') && content.includes('pass')) {
            evidenceProvided.push('tests');
        }
        if (content.includes('build')) {
            evidenceProvided.push('build');
        }
    }
    return {
        loopCount,
        reviewRequests: [...new Set(reviewRequests)].slice(0, 5),
        evidenceProvided: [...new Set(evidenceProvided)]
    };
}
|
|
74
|
+
/**
 * Generate unmet checks based on review context.
 *
 * A check is unmet when a reviewer request mentions its keyword
 * (case-insensitively) and no matching evidence label was recorded. When
 * requests exist but none maps to a specific unmet check, the generic
 * 'evidence_incomplete' is returned instead.
 *
 * @param {string[]} reviewRequests - Phrases extracted from reviewer feedback.
 * @param {string[]} evidenceProvided - Evidence labels, e.g. 'typecheck', 'tests', 'build'.
 * @returns {string[]} Identifiers of the unmet checks, in table order.
 */
export function generateUnmetChecks(reviewRequests, evidenceProvided) {
    const unmet = [];
    // `evidence` lists every label that satisfies the check. Test evidence is
    // recorded under the plural label 'tests', so both spellings are accepted;
    // matching on the bare keyword alone never recognised it.
    const checkPatterns = [
        { keyword: 'typecheck', evidence: ['typecheck'], check: 'typecheck_output_missing' },
        { keyword: 'test', evidence: ['test', 'tests'], check: 'test_output_missing' },
        { keyword: 'build', evidence: ['build'], check: 'build_output_missing' },
        { keyword: 'lint', evidence: ['lint'], check: 'lint_output_missing' },
        { keyword: 'coverage', evidence: ['coverage'], check: 'test_coverage_not_reported' },
    ];
    for (const { keyword, evidence, check } of checkPatterns) {
        const requested = reviewRequests.some(r => r.toLowerCase().includes(keyword));
        const provided = evidence.some(label => evidenceProvided.includes(label));
        if (requested && !provided) {
            unmet.push(check);
        }
    }
    // If nothing specific found but we have review requests, add generic
    if (unmet.length === 0 && reviewRequests.length > 0) {
        unmet.push('evidence_incomplete');
    }
    return unmet;
}
|
|
100
|
+
/**
 * Build the list of follow-up actions to show for a stopped run.
 *
 * @param {string} stopReason - Why the run stopped. Actions are produced for
 *   'review_loop_detected', 'stalled_timeout' and 'verification_failed'.
 * @param {string} runId - Run identifier interpolated into the commands.
 * @param {Iterable<string>} unmetChecks - Unmet check identifiers; only
 *   consulted for 'review_loop_detected'.
 * @returns {{command: string, description: string}[]} Actions in display
 *   order; empty for any other stop reason.
 */
export function generateSuggestedActions(stopReason, runId, unmetChecks) {
    // Per-check capture commands; checks without an entry add no action.
    const captureActions = new Map([
        ['typecheck_output_missing', {
            command: `npm run typecheck 2>&1 | tee .runr/runs/${runId}/typecheck.log`,
            description: 'Run typecheck and capture output'
        }],
        ['test_output_missing', {
            command: `npm test 2>&1 | tee .runr/runs/${runId}/test.log`,
            description: 'Run tests and capture output'
        }],
        ['build_output_missing', {
            command: `npm run build 2>&1 | tee .runr/runs/${runId}/build.log`,
            description: 'Run build and capture output'
        }],
        ['lint_output_missing', {
            command: `npm run lint 2>&1 | tee .runr/runs/${runId}/lint.log`,
            description: 'Run lint and capture output'
        }],
    ]);
    const resumeAction = (description) => ({ command: `runr resume ${runId}`, description });
    switch (stopReason) {
        case 'review_loop_detected': {
            const suggestions = [];
            for (const check of unmetChecks) {
                const capture = captureActions.get(check);
                if (capture !== undefined) {
                    suggestions.push(capture);
                }
            }
            // Resume and intervene are always offered after the capture commands.
            suggestions.push(resumeAction('Resume the run after fixing issues'));
            suggestions.push({
                command: `runr intervene ${runId} --reason review_loop --note "Fixed manually" --cmd "npm run build"`,
                description: 'Record manual intervention and continue'
            });
            return suggestions;
        }
        case 'stalled_timeout':
            return [
                resumeAction('Resume the run (may have recovered)'),
                {
                    command: `runr intervene ${runId} --reason stalled_timeout --note "Completed manually"`,
                    description: 'Record manual completion'
                },
            ];
        case 'verification_failed':
            return [
                {
                    command: `npm run build && npm test`,
                    description: 'Fix failing verification commands'
                },
                resumeAction('Resume after fixing'),
            ];
        default:
            return [];
    }
}
|
|
167
|
+
/**
 * Generate stop diagnostics for a run from its timeline.
 *
 * Reads `timeline.jsonl` from the run store directory and assembles a
 * diagnostics object. Review-loop stops additionally get loop context and
 * unmet checks; stalled stops get last-activity information.
 *
 * @param {string} runStorePath - Directory containing the run's timeline.jsonl.
 * @param {string} runId - Run identifier used in the suggested commands.
 * @param {string} stopReason - Reason the run stopped.
 * @returns {object} Diagnostics with `stop_reason`, `explanation`,
 *   `suggested_actions` and, depending on the stop reason, `loop_count`,
 *   `last_review_requests`, `last_evidence_provided`, `unmet_checks`,
 *   `last_activity_at` and `time_since_activity_ms`.
 */
export function generateStopDiagnostics(runStorePath, runId, stopReason) {
    const timelinePath = path.join(runStorePath, 'timeline.jsonl');
    const events = parseTimeline(timelinePath);
    // Base diagnostics
    const diagnostics = {
        stop_reason: stopReason,
        explanation: getExplanation(stopReason),
        suggested_actions: []
    };
    if (stopReason === 'review_loop_detected') {
        const context = extractReviewLoopContext(events);
        diagnostics.loop_count = context.loopCount;
        diagnostics.last_review_requests = context.reviewRequests;
        diagnostics.last_evidence_provided = context.evidenceProvided;
        diagnostics.unmet_checks = generateUnmetChecks(context.reviewRequests, context.evidenceProvided);
    }
    else if (stopReason === 'stalled_timeout') {
        const lastEvent = events[events.length - 1];
        if (lastEvent) {
            diagnostics.last_activity_at = lastEvent.timestamp;
            const lastActivityMs = new Date(lastEvent.timestamp).getTime();
            // A missing or unparseable timestamp yields NaN, which would be
            // serialised as `null`; leave the field out instead.
            if (Number.isFinite(lastActivityMs)) {
                diagnostics.time_since_activity_ms = Date.now() - lastActivityMs;
            }
        }
    }
    diagnostics.suggested_actions = generateSuggestedActions(stopReason, runId, diagnostics.unmet_checks || []);
    return diagnostics;
}
|
|
196
|
+
/**
 * Map a stop reason to a human-readable explanation.
 *
 * @param {string} stopReason - Stop reason identifier.
 * @returns {string} A fixed explanation for known reasons, otherwise a
 *   generic sentence that echoes the reason.
 */
function getExplanation(stopReason) {
    // A Map (rather than an object literal) keeps keys such as 'constructor'
    // from resolving to inherited properties.
    const knownExplanations = new Map([
        ['review_loop_detected',
            'The run exceeded the maximum review rounds without passing all checks. ' +
                'The reviewer kept requesting changes that were not fully addressed.'],
        ['stalled_timeout',
            'The run timed out waiting for a response from the worker. ' +
                'The worker may have hung or encountered an unrecoverable error.'],
        ['verification_failed',
            'The verification commands failed. The implementation may have errors ' +
                'that need to be fixed before the run can continue.'],
        ['scope_violation',
            'The implementation attempted to modify files outside the allowed scope. ' +
                'Update the task scope or intervene to record the necessary changes.'],
    ]);
    const explanation = knownExplanations.get(stopReason);
    if (explanation !== undefined) {
        return explanation;
    }
    return `The run stopped with reason: ${stopReason}`;
}
|
|
217
|
+
/**
 * Persist diagnostics as pretty-printed JSON in the run store.
 *
 * @param {string} runStorePath - Directory to write into.
 * @param {object} diagnostics - Diagnostics object to serialise.
 * @returns {string} Path of the written `stop_diagnostics.json` file.
 */
export function writeStopDiagnostics(runStorePath, diagnostics) {
    const outputFile = path.join(runStorePath, 'stop_diagnostics.json');
    const serialized = JSON.stringify(diagnostics, null, 2);
    fs.writeFileSync(outputFile, serialized);
    return outputFile;
}
|
|
225
|
+
/**
 * Print a human-readable summary of stop diagnostics to the console.
 *
 * Each section (loop count, reviewer requests, unmet checks, time since last
 * activity, suggested actions) is printed only when the corresponding field
 * is present and non-empty, so a partially populated object is accepted.
 *
 * @param {string} runId - Identifier of the stopped run, shown in the header.
 * @param {object} diagnostics - Diagnostics object; only `stop_reason` is
 *   printed unconditionally.
 * @returns {void}
 */
export function printStopDiagnostics(runId, diagnostics) {
    // Every other field is treated as optional; do the same for
    // suggested_actions instead of throwing on a missing array.
    const suggestedActions = Array.isArray(diagnostics.suggested_actions)
        ? diagnostics.suggested_actions
        : [];
    console.log('');
    console.log(`Run ${runId} STOPPED: ${diagnostics.stop_reason}`);
    console.log('');
    console.log('Diagnostics:');
    if (diagnostics.loop_count) {
        console.log(` Loop count: ${diagnostics.loop_count}`);
    }
    if (diagnostics.last_review_requests && diagnostics.last_review_requests.length > 0) {
        console.log(` Last reviewer requests:`);
        for (const req of diagnostics.last_review_requests) {
            console.log(` - "${req}"`);
        }
    }
    if (diagnostics.unmet_checks && diagnostics.unmet_checks.length > 0) {
        console.log('');
        console.log(' Unmet checks:');
        for (const check of diagnostics.unmet_checks) {
            console.log(` - ${check}`);
        }
    }
    if (diagnostics.time_since_activity_ms) {
        const mins = Math.round(diagnostics.time_since_activity_ms / 60000);
        console.log(` Time since last activity: ${mins} minutes`);
    }
    if (suggestedActions.length > 0) {
        console.log('');
        console.log(' Suggested actions:');
        suggestedActions.forEach((action, i) => {
            console.log(` ${i + 1}. ${action.description}`);
            if (action.command) {
                console.log(` Run: ${action.command}`);
            }
            if (action.edit) {
                console.log(` Edit: ${action.edit}`);
            }
        });
    }
    console.log('');
}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Checkpoint commit detection and protection.
|
|
3
|
+
*
|
|
4
|
+
* Prevents accidental amendment of checkpoint commits,
|
|
5
|
+
* which would rewrite verified/audited history.
|
|
6
|
+
*/
|
|
7
|
+
import { execSync } from 'node:child_process';
|
|
8
|
+
import fs from 'node:fs';
|
|
9
|
+
import path from 'node:path';
|
|
10
|
+
import { getRunsRoot } from '../store/runs-root.js';
|
|
11
|
+
/**
 * Determine whether HEAD of the given repository is a Runr checkpoint commit.
 *
 * Three checks are tried in order and the first hit wins:
 *  1. the commit subject starts with "chore(runr): checkpoint";
 *  2. the commit carries the trailer "Runr-Checkpoint: true";
 *  3. some run's state.json records HEAD's SHA as `checkpoint_commit_sha`.
 *
 * @param {string} repoPath - Working directory for git and for locating runs.
 * @returns {{isCheckpoint: boolean, sha: string, runId?: string, detectedBy?: string}}
 *   `detectedBy` is 'subject', 'trailer' or 'state' on a hit. Any failure
 *   while querying git yields `{ isCheckpoint: false, sha: '' }`.
 */
export function isCheckpointCommit(repoPath) {
    // Scan the run directories (14-digit names) for a state.json that
    // references `headSha`. Read errors are ignored on purpose.
    const findRunIdForSha = (runsRoot, headSha) => {
        try {
            for (const entry of fs.readdirSync(runsRoot, { withFileTypes: true })) {
                if (!entry.isDirectory() || !/^\d{14}$/.test(entry.name)) {
                    continue;
                }
                const statePath = path.join(runsRoot, entry.name, 'state.json');
                if (!fs.existsSync(statePath)) {
                    continue;
                }
                try {
                    const state = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
                    if (state.checkpoint_commit_sha === headSha) {
                        return entry.name;
                    }
                }
                catch { /* ignore parse errors */ }
            }
        }
        catch { /* ignore read errors */ }
        return undefined;
    };
    try {
        // NUL-separated: full SHA, subject, checkpoint trailer, run-id trailer.
        const format = '%H%x00%s%x00%(trailers:key=Runr-Checkpoint,valueonly)%x00%(trailers:key=Runr-Run-Id,valueonly)';
        const raw = execSync(`git log -1 --format="${format}" HEAD`, {
            cwd: repoPath,
            encoding: 'utf-8',
            stdio: ['pipe', 'pipe', 'pipe']
        });
        const fields = raw.trim().split('\x00');
        const sha = fields[0];
        const subject = fields[1];
        const checkpointTrailer = fields[2];
        const runIdFromTrailer = fields[3]?.trim();
        // Method 1: subject prefix, with the run id taken from the subject when present
        if (subject.startsWith('chore(runr): checkpoint')) {
            const idInSubject = subject.match(/checkpoint (\d{14})/);
            return {
                isCheckpoint: true,
                sha,
                runId: idInSubject?.[1] || runIdFromTrailer,
                detectedBy: 'subject'
            };
        }
        // Method 2: explicit trailer
        if (checkpointTrailer?.trim().toLowerCase() === 'true') {
            return {
                isCheckpoint: true,
                sha,
                runId: runIdFromTrailer,
                detectedBy: 'trailer'
            };
        }
        // Method 3: recorded in a run's state.json
        const runsRoot = getRunsRoot(repoPath);
        if (fs.existsSync(runsRoot)) {
            const owningRun = findRunIdForSha(runsRoot, sha);
            if (owningRun !== undefined) {
                return {
                    isCheckpoint: true,
                    sha,
                    runId: owningRun,
                    detectedBy: 'state'
                };
            }
        }
        // Not a checkpoint
        return { isCheckpoint: false, sha };
    }
    catch {
        // Git command failed (no commits, not a repo, etc.)
        return { isCheckpoint: false, sha: '' };
    }
}
|
|
82
|
+
/**
 * Decide whether amending HEAD is permitted.
 *
 * Amending is refused when HEAD is a checkpoint commit, unless the caller
 * forces it; a forced amend is allowed but carries a warning.
 *
 * @param {string} repoPath - Repository to inspect.
 * @param {boolean} [forceOverride=false] - Allow amending a checkpoint commit.
 * @param {boolean} [ledgerMode=false] - Adds a ledger-mode note to the refusal message.
 * @returns {{allowed: boolean, checkpointInfo: object, error?: string}}
 *   `error` holds the warning (forced amend) or the refusal message.
 */
export function checkAmendAllowed(repoPath, forceOverride = false, ledgerMode = false) {
    const checkpointInfo = isCheckpointCommit(repoPath);
    if (!checkpointInfo.isCheckpoint) {
        return { allowed: true, checkpointInfo };
    }
    if (forceOverride) {
        // Force allowed, but the warning travels back in `error`
        const shortSha = checkpointInfo.sha.slice(0, 7);
        const runLabel = checkpointInfo.runId || 'unknown';
        return {
            allowed: true,
            checkpointInfo,
            error: `Warning: Amending checkpoint commit ${shortSha} (run ${runLabel}).
This rewrites audited history. Proceed with caution.`
        };
    }
    // Optional paragraph inserted between the refusal and the advice.
    const ledgerNote = ledgerMode
        ? `

Ledger mode: checkpoint history is immutable.`
        : '';
    const refusal = `Refusing to amend: HEAD is a Runr checkpoint commit (verified work).
This would rewrite audited history.${ledgerNote}

If you really mean it: re-run with --force.
Better alternative: create a follow-up commit with trailers instead.`;
    return {
        allowed: false,
        error: refusal,
        checkpointInfo
    };
}
|