@weldr/runr 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +216 -0
- package/LICENSE +190 -0
- package/NOTICE +4 -0
- package/README.md +200 -0
- package/dist/cli.js +464 -0
- package/dist/commands/__tests__/report.test.js +202 -0
- package/dist/commands/compare.js +168 -0
- package/dist/commands/doctor.js +124 -0
- package/dist/commands/follow.js +251 -0
- package/dist/commands/gc.js +161 -0
- package/dist/commands/guards-only.js +89 -0
- package/dist/commands/metrics.js +441 -0
- package/dist/commands/orchestrate.js +800 -0
- package/dist/commands/paths.js +31 -0
- package/dist/commands/preflight.js +152 -0
- package/dist/commands/report.js +478 -0
- package/dist/commands/resume.js +149 -0
- package/dist/commands/run.js +538 -0
- package/dist/commands/status.js +189 -0
- package/dist/commands/summarize.js +220 -0
- package/dist/commands/version.js +82 -0
- package/dist/commands/wait.js +170 -0
- package/dist/config/__tests__/presets.test.js +104 -0
- package/dist/config/load.js +66 -0
- package/dist/config/schema.js +160 -0
- package/dist/context/__tests__/artifact.test.js +130 -0
- package/dist/context/__tests__/pack.test.js +191 -0
- package/dist/context/artifact.js +67 -0
- package/dist/context/index.js +2 -0
- package/dist/context/pack.js +273 -0
- package/dist/diagnosis/analyzer.js +678 -0
- package/dist/diagnosis/formatter.js +136 -0
- package/dist/diagnosis/index.js +6 -0
- package/dist/diagnosis/types.js +7 -0
- package/dist/env/__tests__/fingerprint.test.js +116 -0
- package/dist/env/fingerprint.js +111 -0
- package/dist/orchestrator/__tests__/policy.test.js +185 -0
- package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
- package/dist/orchestrator/artifacts.js +405 -0
- package/dist/orchestrator/state-machine.js +646 -0
- package/dist/orchestrator/types.js +88 -0
- package/dist/ownership/normalize.js +45 -0
- package/dist/repo/context.js +90 -0
- package/dist/repo/git.js +13 -0
- package/dist/repo/worktree.js +239 -0
- package/dist/store/run-store.js +107 -0
- package/dist/store/run-utils.js +69 -0
- package/dist/store/runs-root.js +126 -0
- package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
- package/dist/supervisor/__tests__/ownership.test.js +103 -0
- package/dist/supervisor/__tests__/state-machine.test.js +290 -0
- package/dist/supervisor/collision.js +240 -0
- package/dist/supervisor/evidence-gate.js +98 -0
- package/dist/supervisor/planner.js +18 -0
- package/dist/supervisor/runner.js +1562 -0
- package/dist/supervisor/scope-guard.js +55 -0
- package/dist/supervisor/state-machine.js +121 -0
- package/dist/supervisor/verification-policy.js +64 -0
- package/dist/tasks/task-metadata.js +72 -0
- package/dist/types/schemas.js +1 -0
- package/dist/verification/engine.js +49 -0
- package/dist/workers/__tests__/claude.test.js +88 -0
- package/dist/workers/__tests__/codex.test.js +81 -0
- package/dist/workers/claude.js +119 -0
- package/dist/workers/codex.js +162 -0
- package/dist/workers/json.js +22 -0
- package/dist/workers/mock.js +193 -0
- package/dist/workers/prompts.js +98 -0
- package/dist/workers/schemas.js +39 -0
- package/package.json +47 -0
- package/templates/prompts/implementer.md +70 -0
- package/templates/prompts/planner.md +62 -0
- package/templates/prompts/reviewer.md +77 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { execa } from 'execa';
|
|
2
|
+
/**
 * Extract assistant text from Codex JSONL output.
 *
 * Each non-empty line of `output` is parsed as one JSON event; lines that are
 * not valid JSON objects are skipped. Text is collected from every event that
 * matches one of these shapes (checked in this order for each event):
 *   1. `item.completed` whose item type is `agent_message` or `message`
 *      (uses `item.text`, falling back to `item.content`)
 *   2. any other `item.completed` carrying `item.text` (reasoning, etc.)
 *   3. `response` / `turn.completed` events with a `message` object
 *      (uses `message.content`, falling back to `message.text`)
 *   4. `response` events with top-level `content` or `text`
 *
 * A payload may be a plain string or an array of content parts (strings or
 * objects with a `text` field). Parts are flattened to text so structured
 * content never ends up as "[object Object]" in the result.
 *
 * @param {string} output - Raw JSONL text to scan.
 * @returns {string} Text of all matching events, in input order, joined with newlines.
 */
function extractTextFromCodexJsonl(output) {
    // Flatten a string or an array of content parts into plain text.
    const toText = (value) => {
        if (typeof value === 'string') {
            return value;
        }
        if (!Array.isArray(value)) {
            return '';
        }
        return value
            .map((part) => (typeof part === 'string' ? part : toText(part?.text)))
            .filter(Boolean)
            .join('\n');
    };
    // First candidate that yields non-empty text wins.
    const firstText = (...candidates) => {
        for (const candidate of candidates) {
            const text = toText(candidate);
            if (text) {
                return text;
            }
        }
        return '';
    };
    const texts = [];
    const lines = String(output ?? '').trim().split('\n').filter(Boolean);
    for (const line of lines) {
        let event;
        try {
            event = JSON.parse(line);
        }
        catch {
            // Skip malformed lines
            continue;
        }
        // Valid JSON that is not an object (null, numbers, strings) carries no event.
        if (event === null || typeof event !== 'object') {
            continue;
        }
        let text = '';
        if (event.type === 'item.completed') {
            const item = event.item;
            if (item) {
                const isMessage = item.type === 'agent_message' || item.type === 'message';
                text = isMessage ? firstText(item.text, item.content) : toText(item.text);
            }
        }
        else if (event.type === 'response' || event.type === 'turn.completed') {
            if (event.message) {
                text = firstText(event.message.content, event.message.text);
            }
            else if (event.type === 'response') {
                // Direct content on the event itself
                text = firstText(event.content, event.text);
            }
        }
        if (text) {
            texts.push(text);
        }
    }
    return texts.join('\n');
}
|
|
51
|
+
/**
 * Run the configured Codex binary against a repository and report the outcome.
 *
 * The prompt is fed on stdin, `-C <repo_path>` is appended to the configured
 * args, and the process is given five minutes. When `worker.output` is
 * `'jsonl'` the assistant text is extracted from stdout; if extraction yields
 * nothing the raw stdout is reported instead.
 *
 * @param {{ worker: { bin: string, args: string[], output?: string }, repo_path: string, prompt: string }} input
 * @returns {Promise<{ status: 'ok' | 'failed', commands_run: string[], observations: string[] }>}
 *   Never rejects for process failures; they are reported as `status: 'failed'`.
 */
export async function runCodex(input) {
    const { bin, args } = input.worker;
    // Build argv: base args + repo path via -C
    const argv = args.concat(['-C', input.repo_path]);
    const commandLine = `${bin} ${argv.join(' ')}`;
    const execOptions = {
        cwd: input.repo_path,
        input: input.prompt,
        stdout: 'pipe',
        stderr: 'pipe',
        timeout: 300000 // 5 min timeout
    };
    try {
        const { stdout, exitCode } = await execa(bin, argv, execOptions);
        let observation = stdout;
        if (input.worker.output === 'jsonl') {
            // Fall back to raw stdout when no assistant text could be extracted.
            observation = extractTextFromCodexJsonl(stdout) || stdout;
        }
        return {
            status: exitCode === 0 ? 'ok' : 'failed',
            commands_run: [commandLine],
            observations: [observation]
        };
    }
    catch (error) {
        const failure = error.stdout || error.stderr || error.message || 'Codex command failed';
        return {
            status: 'failed',
            commands_run: [commandLine],
            observations: [failure]
        };
    }
}
|
|
83
|
+
/**
 * Classify error output into categories for preflight reporting.
 *
 * Matching is case-insensitive and checked in the order auth, network,
 * rate limit; the first category with a hit wins. HTTP status codes (401, 429)
 * only count when they stand alone, i.e. are not part of a longer number such
 * as a port or request id, so "127.0.0.1:14010" is not mistaken for a 401.
 *
 * @param {string} output - Error text (stderr, stdout or an error message).
 * @returns {'auth' | 'network' | 'rate_limit' | 'unknown'}
 */
function classifyError(output) {
    const lower = String(output ?? '').toLowerCase();
    const mentions = (needles) => needles.some((needle) => lower.includes(needle));
    // A status code must not be embedded in a longer digit run.
    const hasStatus = (code) => new RegExp(`(?<!\\d)${code}(?!\\d)`).test(lower);
    // Auth errors
    const authHints = [
        'oauth', 'token expired', 'authentication', 'login',
        'unauthorized', 'not authenticated', 'sign in'
    ];
    if (mentions(authHints) || hasStatus(401)) {
        return 'auth';
    }
    // Network errors
    const networkHints = ['enotfound', 'econnrefused', 'network', 'timeout', 'econnreset', 'socket'];
    if (mentions(networkHints)) {
        return 'network';
    }
    // Rate limit errors
    const rateLimitHints = ['rate limit', 'too many requests', 'quota'];
    if (mentions(rateLimitHints) || hasStatus(429)) {
        return 'rate_limit';
    }
    return 'unknown';
}
|
|
108
|
+
/**
 * Ping Codex to verify auth and connectivity.
 *
 * Runs the worker binary with its configured args (no `-C` repo path), sends a
 * tiny prompt on stdin and waits at most 15 seconds. Success means the process
 * exited with code 0. Every failure is reported as a result object, never as a
 * rejection, with the first 200 characters of the error text and a category
 * from classifyError(); timeouts are always categorised as `network`.
 *
 * @param {{ bin: string, args: string[] }} worker
 * @returns {Promise<{ ok: boolean, worker: 'codex', ms: number, category?: string, message?: string }>}
 */
export async function pingCodex(worker) {
    const { bin, args } = worker;
    const startedAt = Date.now();
    // Build minimal argv (no -C repo path for ping)
    const argv = [...args];
    const failure = (ms, category, message) => ({ ok: false, worker: 'codex', ms, category, message });
    try {
        const outcome = await execa(bin, argv, {
            input: 'Respond with exactly: ok',
            stdout: 'pipe',
            stderr: 'pipe',
            timeout: 15000 // 15s timeout for ping
        });
        const ms = Date.now() - startedAt;
        if (outcome.exitCode !== 0) {
            // Non-zero exit
            const text = outcome.stderr || outcome.stdout || '';
            return failure(ms, classifyError(text), text.slice(0, 200));
        }
        return { ok: true, worker: 'codex', ms };
    }
    catch (error) {
        const ms = Date.now() - startedAt;
        const text = error.stderr || error.stdout || error.message || 'Ping failed';
        const timedOut = error.code === 'ETIMEDOUT' ||
            Boolean(error.message && error.message.includes('timed out'));
        if (timedOut) {
            return failure(ms, 'network', 'Ping timed out');
        }
        return failure(ms, classifyError(text), text.slice(0, 200));
    }
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Return the text between the first `BEGIN_JSON` marker and the first
 * `END_JSON` marker that follows it, trimmed.
 *
 * The end marker is searched only after the start marker, so a stray
 * `END_JSON` earlier in the output (for example in echoed instructions) does
 * not hide a well-formed block that comes later.
 *
 * @param {string} output - Raw worker output.
 * @returns {string | null} The enclosed text, or null when no complete
 *   BEGIN_JSON … END_JSON pair exists.
 */
export function extractJsonBlock(output) {
    const BEGIN = 'BEGIN_JSON';
    const END = 'END_JSON';
    const start = output.indexOf(BEGIN);
    if (start === -1) {
        return null;
    }
    const bodyStart = start + BEGIN.length;
    const end = output.indexOf(END, bodyStart);
    if (end === -1) {
        return null;
    }
    return output.slice(bodyStart, end).trim();
}
|
|
9
|
+
/**
 * Parse worker output as JSON and validate it against a schema.
 *
 * The JSON is taken from the BEGIN_JSON/END_JSON block when one is present,
 * otherwise from the whole trimmed output.
 *
 * @param {string} output - Raw worker output.
 * @param {{ safeParse: (value: unknown) => { success: boolean, data?: unknown, error?: { message: string } } }} schema
 *   Validator exposing a `safeParse` method.
 * @returns {{ data: unknown } | { error: string }} The validated data, or a
 *   message describing the JSON syntax error or the validation failure.
 */
export function parseJsonWithSchema(output, schema) {
    const candidate = extractJsonBlock(output) ?? output.trim();
    try {
        const verdict = schema.safeParse(JSON.parse(candidate));
        return verdict.success
            ? { data: verdict.data }
            : { error: verdict.error.message };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Invalid JSON';
        return { error: reason };
    }
}
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock worker for testing auto-resume and stall detection.
|
|
3
|
+
*
|
|
4
|
+
* Controlled via AGENT_MOCK_WORKER env var:
|
|
5
|
+
* - "hang": Sleeps 20 seconds, then returns a failed result (simulates hung worker)
|
|
6
|
+
* - "hang_once": First call hangs, subsequent calls succeed
|
|
7
|
+
* - "delay_5s": Resolves after 5 seconds with valid output
|
|
8
|
+
* - "timeout_once_then_ok": First call times out (triggers worker_call_timeout), then succeeds
|
|
9
|
+
* - "no_changes_no_evidence": Returns no_changes_needed without evidence (triggers insufficient_evidence)
|
|
10
|
+
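* - "review_always_request_changes": Reviewer prompts always get request_changes (triggers review_loop_detected)
* - unset/other: Not used (real workers are used)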
|
|
11
|
+
*
|
|
12
|
+
* The mock worker returns valid JSON output for the stage being tested.
|
|
13
|
+
*/
|
|
14
|
+
// Number of runMockWorker() calls so far; the "*_once" modes use it to tell
// the first call from later ones.
let callCount = 0;
/** Values of AGENT_MOCK_WORKER that switch the mock worker on. */
const MOCK_WORKER_MODES = [
    'hang', 'hang_once', 'delay_5s',
    'timeout_once_then_ok', 'no_changes_no_evidence',
    'review_always_request_changes'
];
|
|
25
|
+
/**
 * Report whether AGENT_MOCK_WORKER names one of the known mock modes.
 *
 * @returns {boolean} true only for an exact match with an entry of
 *   MOCK_WORKER_MODES; false when the variable is unset or unrecognised.
 */
export function isMockWorkerEnabled() {
    const { AGENT_MOCK_WORKER: requested } = process.env;
    return MOCK_WORKER_MODES.some((known) => known === requested);
}
|
|
32
|
+
/**
 * Read the raw mock worker mode from the environment.
 *
 * @returns {string | undefined} The value of AGENT_MOCK_WORKER, unvalidated.
 */
export function getMockWorkerMode() {
    const { AGENT_MOCK_WORKER: mode } = process.env;
    return mode;
}
|
|
38
|
+
/**
 * Forget all previous mock worker calls (for tests), so the next call is
 * treated as the first one again by the "*_once" modes.
 */
export function resetMockWorker() {
    callCount = 0;
}
|
|
44
|
+
/**
 * Produce a JSON string that is valid output for the stage a prompt targets.
 *
 * The stage is recognised from prompt template markers rather than from loose
 * keywords, so words appearing in task content do not cause false matches.
 * Stages are tried in the order planner, implementer, reviewer; a prompt that
 * matches none of them gets a generic `{"result":"ok"}`.
 *
 * @param {string} prompt - Full prompt text handed to the worker.
 * @returns {string} Serialized JSON response for the detected stage.
 */
function generateValidOutput(prompt) {
    const stages = [
        {
            markers: ['# Planner Prompt', 'You are the planning model'],
            response: {
                milestones: [
                    {
                        goal: 'Mock milestone for testing',
                        files_expected: ['src/test.ts'],
                        done_checks: ['Build passes', 'Tests pass'],
                        risk_level: 'low'
                    }
                ]
            }
        },
        {
            markers: ['# Implementer Prompt', 'You are the implementer'],
            response: { status: 'ok', handoff_memo: 'Mock implementation complete.' }
        },
        {
            markers: ['# Reviewer Prompt', 'You are the reviewer model'],
            response: { status: 'approve', changes: [] }
        }
    ];
    const match = stages.find(({ markers }) => markers.some((marker) => prompt.includes(marker)));
    // Default response when no stage marker is present
    return JSON.stringify(match ? match.response : { result: 'ok' });
}
|
|
78
|
+
/**
 * Run mock worker with configured behavior.
 *
 * The behavior is selected by getMockWorkerMode(); every call increments the
 * module-level call counter, which the "*_once" modes use to make only the
 * first call misbehave.
 *
 * Phase-specific modes recognise the phase from prompt template markers
 * (e.g. "# Implementer Prompt"), never from loose keywords, so task text that
 * merely mentions "implement" or "review" cannot redirect a response meant
 * for another phase.
 *
 * @param {{ prompt: string }} input - Worker input; only `prompt` is read.
 * @returns {Promise<{ status: 'ok' | 'failed', commands_run: string[], observations: string[] }>}
 */
export async function runMockWorker(input) {
    const mode = getMockWorkerMode();
    callCount++;
    console.log(`[mock-worker] Mode: ${mode}, Call: ${callCount}`);
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const reply = (status, observation) => ({
        status,
        commands_run: ['mock-worker'],
        observations: [observation]
    });
    // Normal, schema-valid success for whatever stage the prompt targets.
    const succeed = () => reply('ok', generateValidOutput(input.prompt));
    const promptHasAny = (...markers) => markers.some((marker) => input.prompt.includes(marker));
    switch (mode) {
        case 'hang':
            // Hang for 20 seconds then fail - allows watchdog (10s intervals) to catch the 12s cap
            console.log('[mock-worker] Hanging for 20 seconds...');
            await sleep(20000);
            return reply('failed', 'Worker timed out (mock)');
        case 'hang_once':
            // First call hangs (20s), subsequent calls succeed
            if (callCount === 1) {
                console.log('[mock-worker] First call - hanging for 20 seconds...');
                await sleep(20000);
                return reply('failed', 'Worker timed out (mock hang_once first call)');
            }
            console.log('[mock-worker] Subsequent call - returning success');
            return succeed();
        case 'delay_5s':
            // Delay 5 seconds then succeed
            console.log('[mock-worker] Delaying 5 seconds...');
            await sleep(5000);
            return succeed();
        case 'timeout_once_then_ok':
            // First call times out (for auto-resume testing), subsequent calls succeed
            if (callCount === 1) {
                // Use AGENT_MOCK_TIMEOUT_MS for fast testing, default to 65s for compatibility
                const timeoutMs = Number.parseInt(process.env.AGENT_MOCK_TIMEOUT_MS ?? '', 10) || 65000;
                console.log(`[mock-worker] First call - sleeping ${timeoutMs}ms to trigger stall timeout...`);
                await sleep(timeoutMs);
                return reply('failed', 'Worker stall timeout (mock timeout_once_then_ok)');
            }
            console.log('[mock-worker] Subsequent call - returning success');
            return succeed();
        case 'no_changes_no_evidence':
            // Returns no_changes_needed without evidence (triggers insufficient_evidence)
            console.log('[mock-worker] Returning no_changes_needed without evidence');
            // Match the implementer template markers only: a bare "implement"
            // keyword also occurs in planner/reviewer prompts via task text.
            if (promptHasAny('# Implementer Prompt', 'You are the implementer')) {
                return reply('ok', JSON.stringify({
                    status: 'no_changes_needed',
                    handoff_memo: 'No changes needed (mock)',
                    evidence: null // Missing evidence triggers insufficient_evidence
                }));
            }
            // For other phases, return normal success
            return succeed();
        case 'review_always_request_changes':
            // Review always returns request_changes with identical message (triggers review_loop_detected)
            console.log('[mock-worker] review_always_request_changes mode');
            if (promptHasAny('# Reviewer Prompt', 'You are the reviewer model')) {
                console.log('[mock-worker] Returning request_changes for REVIEW phase');
                return reply('ok', JSON.stringify({
                    status: 'request_changes',
                    changes: [
                        'The done checks require testing the actual CLI behavior.',
                        'Please run the CLI commands to confirm the implementation works.'
                    ]
                }));
            }
            // For PLAN and IMPLEMENT, return normal success
            console.log('[mock-worker] Returning success for non-REVIEW phase');
            return succeed();
        default:
            // Should not reach here if isMockWorkerEnabled() is checked first
            return reply('failed', 'Mock worker called but not configured');
    }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
// ES modules have no built-in __filename/__dirname, so derive both from this
// module's own URL.
const __filename = fileURLToPath(new URL(import.meta.url));
const __dirname = path.dirname(__filename);
|
|
7
|
+
/**
 * Read a prompt template shipped with the package.
 *
 * Templates are looked up in `templates/prompts/`, two directory levels above
 * this module's directory, so the result does not depend on the current
 * working directory.
 *
 * @param {string} name - Template file name, e.g. `planner.md`.
 * @returns {string} Template contents decoded as UTF-8.
 * @throws If the file cannot be read (synchronous fs error).
 */
function loadTemplate(name) {
    const segments = [__dirname, '..', '..', 'templates', 'prompts', name];
    return fs.readFileSync(path.resolve(...segments), 'utf-8');
}
|
|
12
|
+
/**
 * Assemble the planner prompt: the planner template, the scope allowlist the
 * plan must stay within, the task text, and the expected JSON output shape
 * wrapped in BEGIN_JSON/END_JSON markers.
 *
 * @param {{ scopeAllowlist: string[], taskText: string }} input
 * @returns {string} Newline-joined prompt text.
 */
export function buildPlanPrompt(input) {
    const sections = [loadTemplate('planner.md'), ''];
    const allowlist = input.scopeAllowlist.join(', ');
    sections.push(`Scope allowlist: ${allowlist}`);
    sections.push('(All files_expected paths must match one of these patterns)', '');
    sections.push('Task:', input.taskText, '');
    sections.push('Output JSON between markers:');
    sections.push('BEGIN_JSON');
    sections.push('{"milestones": [{"goal": "...", "files_expected": ["..."], "done_checks": ["..."], "risk_level": "medium"}], "risk_map": ["..."], "do_not_touch": ["..."]}');
    sections.push('END_JSON');
    return sections.join('\n');
}
|
|
29
|
+
/**
 * Assemble the implementer prompt for one milestone.
 *
 * Layout, top to bottom: optional context pack, the implementer template,
 * milestone and scope details, an optional "FIX REQUIRED" section when a
 * previous attempt failed verification (error output capped at its first 2000
 * characters), and finally the expected JSON output shape.
 *
 * @param {object} input
 * @param {{ goal: string, files_expected?: string[], done_checks: string[] }} input.milestone
 * @param {string[]} input.scopeAllowlist
 * @param {string[]} input.scopeDenylist
 * @param {boolean} input.allowDeps
 * @param {string} [input.contextPack]
 * @param {{ attemptNumber: number, failedCommand: string, errorOutput: string, changedFiles: string[] }} [input.fixInstructions]
 * @returns {string} Newline-joined prompt text.
 */
export function buildImplementPrompt(input) {
    const template = loadTemplate('implementer.md');
    const { milestone, fixInstructions: fix } = input;
    const expectedFiles = milestone.files_expected ?? [];
    const out = [];
    // Context pack goes first so agent sees verification bar + patterns before acting
    if (input.contextPack) {
        out.push('## CONTEXT PACK (read first)', '');
        out.push(input.contextPack, '');
        out.push('## END CONTEXT PACK', '');
    }
    const fileList = expectedFiles.length > 0 ? expectedFiles.join(', ') : '(infer from goal)';
    out.push(template, '');
    out.push(`Milestone goal: ${milestone.goal}`);
    out.push(`Files to create/modify: ${fileList}`);
    out.push(`Done checks: ${milestone.done_checks.join('; ')}`);
    out.push(`Scope allowlist: ${input.scopeAllowlist.join(', ') || 'none'}`);
    out.push(`Scope denylist: ${input.scopeDenylist.join(', ') || 'none'}`);
    out.push(`Allow deps: ${input.allowDeps ? 'yes' : 'no'}`);
    if (fix) {
        out.push('', `## FIX REQUIRED (Attempt ${fix.attemptNumber})`, '');
        out.push('The previous implementation failed verification. Fix the error below.', '');
        out.push(`Failed command: ${fix.failedCommand}`, '');
        out.push('Error output:', '```', fix.errorOutput.slice(0, 2000), '```', '');
        out.push(`Changed files: ${fix.changedFiles.join(', ') || 'none'}`, '');
        out.push('Fix the error and ensure all done_checks pass.');
    }
    out.push('', 'Output JSON between markers:', 'BEGIN_JSON');
    out.push('{"status": "ok", "handoff_memo": "...", "commands_run": [], "observations": []}');
    out.push('END_JSON');
    return out.join('\n');
}
|
|
44
|
+
/**
 * Assemble the reviewer prompt for one milestone.
 *
 * The prompt always carries a "Verification Summary" JSON section. When the
 * caller supplies no summary, a placeholder summary is rendered instead and a
 * warning instructs the reviewer to request changes.
 *
 * @param {object} input
 * @param {{ goal: string, files_expected?: string[], done_checks: string[] }} input.milestone
 * @param {object} [input.verificationSummary] - Rendered verbatim as pretty-printed JSON.
 * @param {string} [input.diffSummary] - Shown as "(no diff)" when empty.
 * @param {string} [input.verificationOutput] - Shown as "(none)" when empty.
 * @returns {string} Newline-joined prompt text.
 */
export function buildReviewPrompt(input) {
    const template = loadTemplate('reviewer.md');
    const { milestone } = input;
    const expectedFiles = milestone.files_expected ?? [];
    // Build verification summary section
    const hasSummary = Boolean(input.verificationSummary);
    const summary = hasSummary
        ? input.verificationSummary
        : {
            commands_required: ['(not provided)'],
            commands_run: [],
            commands_missing: ['(verification summary not available)'],
            files_expected: expectedFiles,
            files_exist: expectedFiles.map((file) => ({ path: file, exists: '(not checked)' }))
        };
    const summarySection = [
        '',
        '## Verification Summary (MUST CHECK)',
        '',
        '```json',
        JSON.stringify(summary, null, 2),
        '```',
        ''
    ];
    if (!hasSummary) {
        // No summary provided - reviewer must request_changes
        summarySection.push('⚠️ WARNING: Verification summary not available. You MUST request_changes.', '');
    }
    const fileList = expectedFiles.length > 0 ? expectedFiles.join(', ') : '(infer from goal)';
    const parts = [template, summarySection.join('\n')];
    parts.push(`Milestone goal: ${milestone.goal}`);
    parts.push(`Files expected: ${fileList}`);
    parts.push(`Done checks: ${milestone.done_checks.join('; ')}`, '');
    parts.push('Diff summary (includes untracked new files):', input.diffSummary || '(no diff)', '');
    parts.push('Verification output:', input.verificationOutput || '(none)', '');
    parts.push('Output JSON between markers:', 'BEGIN_JSON', '{"status": "approve", "changes": []}', 'END_JSON');
    return parts.join('\n');
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
// Shared building block: an optional list of strings.
const optionalStringList = () => z.array(z.string()).optional();
// Review changes may arrive as strings or as structured objects; objects are
// serialized so downstream consumers always see strings.
const changeToString = (item) => (typeof item === 'string' ? item : JSON.stringify(item));
/** One planned milestone: a non-empty goal, at least one done check, and a risk level. */
export const milestoneSchema = z.object({
    goal: z.string().min(1),
    files_expected: optionalStringList(),
    done_checks: z.array(z.string()).min(1),
    risk_level: z.enum(['low', 'medium', 'high'])
});
/** Planner output: at least one milestone, plus optional risk notes and off-limits paths. */
export const planOutputSchema = z.object({
    milestones: z.array(milestoneSchema).min(1),
    risk_map: optionalStringList(),
    do_not_touch: optionalStringList()
});
/** Reviewer output: a verdict and a list of requested changes (defaults to empty). */
export const reviewOutputSchema = z.object({
    status: z.enum(['approve', 'request_changes', 'reject']),
    changes: z
        .array(z.union([z.string(), z.object({}).passthrough()]))
        .default([])
        .transform((items) => items.map(changeToString))
});
/**
 * Evidence required when implementer claims "no_changes_needed".
 * At least one of files_checked, grep_output, or commands_run must be populated.
 * NOTE(review): every field here is optional, so this schema does not enforce
 * the "at least one" rule itself — presumably it is checked elsewhere; confirm.
 */
export const noChangesEvidenceSchema = z.object({
    files_checked: optionalStringList(),
    grep_output: z.string().max(8192).optional(),
    reason: z.string().optional(),
    commands_run: z
        .array(z.object({
            command: z.string(),
            exit_code: z.number()
        }))
        .optional()
});
/** Implementer output: status, a non-empty handoff memo, and optional run details/evidence. */
export const implementerOutputSchema = z.object({
    status: z.enum(['ok', 'blocked', 'failed', 'no_changes_needed']),
    handoff_memo: z.string().min(1),
    commands_run: z.array(z.string()).default([]),
    observations: z.array(z.string()).default([]),
    evidence: noChangesEvidenceSchema.optional()
});
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@weldr/runr",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "Phase-gated orchestration for agent tasks",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"runr": "dist/cli.js",
|
|
8
|
+
"agent": "dist/cli.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist/",
|
|
12
|
+
"templates/prompts/",
|
|
13
|
+
"README.md",
|
|
14
|
+
"LICENSE",
|
|
15
|
+
"NOTICE",
|
|
16
|
+
"CHANGELOG.md"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "tsc -p tsconfig.json",
|
|
20
|
+
"prepare": "npm run build",
|
|
21
|
+
"dev": "node --loader ts-node/esm src/cli.ts",
|
|
22
|
+
"start": "node dist/cli.js",
|
|
23
|
+
"test": "vitest run",
|
|
24
|
+
"test:watch": "vitest",
|
|
25
|
+
"bench": "npx ts-node scripts/bench.ts",
|
|
26
|
+
"bench:dry": "npx ts-node scripts/bench.ts --dry-run",
|
|
27
|
+
"bench:minimal": "npx ts-node scripts/bench.ts --preset minimal",
|
|
28
|
+
"bench:context": "npx ts-node scripts/bench.ts --preset context",
|
|
29
|
+
"bench:stress": "npx ts-node scripts/bench.ts --preset stress",
|
|
30
|
+
"bench:full": "npx ts-node scripts/bench.ts --preset full"
|
|
31
|
+
},
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"commander": "^12.1.0",
|
|
34
|
+
"execa": "^8.0.1",
|
|
35
|
+
"picomatch": "^4.0.2",
|
|
36
|
+
"pino": "^9.3.2",
|
|
37
|
+
"yaml": "^2.8.2",
|
|
38
|
+
"zod": "^3.23.8"
|
|
39
|
+
},
|
|
40
|
+
"devDependencies": {
|
|
41
|
+
"@types/node": "^22.7.5",
|
|
42
|
+
"@types/picomatch": "^4.0.2",
|
|
43
|
+
"ts-node": "^10.9.2",
|
|
44
|
+
"typescript": "^5.5.4",
|
|
45
|
+
"vitest": "^4.0.16"
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Implementer Prompt
|
|
2
|
+
|
|
3
|
+
You are the execution model. Implement the smallest viable change for the current milestone.
|
|
4
|
+
Follow scope lock. Do not edit lockfiles unless explicitly allowed.
|
|
5
|
+
|
|
6
|
+
**Important**: Scope patterns (allowlist/denylist) are **repo-relative paths**, not absolute paths.
|
|
7
|
+
Ignore any `.agent` substrings in your absolute working directory path - they do not affect scope compliance.
|
|
8
|
+
Only the relative path from the repo root matters (e.g., `src/foo.ts` not `/path/to/.agent-worktrees/123/src/foo.ts`).
|
|
9
|
+
|
|
10
|
+
## Output Format
|
|
11
|
+
|
|
12
|
+
Return ONLY machine-readable JSON between BEGIN_JSON and END_JSON markers:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
BEGIN_JSON
|
|
16
|
+
{
|
|
17
|
+
"status": "ok" | "blocked" | "failed",
|
|
18
|
+
"handoff_memo": "Description of what was done or why blocked",
|
|
19
|
+
"commands_run": ["list", "of", "commands"],
|
|
20
|
+
"observations": ["notable", "findings"]
|
|
21
|
+
}
|
|
22
|
+
END_JSON
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Status Values
|
|
26
|
+
|
|
27
|
+
| Status | When to use | Effect |
|
|
28
|
+
|--------|-------------|--------|
|
|
29
|
+
| `ok` | Implementation complete, ready for verification | Proceeds to VERIFY phase |
|
|
30
|
+
| `blocked` | Cannot proceed without external input | Run stops with stop memo |
|
|
31
|
+
| `failed` | Unrecoverable error occurred | Run stops with stop memo |
|
|
32
|
+
|
|
33
|
+
## Block Protocol
|
|
34
|
+
|
|
35
|
+
When you cannot complete a milestone (`status: "blocked"` or `status: "failed"`), structure your `handoff_memo` using this format:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
## What broke
|
|
39
|
+
<Specific error or blocking issue>
|
|
40
|
+
|
|
41
|
+
## Hypothesis A
|
|
42
|
+
<First theory about the cause>
|
|
43
|
+
|
|
44
|
+
## Hypothesis B
|
|
45
|
+
<Alternative theory>
|
|
46
|
+
|
|
47
|
+
## Experiment
|
|
48
|
+
<What you tried to diagnose>
|
|
49
|
+
|
|
50
|
+
## Decision
|
|
51
|
+
<Conclusion based on experiments>
|
|
52
|
+
|
|
53
|
+
## Next action
|
|
54
|
+
<What a human or future run should do>
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This structured format helps:
|
|
58
|
+
- Humans understand exactly what went wrong
|
|
59
|
+
- Future runs can learn from the diagnosis
|
|
60
|
+
- The stop memo captures actionable next steps
|
|
61
|
+
|
|
62
|
+
## Fix Instructions
|
|
63
|
+
|
|
64
|
+
When retrying after verification failure, the prompt includes a `## FIX REQUIRED` section built from `fixInstructions`:
|
|
65
|
+
- `failedCommand` - The command that failed
|
|
66
|
+
- `errorOutput` - Captured error output
|
|
67
|
+
- `changedFiles` - Files you modified
|
|
68
|
+
- `attemptNumber` - Current retry (1-3)
|
|
69
|
+
|
|
70
|
+
Use this to fix the specific issue that caused verification to fail.
|