@weldr/runr 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +216 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +4 -0
  4. package/README.md +200 -0
  5. package/dist/cli.js +464 -0
  6. package/dist/commands/__tests__/report.test.js +202 -0
  7. package/dist/commands/compare.js +168 -0
  8. package/dist/commands/doctor.js +124 -0
  9. package/dist/commands/follow.js +251 -0
  10. package/dist/commands/gc.js +161 -0
  11. package/dist/commands/guards-only.js +89 -0
  12. package/dist/commands/metrics.js +441 -0
  13. package/dist/commands/orchestrate.js +800 -0
  14. package/dist/commands/paths.js +31 -0
  15. package/dist/commands/preflight.js +152 -0
  16. package/dist/commands/report.js +478 -0
  17. package/dist/commands/resume.js +149 -0
  18. package/dist/commands/run.js +538 -0
  19. package/dist/commands/status.js +189 -0
  20. package/dist/commands/summarize.js +220 -0
  21. package/dist/commands/version.js +82 -0
  22. package/dist/commands/wait.js +170 -0
  23. package/dist/config/__tests__/presets.test.js +104 -0
  24. package/dist/config/load.js +66 -0
  25. package/dist/config/schema.js +160 -0
  26. package/dist/context/__tests__/artifact.test.js +130 -0
  27. package/dist/context/__tests__/pack.test.js +191 -0
  28. package/dist/context/artifact.js +67 -0
  29. package/dist/context/index.js +2 -0
  30. package/dist/context/pack.js +273 -0
  31. package/dist/diagnosis/analyzer.js +678 -0
  32. package/dist/diagnosis/formatter.js +136 -0
  33. package/dist/diagnosis/index.js +6 -0
  34. package/dist/diagnosis/types.js +7 -0
  35. package/dist/env/__tests__/fingerprint.test.js +116 -0
  36. package/dist/env/fingerprint.js +111 -0
  37. package/dist/orchestrator/__tests__/policy.test.js +185 -0
  38. package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
  39. package/dist/orchestrator/artifacts.js +405 -0
  40. package/dist/orchestrator/state-machine.js +646 -0
  41. package/dist/orchestrator/types.js +88 -0
  42. package/dist/ownership/normalize.js +45 -0
  43. package/dist/repo/context.js +90 -0
  44. package/dist/repo/git.js +13 -0
  45. package/dist/repo/worktree.js +239 -0
  46. package/dist/store/run-store.js +107 -0
  47. package/dist/store/run-utils.js +69 -0
  48. package/dist/store/runs-root.js +126 -0
  49. package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
  50. package/dist/supervisor/__tests__/ownership.test.js +103 -0
  51. package/dist/supervisor/__tests__/state-machine.test.js +290 -0
  52. package/dist/supervisor/collision.js +240 -0
  53. package/dist/supervisor/evidence-gate.js +98 -0
  54. package/dist/supervisor/planner.js +18 -0
  55. package/dist/supervisor/runner.js +1562 -0
  56. package/dist/supervisor/scope-guard.js +55 -0
  57. package/dist/supervisor/state-machine.js +121 -0
  58. package/dist/supervisor/verification-policy.js +64 -0
  59. package/dist/tasks/task-metadata.js +72 -0
  60. package/dist/types/schemas.js +1 -0
  61. package/dist/verification/engine.js +49 -0
  62. package/dist/workers/__tests__/claude.test.js +88 -0
  63. package/dist/workers/__tests__/codex.test.js +81 -0
  64. package/dist/workers/claude.js +119 -0
  65. package/dist/workers/codex.js +162 -0
  66. package/dist/workers/json.js +22 -0
  67. package/dist/workers/mock.js +193 -0
  68. package/dist/workers/prompts.js +98 -0
  69. package/dist/workers/schemas.js +39 -0
  70. package/package.json +47 -0
  71. package/templates/prompts/implementer.md +70 -0
  72. package/templates/prompts/planner.md +62 -0
  73. package/templates/prompts/reviewer.md +77 -0
@@ -0,0 +1,162 @@
1
+ import { execa } from 'execa';
2
/**
 * Coerce a text-like payload from a Codex event into a plain string.
 *
 * - strings are returned unchanged
 * - arrays are treated as a list of parts: string parts are kept, object
 *   parts contribute their string `text` field, everything else is dropped;
 *   the surviving parts are joined with newlines
 * - any other value yields '' so it can never be stringified as
 *   "[object Object]" further down
 *
 * NOTE(review): the array-of-parts shape is handled defensively; confirm
 * against real Codex output whether `content` is ever emitted that way.
 *
 * @param {unknown} value
 * @returns {string}
 */
function codexPayloadToText(value) {
    if (typeof value === 'string') {
        return value;
    }
    if (Array.isArray(value)) {
        return value
            .map((part) => {
                if (typeof part === 'string') {
                    return part;
                }
                return typeof part?.text === 'string' ? part.text : '';
            })
            .filter(Boolean)
            .join('\n');
    }
    return '';
}
/**
 * Extract assistant text from Codex JSONL output.
 *
 * Every non-empty line is parsed as one JSON event, and each event
 * contributes at most one piece of text. The first rule that matches an
 * event decides what is taken from it:
 * 1. item.completed whose item type is agent_message / message:
 *    item.text, falling back to item.content
 * 2. any other item.completed that carries item.text (reasoning, etc.)
 * 3. response / turn.completed events with a message:
 *    message.content, falling back to message.text
 * 4. response events with content or text directly on the event
 *
 * Lines that are not valid JSON (or are not JSON objects) are skipped.
 *
 * @param {string} output - Raw stdout captured from the Codex process.
 * @returns {string} Text of all matching events, in input order, joined
 *   with newlines; '' when nothing matched or `output` is not a string.
 */
function extractTextFromCodexJsonl(output) {
    if (typeof output !== 'string') {
        return '';
    }
    const lines = output.trim().split('\n').filter(Boolean);
    const texts = [];
    for (const line of lines) {
        let event;
        try {
            event = JSON.parse(line);
        }
        catch {
            // Skip malformed lines
            continue;
        }
        // A line such as `null` or `42` is valid JSON but not an event.
        if (event === null || typeof event !== 'object') {
            continue;
        }
        const isCompletedItem = event.type === 'item.completed';
        // Rule 1: agent_message or message items
        if (isCompletedItem && event.item) {
            const itemType = event.item.type;
            if (itemType === 'agent_message' || itemType === 'message') {
                const text = codexPayloadToText(event.item.text) || codexPayloadToText(event.item.content);
                if (text) {
                    texts.push(text);
                }
                // A message item is consumed here even when it is empty.
                continue;
            }
        }
        // Rule 2: any item.completed with text (reasoning, etc.)
        if (isCompletedItem) {
            const text = codexPayloadToText(event.item?.text);
            if (text) {
                texts.push(text);
                continue;
            }
        }
        // Rule 3: top-level message/response events
        if ((event.type === 'response' || event.type === 'turn.completed') && event.message) {
            const text = codexPayloadToText(event.message.content) || codexPayloadToText(event.message.text);
            if (text) {
                texts.push(text);
            }
            continue;
        }
        // Rule 4: direct content on the event
        if (event.type === 'response') {
            const text = codexPayloadToText(event.content) || codexPayloadToText(event.text);
            if (text) {
                texts.push(text);
            }
        }
    }
    return texts.join('\n');
}
51
/**
 * Run the Codex CLI for one worker call and wrap the outcome in a worker result.
 *
 * The configured args are extended with `-C <repo_path>`, the prompt is fed on
 * stdin, and the process is given a 5 minute timeout. When the worker's
 * `output` setting is 'jsonl' the assistant text is extracted from stdout;
 * if nothing is extracted the raw stdout is reported instead.
 *
 * Any error thrown by execa is converted into a `failed` result whose single
 * observation is the first non-empty of stdout, stderr, the error message, or
 * a fixed fallback string.
 * NOTE(review): execa rejects on non-zero exit by default, so the 'failed'
 * arm of the exit-code check is presumably unreachable with these options.
 *
 * @param input - Object with `worker` ({ bin, args, output }), `repo_path` and `prompt`.
 * @returns {Promise<{status: string, commands_run: string[], observations: string[]}>}
 */
export async function runCodex(input) {
    const { bin, args } = input.worker;
    // Build argv: base args + repo path via -C
    const argv = [...args, '-C', input.repo_path];
    const commandLine = `${bin} ${argv.join(' ')}`;
    const toResult = (status, observation) => ({
        status,
        commands_run: [commandLine],
        observations: [observation]
    });
    let result;
    try {
        result = await execa(bin, argv, {
            cwd: input.repo_path,
            input: input.prompt,
            stdout: 'pipe',
            stderr: 'pipe',
            timeout: 300000 // 5 min timeout
        });
    }
    catch (error) {
        const failure = error;
        return toResult('failed', failure.stdout || failure.stderr || failure.message || 'Codex command failed');
    }
    const rawOutput = result.stdout;
    let text = rawOutput;
    if (input.worker.output === 'jsonl') {
        text = extractTextFromCodexJsonl(rawOutput);
    }
    const status = result.exitCode === 0 ? 'ok' : 'failed';
    return toResult(status, text || rawOutput);
}
83
/**
 * Ordered classification rules: the first category with a matching keyword
 * (plain substring) or status-code pattern wins.
 *
 * Status codes are matched only when they are not embedded in a longer run of
 * digits, so "14010 ms" or "4290 items" are not mistaken for 401 / 429.
 */
const ERROR_CATEGORY_RULES = Object.freeze([
    {
        category: 'auth',
        keywords: ['oauth', 'token expired', 'authentication', 'login',
            'unauthorized', 'not authenticated', 'sign in'],
        pattern: /(?<!\d)401(?!\d)/
    },
    {
        category: 'network',
        keywords: ['enotfound', 'econnrefused', 'network', 'timeout',
            'econnreset', 'socket'],
        pattern: null
    },
    {
        category: 'rate_limit',
        keywords: ['rate limit', 'too many requests', 'quota'],
        pattern: /(?<!\d)429(?!\d)/
    }
]);
/**
 * Classify error output into categories for preflight reporting.
 *
 * Matching is case-insensitive and checks auth first, then network, then
 * rate limit.
 *
 * @param {string} output - Error text (stderr, stdout or an error message).
 *   null / undefined are treated as empty text.
 * @returns {'auth' | 'network' | 'rate_limit' | 'unknown'}
 */
function classifyError(output) {
    const lower = String(output ?? '').toLowerCase();
    for (const { category, keywords, pattern } of ERROR_CATEGORY_RULES) {
        if (keywords.some((keyword) => lower.includes(keyword))) {
            return category;
        }
        if (pattern !== null && pattern.test(lower)) {
            return category;
        }
    }
    return 'unknown';
}
108
/**
 * Ping Codex to verify auth and connectivity.
 * Success = process exits 0 within timeout.
 *
 * The configured args are used as-is (no `-C` repo path), a fixed one-line
 * prompt is sent on stdin and the process gets a 15 second timeout.
 *
 * @param worker - Worker config providing `bin` and `args`.
 * @returns {Promise<object>} `{ ok: true, worker: 'codex', ms }` on success,
 *   otherwise `{ ok: false, worker: 'codex', ms, category, message }` where
 *   `category` comes from classifyError (or is 'network' for a timeout) and
 *   `message` is at most 200 characters long. Never rejects for process
 *   failures; they are reported through the result object.
 */
export async function pingCodex(worker) {
    const { bin, args } = worker;
    const start = Date.now();
    const pingPrompt = 'Respond with exactly: ok';
    // Build minimal argv (no -C repo path for ping)
    const argv = [...args];
    // Single place that shapes a failed ping result.
    const failure = (ms, category, message) => ({
        ok: false,
        worker: 'codex',
        ms,
        category,
        message
    });
    try {
        const result = await execa(bin, argv, {
            input: pingPrompt,
            stdout: 'pipe',
            stderr: 'pipe',
            timeout: 15000 // 15s timeout for ping
        });
        const ms = Date.now() - start;
        // Success = exit code 0
        if (result.exitCode === 0) {
            return { ok: true, worker: 'codex', ms };
        }
        // Non-zero exit.
        // NOTE(review): execa rejects on non-zero exit by default, so this
        // branch is presumably only reachable if `reject: false` is ever set.
        const output = result.stderr || result.stdout || '';
        return failure(ms, classifyError(output), output.slice(0, 200));
    }
    catch (error) {
        const ms = Date.now() - start;
        const err = error ?? {};
        // execa marks timeouts with `timedOut`; the code / message checks are
        // kept as fallbacks for errors that do not carry that flag.
        const timedOut = err.timedOut === true ||
            err.code === 'ETIMEDOUT' ||
            (typeof err.message === 'string' && err.message.includes('timed out'));
        if (timedOut) {
            return failure(ms, 'network', 'Ping timed out');
        }
        const output = err.stderr || err.stdout || err.message || 'Ping failed';
        return failure(ms, classifyError(output), output.slice(0, 200));
    }
}
@@ -0,0 +1,22 @@
1
/**
 * Return the text between the first BEGIN_JSON marker and the first END_JSON
 * marker that follows it, trimmed of surrounding whitespace.
 *
 * The closing marker is searched for only after the opening marker, so a
 * stray END_JSON earlier in the output (for example in echoed instructions)
 * does not hide a complete block that comes later.
 *
 * @param {string} output - Raw worker output.
 * @returns {string | null} The block contents (possibly ''), or null when no
 *   BEGIN_JSON ... END_JSON pair is present.
 */
export function extractJsonBlock(output) {
    const beginMarker = 'BEGIN_JSON';
    const endMarker = 'END_JSON';
    const start = output.indexOf(beginMarker);
    if (start === -1) {
        return null;
    }
    const bodyStart = start + beginMarker.length;
    const end = output.indexOf(endMarker, bodyStart);
    if (end === -1) {
        return null;
    }
    return output.slice(bodyStart, end).trim();
}
9
/**
 * Parse worker output as JSON and validate it with the given schema.
 *
 * The JSON text is the BEGIN_JSON/END_JSON block when extractJsonBlock finds
 * one, otherwise the whole trimmed output.
 *
 * @param {string} output - Raw worker output.
 * @param schema - Object exposing `safeParse(value)` that returns
 *   `{ success, data }` or `{ success, error }` (zod-style).
 * @returns {{data: unknown} | {error: string}} `data` on success; otherwise
 *   `error` holding the schema error message, the thrown Error's message, or
 *   'Invalid JSON' when something other than an Error was thrown.
 */
export function parseJsonWithSchema(output, schema) {
    const payload = extractJsonBlock(output) ?? output.trim();
    try {
        const outcome = schema.safeParse(JSON.parse(payload));
        return outcome.success
            ? { data: outcome.data }
            : { error: outcome.error.message };
    }
    catch (cause) {
        const message = cause instanceof Error ? cause.message : 'Invalid JSON';
        return { error: message };
    }
}
@@ -0,0 +1,193 @@
1
/**
 * Mock worker for testing auto-resume and stall detection.
 *
 * Controlled via the AGENT_MOCK_WORKER env var:
 * - "hang": every call sleeps 20 seconds, then returns a failed result
 * - "hang_once": first call sleeps 20 seconds and fails, subsequent calls succeed
 * - "delay_5s": resolves after 5 seconds with valid output
 * - "timeout_once_then_ok": first call sleeps AGENT_MOCK_TIMEOUT_MS milliseconds
 *   (default 65000) and fails, subsequent calls succeed
 * - "no_changes_no_evidence": implementer calls return no_changes_needed with
 *   `evidence: null` (intended to trigger insufficient_evidence); other calls succeed
 * - "review_always_request_changes": reviewer calls always return the same
 *   request_changes response (intended to trigger review_loop_detected);
 *   other calls succeed
 * - unset/other: Not used (real workers are used)
 *
 * Successful calls return valid JSON output for the stage being tested.
 */
// Number of runMockWorker calls so far; drives the "*_once" modes.
let callCount = 0;
/** Valid mock worker modes (frozen: this list is a fixed, module-private constant). */
const MOCK_WORKER_MODES = Object.freeze([
    'hang',
    'hang_once',
    'delay_5s',
    'timeout_once_then_ok',
    'no_changes_no_evidence',
    'review_always_request_changes'
]);
25
/**
 * Report whether AGENT_MOCK_WORKER names one of the known mock modes.
 *
 * @returns {boolean} true only for an exact match against MOCK_WORKER_MODES;
 *   false when the variable is unset or holds any other value.
 */
export function isMockWorkerEnabled() {
    const requested = process.env.AGENT_MOCK_WORKER;
    return MOCK_WORKER_MODES.some((known) => known === requested);
}
32
/**
 * Read the raw mock worker mode from the environment.
 *
 * @returns {string | undefined} The value of AGENT_MOCK_WORKER exactly as set
 *   (not validated against the known modes), or undefined when unset.
 */
export function getMockWorkerMode() {
    const { AGENT_MOCK_WORKER: mode } = process.env;
    return mode;
}
38
/**
 * Reset mock worker state (for tests): the call counter goes back to zero, so
 * the next runMockWorker call is treated as the first one again.
 *
 * @returns {void}
 */
export function resetMockWorker() {
    callCount = 0;
}
44
/**
 * Produce a JSON string shaped like a successful response for the stage the
 * prompt belongs to.
 *
 * The stage is recognised from prompt template markers (a heading or an
 * opening sentence) rather than loose keywords, so task content is less
 * likely to cause a false match. Stages are tried in the order planner,
 * implementer, reviewer; the first match wins.
 *
 * @param {string} prompt - Full prompt text handed to the worker.
 * @returns {string} Serialized mock response; `{"result":"ok"}` when no stage
 *   marker is found.
 */
function generateValidOutput(prompt) {
    const stages = [
        {
            markers: ['# Planner Prompt', 'You are the planning model'],
            response: {
                milestones: [
                    {
                        goal: 'Mock milestone for testing',
                        files_expected: ['src/test.ts'],
                        done_checks: ['Build passes', 'Tests pass'],
                        risk_level: 'low'
                    }
                ]
            }
        },
        {
            markers: ['# Implementer Prompt', 'You are the implementer'],
            response: {
                status: 'ok',
                handoff_memo: 'Mock implementation complete.'
            }
        },
        {
            markers: ['# Reviewer Prompt', 'You are the reviewer model'],
            response: {
                status: 'approve',
                changes: []
            }
        }
    ];
    const matched = stages.find(({ markers }) => markers.some((marker) => prompt.includes(marker)));
    // Default response when the prompt carries no known stage marker
    const response = matched ? matched.response : { result: 'ok' };
    return JSON.stringify(response);
}
78
/** Resolve after `ms` milliseconds. */
function mockSleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}
/** Build a mock worker result carrying a single observation. */
function mockResult(status, observation) {
    return {
        status,
        commands_run: ['mock-worker'],
        observations: [observation]
    };
}
/**
 * Decide whether a prompt belongs to the implement phase.
 *
 * Template markers take precedence. The loose "implement" keyword check is
 * kept only for prompts that carry no planner/reviewer marker, so a plan or
 * review prompt whose task text merely mentions "implement" is no longer
 * mistaken for an implementer call.
 */
function isImplementerPrompt(prompt) {
    if (prompt.includes('# Implementer Prompt') || prompt.includes('You are the implementer')) {
        return true;
    }
    const isPlanner = prompt.includes('# Planner Prompt') || prompt.includes('You are the planning model');
    const isReviewer = prompt.includes('# Reviewer Prompt') || prompt.includes('You are the reviewer model');
    if (isPlanner || isReviewer) {
        return false;
    }
    return prompt.includes('IMPLEMENT') || prompt.includes('implement');
}
/**
 * Run mock worker with configured behavior.
 *
 * Reads the mode from AGENT_MOCK_WORKER, increments the module-level call
 * counter and logs both. Each mode then either sleeps and returns a failed
 * result, or returns an `ok` result whose single observation is a JSON string.
 *
 * @param input - Worker input; only `input.prompt` is read.
 * @returns {Promise<{status: string, commands_run: string[], observations: string[]}>}
 *   A `failed` result with a fixed message when the mode is not recognised.
 */
export async function runMockWorker(input) {
    const mode = getMockWorkerMode();
    callCount++;
    console.log(`[mock-worker] Mode: ${mode}, Call: ${callCount}`);
    switch (mode) {
        case 'hang':
            // Hang for 20 seconds then fail - allows watchdog (10s intervals) to catch the 12s cap
            console.log('[mock-worker] Hanging for 20 seconds...');
            await mockSleep(20000);
            return mockResult('failed', 'Worker timed out (mock)');
        case 'hang_once':
            // First call hangs (20s), subsequent calls succeed
            if (callCount === 1) {
                console.log('[mock-worker] First call - hanging for 20 seconds...');
                await mockSleep(20000);
                return mockResult('failed', 'Worker timed out (mock hang_once first call)');
            }
            console.log('[mock-worker] Subsequent call - returning success');
            return mockResult('ok', generateValidOutput(input.prompt));
        case 'delay_5s':
            // Delay 5 seconds then succeed
            console.log('[mock-worker] Delaying 5 seconds...');
            await mockSleep(5000);
            return mockResult('ok', generateValidOutput(input.prompt));
        case 'timeout_once_then_ok':
            // First call times out (for auto-resume testing), subsequent calls succeed
            if (callCount === 1) {
                // Use AGENT_MOCK_TIMEOUT_MS for fast testing, default to 65s for compatibility.
                // A missing, non-numeric or zero value falls back to the default.
                const timeoutMs = Number.parseInt(process.env.AGENT_MOCK_TIMEOUT_MS ?? '', 10) || 65000;
                console.log(`[mock-worker] First call - sleeping ${timeoutMs}ms to trigger stall timeout...`);
                await mockSleep(timeoutMs);
                return mockResult('failed', 'Worker stall timeout (mock timeout_once_then_ok)');
            }
            console.log('[mock-worker] Subsequent call - returning success');
            return mockResult('ok', generateValidOutput(input.prompt));
        case 'no_changes_no_evidence':
            // Returns no_changes_needed without evidence (triggers insufficient_evidence)
            console.log('[mock-worker] Returning no_changes_needed without evidence');
            if (isImplementerPrompt(input.prompt)) {
                return mockResult('ok', JSON.stringify({
                    status: 'no_changes_needed',
                    handoff_memo: 'No changes needed (mock)',
                    evidence: null // Missing evidence triggers insufficient_evidence
                }));
            }
            // For other phases, return normal success
            return mockResult('ok', generateValidOutput(input.prompt));
        case 'review_always_request_changes':
            // Review always returns request_changes with identical message (triggers review_loop_detected)
            // Use more specific phase detection to avoid false matches in task content
            console.log('[mock-worker] review_always_request_changes mode');
            if (input.prompt.includes('# Reviewer Prompt') || input.prompt.includes('You are the reviewer model')) {
                console.log('[mock-worker] Returning request_changes for REVIEW phase');
                return mockResult('ok', JSON.stringify({
                    status: 'request_changes',
                    changes: [
                        'The done checks require testing the actual CLI behavior.',
                        'Please run the CLI commands to confirm the implementation works.'
                    ]
                }));
            }
            // For PLAN and IMPLEMENT, return normal success
            console.log('[mock-worker] Returning success for non-REVIEW phase');
            return mockResult('ok', generateValidOutput(input.prompt));
        default:
            // Should not reach here if isMockWorkerEnabled() is checked first
            return mockResult('failed', 'Mock worker called but not configured');
    }
}
@@ -0,0 +1,98 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
// ESM has no built-in __filename/__dirname, so derive both from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
/**
 * Read a prompt template from `templates/prompts/`, located two directories
 * above this module, so the lookup does not depend on the process CWD.
 *
 * @param {string} name - Template file name, e.g. 'planner.md'.
 * @returns {string} Template contents decoded as UTF-8.
 * @throws Whatever fs.readFileSync throws when the file cannot be read.
 */
function loadTemplate(name) {
    const relativeSegments = ['..', '..', 'templates', 'prompts', name];
    const templatePath = path.resolve(__dirname, ...relativeSegments);
    return fs.readFileSync(templatePath, { encoding: 'utf-8' });
}
12
/**
 * Assemble the planner prompt: the planner template, the scope allowlist,
 * the task text, and the JSON output contract wrapped in BEGIN_JSON/END_JSON.
 *
 * @param input - Object with `scopeAllowlist` (array of patterns) and `taskText`.
 * @returns {string} Newline-joined prompt text.
 */
export function buildPlanPrompt(input) {
    const sections = [loadTemplate('planner.md')];
    // Scope constraints the plan's files_expected must respect
    sections.push('');
    sections.push(`Scope allowlist: ${input.scopeAllowlist.join(', ')}`);
    sections.push('(All files_expected paths must match one of these patterns)');
    // The task itself
    sections.push('');
    sections.push('Task:');
    sections.push(input.taskText);
    // Output contract with an example payload
    sections.push('');
    sections.push('Output JSON between markers:');
    sections.push('BEGIN_JSON');
    sections.push('{"milestones": [{"goal": "...", "files_expected": ["..."], "done_checks": ["..."], "risk_level": "medium"}], "risk_map": ["..."], "do_not_touch": ["..."]}');
    sections.push('END_JSON');
    return sections.join('\n');
}
29
/**
 * Assemble the implementer prompt.
 *
 * Layout, top to bottom: optional context pack, implementer template,
 * milestone and scope details, optional fix instructions for a retry, and the
 * JSON output contract wrapped in BEGIN_JSON/END_JSON.
 *
 * @param input - Object with `milestone` ({ goal, files_expected?, done_checks }),
 *   `scopeAllowlist`, `scopeDenylist`, `allowDeps`, and optionally
 *   `contextPack` and `fixInstructions` ({ attemptNumber, failedCommand,
 *   errorOutput, changedFiles }).
 * @returns {string} Newline-joined prompt text. Error output in the fix
 *   section is cut to its first 2000 characters.
 */
export function buildImplementPrompt(input) {
    const template = loadTemplate('implementer.md');
    const filesExpected = input.milestone.files_expected ?? [];
    // Context pack goes first so agent sees verification bar + patterns before acting
    const contextSection = input.contextPack
        ? ['## CONTEXT PACK (read first)', '', input.contextPack, '', '## END CONTEXT PACK', '']
        : [];
    const fileList = filesExpected.length > 0 ? filesExpected.join(', ') : '(infer from goal)';
    const milestoneSection = [
        template,
        '',
        `Milestone goal: ${input.milestone.goal}`,
        `Files to create/modify: ${fileList}`,
        `Done checks: ${input.milestone.done_checks.join('; ')}`,
        `Scope allowlist: ${input.scopeAllowlist.join(', ') || 'none'}`,
        `Scope denylist: ${input.scopeDenylist.join(', ') || 'none'}`,
        `Allow deps: ${input.allowDeps ? 'yes' : 'no'}`
    ];
    let fixSection = [];
    if (input.fixInstructions) {
        const fix = input.fixInstructions;
        fixSection = [
            '',
            `## FIX REQUIRED (Attempt ${fix.attemptNumber})`,
            '',
            'The previous implementation failed verification. Fix the error below.',
            '',
            `Failed command: ${fix.failedCommand}`,
            '',
            'Error output:',
            '```',
            fix.errorOutput.slice(0, 2000),
            '```',
            '',
            `Changed files: ${fix.changedFiles.join(', ') || 'none'}`,
            '',
            'Fix the error and ensure all done_checks pass.'
        ];
    }
    const outputSection = [
        '',
        'Output JSON between markers:',
        'BEGIN_JSON',
        '{"status": "ok", "handoff_memo": "...", "commands_run": [], "observations": []}',
        'END_JSON'
    ];
    return [...contextSection, ...milestoneSection, ...fixSection, ...outputSection].join('\n');
}
44
/**
 * Assemble the reviewer prompt.
 *
 * Layout, top to bottom: reviewer template, a "Verification Summary" section,
 * milestone details, the diff summary, verification output, and the JSON
 * output contract wrapped in BEGIN_JSON/END_JSON.
 *
 * When `input.verificationSummary` is missing, a placeholder summary is
 * rendered together with a warning instructing the reviewer to request changes.
 *
 * @param input - Object with `milestone` ({ goal, files_expected?, done_checks })
 *   and optionally `verificationSummary`, `diffSummary`, `verificationOutput`.
 * @returns {string} Newline-joined prompt text.
 */
export function buildReviewPrompt(input) {
    const template = loadTemplate('reviewer.md');
    const filesExpected = input.milestone.files_expected ?? [];
    const hasSummary = Boolean(input.verificationSummary);
    // Either the real summary, or a placeholder describing what is missing
    const summaryPayload = hasSummary
        ? input.verificationSummary
        : {
            commands_required: ['(not provided)'],
            commands_run: [],
            commands_missing: ['(verification summary not available)'],
            files_expected: filesExpected,
            files_exist: filesExpected.map(f => ({ path: f, exists: '(not checked)' }))
        };
    const summaryLines = [
        '',
        '## Verification Summary (MUST CHECK)',
        '',
        '```json',
        JSON.stringify(summaryPayload, null, 2),
        '```',
        ''
    ];
    if (!hasSummary) {
        // No summary provided - reviewer must request_changes
        summaryLines.push('⚠️ WARNING: Verification summary not available. You MUST request_changes.', '');
    }
    const verificationSummaryText = summaryLines.join('\n');
    const fileList = filesExpected.length > 0 ? filesExpected.join(', ') : '(infer from goal)';
    const promptLines = [];
    promptLines.push(template, verificationSummaryText);
    promptLines.push(`Milestone goal: ${input.milestone.goal}`);
    promptLines.push(`Files expected: ${fileList}`);
    promptLines.push(`Done checks: ${input.milestone.done_checks.join('; ')}`);
    promptLines.push('', 'Diff summary (includes untracked new files):', input.diffSummary || '(no diff)');
    promptLines.push('', 'Verification output:', input.verificationOutput || '(none)');
    promptLines.push('', 'Output JSON between markers:', 'BEGIN_JSON', '{"status": "approve", "changes": []}', 'END_JSON');
    return promptLines.join('\n');
}
@@ -0,0 +1,39 @@
1
+ import { z } from 'zod';
2
/** Fresh `string[]` schema; a factory so each field gets its own instance. */
const stringList = () => z.array(z.string());
/** Render one review change entry as text: strings pass through, objects are JSON-encoded. */
const changeToString = (item) => (typeof item === 'string' ? item : JSON.stringify(item));
/**
 * One unit of planned work: a non-empty goal, at least one done check, a
 * risk level, and optionally the files expected to be touched.
 */
export const milestoneSchema = z.object({
    goal: z.string().min(1),
    files_expected: stringList().optional(),
    done_checks: stringList().min(1),
    risk_level: z.enum(['low', 'medium', 'high'])
});
/** Planner output: one or more milestones plus optional risk and no-touch lists. */
export const planOutputSchema = z.object({
    milestones: z.array(milestoneSchema).min(1),
    risk_map: stringList().optional(),
    do_not_touch: stringList().optional()
});
/**
 * Reviewer output. `changes` accepts strings or arbitrary objects, defaults
 * to an empty list, and is normalized so every entry ends up a string.
 */
export const reviewOutputSchema = z.object({
    status: z.enum(['approve', 'request_changes', 'reject']),
    changes: z
        .array(z.union([z.string(), z.object({}).passthrough()]))
        .default([])
        .transform((entries) => entries.map(changeToString))
});
/**
 * Evidence required when implementer claims "no_changes_needed".
 * At least one of files_checked, grep_output, or commands_run must be populated.
 *
 * NOTE(review): every field is optional in this schema, so the "at least one"
 * rule is not enforced here; presumably the consumer of the parsed output
 * checks it - confirm.
 */
export const noChangesEvidenceSchema = z.object({
    files_checked: stringList().optional(),
    grep_output: z.string().max(8192).optional(),
    reason: z.string().optional(),
    commands_run: z
        .array(z.object({
            command: z.string(),
            exit_code: z.number()
        }))
        .optional()
});
/**
 * Implementer output: a status, a non-empty handoff memo, command and
 * observation lists that default to empty, and optional no-changes evidence.
 */
export const implementerOutputSchema = z.object({
    status: z.enum(['ok', 'blocked', 'failed', 'no_changes_needed']),
    handoff_memo: z.string().min(1),
    commands_run: stringList().default([]),
    observations: stringList().default([]),
    evidence: noChangesEvidenceSchema.optional()
});
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@weldr/runr",
3
+ "version": "0.3.0",
4
+ "description": "Phase-gated orchestration for agent tasks",
5
+ "type": "module",
6
+ "bin": {
7
+ "runr": "dist/cli.js",
8
+ "agent": "dist/cli.js"
9
+ },
10
+ "files": [
11
+ "dist/",
12
+ "templates/prompts/",
13
+ "README.md",
14
+ "LICENSE",
15
+ "NOTICE",
16
+ "CHANGELOG.md"
17
+ ],
18
+ "scripts": {
19
+ "build": "tsc -p tsconfig.json",
20
+ "prepare": "npm run build",
21
+ "dev": "node --loader ts-node/esm src/cli.ts",
22
+ "start": "node dist/cli.js",
23
+ "test": "vitest run",
24
+ "test:watch": "vitest",
25
+ "bench": "npx ts-node scripts/bench.ts",
26
+ "bench:dry": "npx ts-node scripts/bench.ts --dry-run",
27
+ "bench:minimal": "npx ts-node scripts/bench.ts --preset minimal",
28
+ "bench:context": "npx ts-node scripts/bench.ts --preset context",
29
+ "bench:stress": "npx ts-node scripts/bench.ts --preset stress",
30
+ "bench:full": "npx ts-node scripts/bench.ts --preset full"
31
+ },
32
+ "dependencies": {
33
+ "commander": "^12.1.0",
34
+ "execa": "^8.0.1",
35
+ "picomatch": "^4.0.2",
36
+ "pino": "^9.3.2",
37
+ "yaml": "^2.8.2",
38
+ "zod": "^3.23.8"
39
+ },
40
+ "devDependencies": {
41
+ "@types/node": "^22.7.5",
42
+ "@types/picomatch": "^4.0.2",
43
+ "ts-node": "^10.9.2",
44
+ "typescript": "^5.5.4",
45
+ "vitest": "^4.0.16"
46
+ }
47
+ }
@@ -0,0 +1,70 @@
1
+ # Implementer Prompt
2
+
3
+ You are the execution model. Implement the smallest viable change for the current milestone.
4
+ Follow scope lock. Do not edit lockfiles unless explicitly allowed.
5
+
6
+ **Important**: Scope patterns (allowlist/denylist) are **repo-relative paths**, not absolute paths.
7
+ Ignore any `.agent` substrings in your absolute working directory path - they do not affect scope compliance.
8
+ Only the relative path from the repo root matters (e.g., `src/foo.ts` not `/path/to/.agent-worktrees/123/src/foo.ts`).
9
+
10
+ ## Output Format
11
+
12
+ Return ONLY machine-readable JSON between BEGIN_JSON and END_JSON markers:
13
+
14
+ ```
15
+ BEGIN_JSON
16
+ {
17
+ "status": "ok" | "blocked" | "failed",
18
+ "handoff_memo": "Description of what was done or why blocked",
19
+ "commands_run": ["list", "of", "commands"],
20
+ "observations": ["notable", "findings"]
21
+ }
22
+ END_JSON
23
+ ```
24
+
25
+ ## Status Values
26
+
27
+ | Status | When to use | Effect |
28
+ |--------|-------------|--------|
29
+ | `ok` | Implementation complete, ready for verification | Proceeds to VERIFY phase |
30
+ | `blocked` | Cannot proceed without external input | Run stops with stop memo |
31
+ | `failed` | Unrecoverable error occurred | Run stops with stop memo |
32
+
33
+ ## Block Protocol
34
+
35
+ When you cannot complete a milestone (`status: "blocked"` or `status: "failed"`), structure your `handoff_memo` using this format:
36
+
37
+ ```
38
+ ## What broke
39
+ <Specific error or blocking issue>
40
+
41
+ ## Hypothesis A
42
+ <First theory about the cause>
43
+
44
+ ## Hypothesis B
45
+ <Alternative theory>
46
+
47
+ ## Experiment
48
+ <What you tried to diagnose>
49
+
50
+ ## Decision
51
+ <Conclusion based on experiments>
52
+
53
+ ## Next action
54
+ <What a human or future run should do>
55
+ ```
56
+
57
+ This structured format helps:
58
+ - Humans understand exactly what went wrong
59
+ - Future runs can learn from the diagnosis
60
+ - The stop memo captures actionable next steps
61
+
62
+ ## Fix Instructions
63
+
64
+ When retrying after verification failure, you receive `fixInstructions`:
65
+ - `failedCommand` - The command that failed
66
+ - `errorOutput` - Captured error output
67
+ - `changedFiles` - Files you modified
68
+ - `attemptNumber` - Current retry (1-3)
69
+
70
+ Use this to fix the specific issue that caused verification to fail.