@yail259/overnight 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/notes.md +0 -0
- package/.context/todos.md +0 -0
- package/dist/cli.js +1205 -181
- package/package.json +2 -2
- package/src/cli.ts +410 -127
- package/src/goal-runner.ts +709 -0
- package/src/planner.ts +238 -0
- package/src/runner.ts +7 -4
- package/src/security.ts +6 -6
- package/src/types.ts +48 -0
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
import { query, type Options as ClaudeCodeOptions } from "@anthropic-ai/claude-agent-sdk";
|
|
2
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
|
|
3
|
+
import { execSync } from "child_process";
|
|
4
|
+
import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
|
|
5
|
+
import {
|
|
6
|
+
type GoalConfig,
|
|
7
|
+
type GoalRunState,
|
|
8
|
+
type IterationState,
|
|
9
|
+
type GateResult,
|
|
10
|
+
type GateCheck,
|
|
11
|
+
type SecurityConfig,
|
|
12
|
+
DEFAULT_TOOLS,
|
|
13
|
+
DEFAULT_TIMEOUT,
|
|
14
|
+
DEFAULT_GOAL_STATE_FILE,
|
|
15
|
+
DEFAULT_MAX_TURNS,
|
|
16
|
+
DEFAULT_MAX_ITERATIONS,
|
|
17
|
+
DEFAULT_CONVERGENCE_THRESHOLD,
|
|
18
|
+
DEFAULT_DENY_PATTERNS,
|
|
19
|
+
} from "./types.js";
|
|
20
|
+
import { createSecurityHooks } from "./security.js";
|
|
21
|
+
|
|
22
|
+
/** Callback that receives one log line; in this file the lines may contain ANSI escape codes. */
type LogCallback = (msg: string) => void;
|
|
23
|
+
|
|
24
|
+
// --- State persistence ---
|
|
25
|
+
|
|
26
|
+
// Relative directory that holds the per-iteration `-state.yaml` and `-summary.md` files.
const ITERATION_DIR = ".overnight-iterations";
|
|
27
|
+
|
|
28
|
+
/** Creates the iteration directory unless something already exists at that path. */
function ensureIterationDir(): void {
  const alreadyPresent = existsSync(ITERATION_DIR);
  if (alreadyPresent) {
    return;
  }
  mkdirSync(ITERATION_DIR, { recursive: true });
}
|
|
33
|
+
|
|
34
|
+
/**
 * Persists the run state to disk.
 *
 * @param state - Run state to serialise (written as JSON with 2-space indentation).
 * @param stateFile - Destination path; an existing file is overwritten.
 */
export function saveGoalState(state: GoalRunState, stateFile: string): void {
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(stateFile, serialized);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reads a previously saved run state.
 *
 * @param stateFile - Path of the JSON state file.
 * @returns The parsed state, or `null` when the file does not exist.
 * @throws Error naming `stateFile` when the file exists but is not valid JSON
 *   (for example after an interrupted write); other read errors are rethrown as-is.
 */
export function loadGoalState(stateFile: string): GoalRunState | null {
  let raw: string;
  try {
    // Read directly instead of exists-then-read so a file that disappears in
    // between is still reported as "no saved state".
    raw = readFileSync(stateFile, "utf-8");
  } catch (e) {
    if ((e as { code?: string }).code === "ENOENT") return null;
    throw e;
  }

  try {
    return JSON.parse(raw) as GoalRunState;
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`Goal state file ${stateFile} is not valid JSON: ${reason}`);
  }
}
|
|
42
|
+
|
|
43
|
+
/** Writes `state` as YAML to `iteration-<n>-state.yaml` inside the iteration directory. */
function saveIterationState(iteration: number, state: IterationState): void {
  ensureIterationDir();
  const target = `${ITERATION_DIR}/iteration-${iteration}-state.yaml`;
  const yamlText = stringifyYaml(state);
  writeFileSync(target, yamlText);
}
|
|
50
|
+
|
|
51
|
+
/** Writes the narrative text to `iteration-<n>-summary.md` inside the iteration directory. */
function saveIterationNarrative(iteration: number, narrative: string): void {
  ensureIterationDir();
  const target = `${ITERATION_DIR}/iteration-${iteration}-summary.md`;
  writeFileSync(target, narrative);
}
|
|
58
|
+
|
|
59
|
+
/**
 * Loads the YAML state saved for the given iteration number.
 *
 * @returns The parsed state, or `null` when no state file exists for that iteration.
 */
function loadPreviousIterationState(iteration: number): IterationState | null {
  const stateFile = `${ITERATION_DIR}/iteration-${iteration}-state.yaml`;
  if (existsSync(stateFile)) {
    const yamlText = readFileSync(stateFile, "utf-8");
    return parseYaml(yamlText) as IterationState;
  }
  return null;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Loads the narrative summary saved for the given iteration number.
 *
 * @returns The file contents, or `null` when no summary file exists for that iteration.
 */
function loadPreviousNarrative(iteration: number): string | null {
  const summaryFile = `${ITERATION_DIR}/iteration-${iteration}-summary.md`;
  return existsSync(summaryFile) ? readFileSync(summaryFile, "utf-8") : null;
}
|
|
70
|
+
|
|
71
|
+
// --- Convergence detection ---
|
|
72
|
+
|
|
73
|
+
/**
 * Decides whether the build loop is still making progress.
 *
 * Looks at the most recent `threshold` iteration states and reports progress
 * when the number of remaining items dropped between any two consecutive
 * states in that window.
 *
 * @param states - Recorded iteration states, oldest first.
 * @param threshold - Window size in iterations. Values below 2 are treated as 2,
 *   because a window needs two states to contain a single before/after
 *   comparison; without this a threshold of 1 reported a stall after the very
 *   first iteration.
 * @returns `true` while there is not enough history or progress was seen,
 *   `false` when the loop has stalled.
 */
function isConverging(states: IterationState[], threshold: number): boolean {
  const windowSize = Math.max(2, Math.floor(threshold) || 0);
  if (states.length < windowSize) return true; // Not enough data yet

  const remainingCounts = states.slice(-windowSize).map((s) => s.remaining_items.length);

  // Progress means at least one consecutive pair where the count shrank.
  return remainingCounts.some((count, i) => i > 0 && count < remainingCounts[i - 1]);
}
|
|
88
|
+
|
|
89
|
+
// --- Progress display ---
|
|
90
|
+
|
|
91
|
+
// Glyphs that ProgressDisplay cycles through, one per tick, to animate the spinner.
const SPINNER_FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
92
|
+
|
|
93
|
+
/**
 * Single-line terminal spinner written to stdout.
 *
 * While running it redraws the current line every 100 ms with a spinner glyph,
 * the activity label and the seconds elapsed since the last `start` call.
 */
class ProgressDisplay {
  private timer: ReturnType<typeof setInterval> | null = null;
  private frameIndex = 0;
  private startedAt = Date.now();
  private label = "Working";

  /**
   * Sets the label and resets the clock and animation frame. If the spinner is
   * already running, only those values change; no second timer is created.
   */
  start(activity: string): void {
    this.label = activity;
    this.startedAt = Date.now();
    this.frameIndex = 0;

    if (this.timer === null) {
      this.timer = setInterval(() => this.redraw(), 100);
    }
  }

  /** Stops the animation, clears the spinner line and optionally prints `finalMessage`. */
  stop(finalMessage?: string): void {
    if (this.timer !== null) {
      clearInterval(this.timer);
      this.timer = null;
    }
    // Carriage return + "erase to end of line" wipes whatever frame is on screen.
    process.stdout.write("\r\x1b[K");
    if (finalMessage) {
      console.log(finalMessage);
    }
  }

  /** Draws one frame and advances to the next glyph. */
  private redraw(): void {
    const seconds = Math.floor((Date.now() - this.startedAt) / 1000);
    const glyph = SPINNER_FRAMES[this.frameIndex];
    process.stdout.write(`\r\x1b[K${glyph} ${this.label} (${seconds}s)`);
    this.frameIndex = (this.frameIndex + 1) % SPINNER_FRAMES.length;
  }
}
|
|
122
|
+
|
|
123
|
+
// --- Claude execution helpers ---
|
|
124
|
+
|
|
125
|
+
// Cached result of the first successful lookup; stays undefined until one succeeds.
let claudeExecutablePath: string | undefined;

/**
 * Locates the `claude` executable.
 *
 * Lookup order: cached value, the `CLAUDE_CODE_PATH` environment variable,
 * `where`/`which` on the PATH, then a short list of common install locations.
 *
 * @returns The executable path, or `undefined` when nothing was found. Only
 *   successful lookups are cached, so a later call can still succeed.
 */
function findClaudeExecutable(): string | undefined {
  if (claudeExecutablePath !== undefined) return claudeExecutablePath;

  const fromEnv = process.env.CLAUDE_CODE_PATH;
  if (fromEnv) {
    claudeExecutablePath = fromEnv;
    return claudeExecutablePath;
  }

  try {
    const cmd = process.platform === "win32" ? "where claude" : "which claude";
    // stderr is discarded so a "not found" message does not leak into the terminal.
    const stdout = execSync(cmd, { encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] });
    // `where` may print several CRLF-separated matches; take the first non-empty one.
    const firstMatch = stdout
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    if (firstMatch) {
      claudeExecutablePath = firstMatch;
      return claudeExecutablePath;
    }
  } catch {
    // Not on PATH (or the lookup command is unavailable); fall through.
  }

  const home = process.env.HOME;
  const commonPaths = [
    "/usr/local/bin/claude",
    "/opt/homebrew/bin/claude",
    // Skipped when HOME is unset, otherwise the path would start with "undefined/".
    ...(home ? [`${home}/.local/bin/claude`] : []),
  ];
  const found = commonPaths.find((candidate) => existsSync(candidate));
  if (found !== undefined) {
    claudeExecutablePath = found;
  }
  return found;
}
|
|
152
|
+
|
|
153
|
+
/**
 * Awaits `promise`, but gives up after `timeoutMs` milliseconds.
 *
 * @returns The value `promise` resolves with.
 * @throws Error with message `"TIMEOUT"` when the deadline passes first;
 *   otherwise whatever `promise` rejects with. The timer is cleared either way.
 */
async function runWithTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const deadline = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error("TIMEOUT"));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, deadline]);
  } finally {
    clearTimeout(timer);
  }
}
|
|
167
|
+
|
|
168
|
+
/**
 * Runs one prompt through the Claude agent SDK and collects the final result.
 *
 * @param prompt - Prompt text sent to the agent.
 * @param config - Goal config; `defaults` supplies allowed tools, the timeout
 *   and the security settings.
 * @param log - Not used by this function; kept for signature compatibility.
 * @param progress - Not used by this function; kept for signature compatibility.
 * @param resumeSessionId - When given, passed to the SDK as `resume`.
 * @returns `result` (set only when the SDK reports a successful result) and the
 *   last session id seen.
 * @throws Error when the Claude CLI cannot be located, `Error("TIMEOUT")` when
 *   the configured timeout elapses, or whatever the SDK stream throws.
 */
async function runClaudePrompt(
  prompt: string,
  config: GoalConfig,
  log: LogCallback,
  progress: ProgressDisplay,
  resumeSessionId?: string,
): Promise<{ result?: string; sessionId?: string }> {
  const claudePath = findClaudeExecutable();
  if (!claudePath) {
    throw new Error("Claude CLI not found. Install with: curl -fsSL https://claude.ai/install.sh | bash");
  }

  const tools = config.defaults?.allowed_tools ?? DEFAULT_TOOLS;
  const timeout = (config.defaults?.timeout_seconds ?? DEFAULT_TIMEOUT) * 1000;
  const security = config.defaults?.security;
  const securityHooks = security ? createSecurityHooks(security) : undefined;

  // Lets a timed-out run ask the SDK to stop instead of leaving it running.
  const abortController = new AbortController();

  const sdkOptions: ClaudeCodeOptions = {
    allowedTools: tools,
    permissionMode: "acceptEdits",
    pathToClaudeCodeExecutable: claudePath,
    abortController,
    ...(security?.max_turns && { maxTurns: security.max_turns }),
    ...(securityHooks && { hooks: securityHooks }),
    ...(resumeSessionId && { resume: resumeSessionId }),
  };

  let sessionId: string | undefined;
  let result: string | undefined;

  const consumeConversation = async (): Promise<void> => {
    for await (const message of query({ prompt, options: sdkOptions })) {
      if (message.type === "result") {
        sessionId = message.session_id;
        if (message.subtype === "success") {
          result = message.result;
        }
      } else if (message.type === "system" && "subtype" in message) {
        if (message.subtype === "init") {
          sessionId = message.session_id;
        }
      }
    }
  };

  // setTimeout cannot represent delays above 2^31-1 ms; larger values fire almost immediately.
  const MAX_TIMER_MS = 2147483647;

  try {
    if (Number.isFinite(timeout) && timeout > 0) {
      // Previously the timeout was computed but never applied.
      await runWithTimeout(consumeConversation(), Math.min(timeout, MAX_TIMER_MS));
    } else {
      // A non-positive or non-numeric timeout means "no deadline".
      await consumeConversation();
    }
  } catch (e) {
    abortController.abort();
    throw e;
  }

  return { result, sessionId };
}
|
|
214
|
+
|
|
215
|
+
// --- Build iteration ---
|
|
216
|
+
|
|
217
|
+
/**
 * Assembles the prompt for one build iteration.
 *
 * Sections, in order: goal, acceptance criteria, constraints, verification
 * commands (each only when non-empty), the iteration heading, the previous
 * iteration's state and summary (only when BOTH are available), and the fixed
 * instructions describing the YAML state block the agent must emit.
 *
 * @returns The sections joined with newlines.
 */
function buildIterationPrompt(
  goal: GoalConfig,
  iteration: number,
  previousState: IterationState | null,
  previousNarrative: string | null,
): string {
  // Renders "- item" lines; an empty list renders as `emptyText`.
  const asBullets = (items: readonly string[], emptyText: string): string => {
    const rendered = items.map((entry) => `- ${entry}`).join("\n");
    return rendered || emptyText;
  };

  const sections: string[] = [`# Goal\n\n${goal.goal}`];

  const criteria = goal.acceptance_criteria;
  if (criteria && criteria.length > 0) {
    const lines = criteria.map((c) => `- ${c}`).join("\n");
    sections.push(`\n# Acceptance Criteria\n\n${lines}`);
  }

  const constraints = goal.constraints;
  if (constraints && constraints.length > 0) {
    const lines = constraints.map((c) => `- ${c}`).join("\n");
    sections.push(`\n# Constraints\n\n${lines}`);
  }

  const commands = goal.verification_commands;
  if (commands && commands.length > 0) {
    const lines = commands.map((c) => `- \`${c}\``).join("\n");
    sections.push(`\n# Verification Commands (must pass)\n\n${lines}`);
  }

  sections.push(`\n# Iteration ${iteration}`);

  if (previousState && previousNarrative) {
    sections.push(
      `\n## Previous Iteration State\n\n### Completed Items\n${asBullets(previousState.completed_items, "- (none yet)")}`,
      `\n### Remaining Items\n${asBullets(previousState.remaining_items, "- (none)")}`,
      `\n### Known Issues\n${asBullets(previousState.known_issues, "- (none)")}`,
      `\n### Files Modified\n${asBullets(previousState.files_modified, "- (none)")}`,
      `\n### Previous Summary\n\n${previousNarrative}`,
    );
  }

  const instructionLines = [
    "\n# Instructions",
    "",
    `You are iteration ${iteration} of an autonomous build loop working toward the goal above.`,
    "",
    "1. Assess the current state of the project",
    "2. Identify the highest-priority remaining work",
    "3. Implement as much as you can in this iteration",
    "4. When done, output your structured state update in the following EXACT format:",
    "",
    "```yaml",
    "completed_items:",
    '- "item 1 you completed"',
    '- "item 2 you completed"',
    "remaining_items:",
    '- "item still to do"',
    '- "another item still to do"',
    "known_issues:",
    '- "any issues found"',
    "files_modified:",
    '- "path/to/file1.ts"',
    '- "path/to/file2.ts"',
    "agent_done: false # Set to true ONLY if you believe the goal is fully met",
    "```",
    "",
    "5. After the YAML block, write a brief narrative summary (2-3 paragraphs) of what you did, what challenges you encountered, and what the next iteration should focus on.",
    "",
    "IMPORTANT: Always output the YAML block wrapped in ```yaml ... ``` fences. This is how state is tracked between iterations.",
  ];
  sections.push(instructionLines.join("\n"));

  return sections.join("\n");
}
|
|
279
|
+
|
|
280
|
+
/**
 * Extracts the structured state and the narrative from a build agent's output.
 *
 * The state comes from the first ```yaml fenced block; the narrative is the
 * text after that block. The agent's YAML is untrusted, so every field is
 * normalised: list fields become `string[]` (scalars are wrapped, non-string
 * entries are JSON-encoded) and `agent_done` is `true` only for the boolean
 * `true` or the string "true".
 *
 * @param output - Raw text returned by the agent.
 * @param iteration - Iteration number stored in the returned state.
 * @returns The state and narrative. When no fenced block is found the whole
 *   output is the narrative; when the block is not a YAML mapping a placeholder
 *   state describing the problem is returned.
 */
function parseIterationOutput(output: string, iteration: number): { state: IterationState; narrative: string } {
  const timestamp = new Date().toISOString();

  // Placeholder state used whenever no usable YAML mapping is available.
  const placeholderState = (remaining: string, issue: string): IterationState => ({
    iteration,
    completed_items: [],
    remaining_items: [remaining],
    known_issues: [issue],
    files_modified: [],
    agent_done: false,
    timestamp,
  });

  // Coerces whatever the agent wrote into a list of strings.
  const toStringList = (value: unknown): string[] => {
    if (value === null || value === undefined) return [];
    const entries: unknown[] = Array.isArray(value) ? value : [value];
    return entries
      .filter((entry) => entry !== null && entry !== undefined)
      .map((entry) => (typeof entry === "string" ? entry : JSON.stringify(entry)));
  };

  // Tolerates trailing spaces after "yaml" and CRLF line endings.
  const fence = /```yaml[ \t]*\r?\n([\s\S]*?)\r?\n```/.exec(output);

  if (!fence) {
    return {
      state: placeholderState("(no structured output from agent)", "Agent did not output a YAML state block"),
      narrative: output,
    };
  }

  let state: IterationState;
  try {
    const parsed: unknown = parseYaml(fence[1] ?? "");
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("YAML state block is not a mapping");
    }
    const fields = parsed as Record<string, unknown>;
    const doneFlag = fields.agent_done;
    state = {
      iteration,
      completed_items: toStringList(fields.completed_items),
      remaining_items: toStringList(fields.remaining_items),
      known_issues: toStringList(fields.known_issues),
      files_modified: toStringList(fields.files_modified),
      // A quoted "false" is a truthy string; compare explicitly instead.
      agent_done: doneFlag === true || (typeof doneFlag === "string" && doneFlag.trim().toLowerCase() === "true"),
      timestamp,
    };
  } catch {
    state = placeholderState("(failed to parse agent output)", "Agent output did not contain valid YAML state block");
  }

  // Slice from the end of the matched fence so the offset always agrees with the regex.
  const afterYaml = output.slice(fence.index + fence[0].length).trim();
  return { state, narrative: afterYaml || "(no narrative provided)" };
}
|
|
333
|
+
|
|
334
|
+
// --- Final gate agent ---
|
|
335
|
+
|
|
336
|
+
/**
 * Assembles the prompt for the final verification ("gate") agent.
 *
 * Contains the goal, the acceptance criteria and verification commands (each
 * only when non-empty), the last build iteration's claimed state, the list of
 * checks to perform, and the exact YAML verdict format expected back.
 *
 * The verdict example indents the entries under `checks:` and `failures:`;
 * written flat, the per-check `passed:`/`output:` keys read as duplicate
 * top-level keys rather than as fields of each check.
 *
 * @param goal - Goal configuration.
 * @param iterationStates - Recorded iteration states; only the last one is used.
 *   When empty, each state list renders as "- (none)".
 */
function buildGatePrompt(goal: GoalConfig, iterationStates: IterationState[]): string {
  const lastState = iterationStates[iterationStates.length - 1];

  // Renders one list of the last state as bullets, or "- (none)" when empty/absent.
  const renderList = (pick: (s: IterationState) => string[]): string =>
    (lastState ? pick(lastState).map((entry) => `- ${entry}`).join("\n") : undefined) || "- (none)";

  const parts: string[] = [];

  parts.push(
    [
      "# Final Verification Gate",
      "",
      "You are a dedicated verification agent. You did NOT write this code. Your only job is to determine if the goal has been met to production quality. Be rigorous and honest.",
      "",
      "## Goal",
      "",
      `${goal.goal}`,
    ].join("\n"),
  );

  if (goal.acceptance_criteria && goal.acceptance_criteria.length > 0) {
    parts.push(`\n## Acceptance Criteria (ALL must be met)\n\n${goal.acceptance_criteria.map((c, i) => `${i + 1}. ${c}`).join("\n")}`);
  }

  if (goal.verification_commands && goal.verification_commands.length > 0) {
    parts.push(`\n## Required Verification Commands\n\nRun ALL of these. Each must pass:\n${goal.verification_commands.map((c) => `- \`${c}\``).join("\n")}`);
  }

  parts.push(
    [
      "\n## Build Agent's Final State",
      "",
      "### Completed Items",
      renderList((s) => s.completed_items),
      "",
      "### Claimed Remaining Items",
      renderList((s) => s.remaining_items),
      "",
      "### Known Issues",
      renderList((s) => s.known_issues),
      "",
      "## Instructions",
      "",
      "Perform EVERY form of verification you can:",
      "",
      "1. **Build check**: Does the project compile/build without errors?",
      "2. **Lint/type check**: Are there type errors or lint warnings?",
      "3. **Unit tests**: Do all unit tests pass?",
      "4. **E2E tests**: Do end-to-end tests pass?",
      "5. **Visual review**: Check rendered output if applicable",
      "6. **Manual walkthrough**: Trace key user flows through the code",
      "7. **Acceptance criteria**: Verify each criterion explicitly",
      "8. **Verification commands**: Run each command listed above",
      "9. **Code quality**: Look for obvious bugs, missing error handling, broken imports",
      "10. **Integration**: Is everything wired up? No dead code, no missing connections?",
      "",
      "After your review, output your verdict in this EXACT format:",
      "",
      "```yaml",
      "passed: false # or true",
      "checks:",
      '  - name: "Build"',
      "    passed: true",
      '    output: "npm run build succeeded"',
      '  - name: "Unit tests"',
      "    passed: false",
      '    output: "3 tests failed: ..."',
      'summary: "Brief overall assessment"',
      "failures:",
      '  - "Description of failure 1"',
      '  - "Description of failure 2"',
      "```",
      "",
      "Be thorough. Do not let bad quality pass. If ANYTHING is broken, set passed: false.",
    ].join("\n"),
  );

  return parts.join("\n");
}
|
|
404
|
+
|
|
405
|
+
/**
 * Parses the gate agent's verdict from its output.
 *
 * The verdict is read from the first ```yaml fenced block. The content is
 * untrusted, so `passed` counts as true only for the boolean `true` or the
 * string "true"; any other value (including a quoted "false" or "no") is a
 * failure. `checks` and `failures` are normalised to well-formed lists.
 *
 * @returns The verdict. When there is no fenced block, or it is not a YAML
 *   mapping, a failing result describing the problem is returned.
 */
function parseGateOutput(output: string): GateResult {
  const isTrue = (value: unknown): boolean =>
    value === true || (typeof value === "string" && value.trim().toLowerCase() === "true");

  const asText = (value: unknown, fallback: string): string => {
    if (value === null || value === undefined) return fallback;
    return typeof value === "string" ? value : JSON.stringify(value);
  };

  const toStringList = (value: unknown): string[] => {
    if (value === null || value === undefined) return [];
    const entries: unknown[] = Array.isArray(value) ? value : [value];
    return entries
      .filter((entry) => entry !== null && entry !== undefined)
      .map((entry) => asText(entry, ""));
  };

  // Tolerates trailing spaces after "yaml" and CRLF line endings.
  const fence = /```yaml[ \t]*\r?\n([\s\S]*?)\r?\n```/.exec(output);

  if (!fence) {
    return {
      passed: false,
      checks: [],
      summary: "Gate agent did not output a structured verdict",
      failures: ["No YAML verdict block found in gate agent output"],
    };
  }

  try {
    const parsed: unknown = parseYaml(fence[1] ?? "");
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("YAML verdict block is not a mapping");
    }
    const verdict = parsed as Record<string, unknown>;
    const rawChecks: unknown[] = Array.isArray(verdict.checks) ? verdict.checks : [];

    return {
      passed: isTrue(verdict.passed),
      checks: rawChecks.map((raw): GateCheck => {
        const check = (typeof raw === "object" && raw !== null ? raw : {}) as Record<string, unknown>;
        return {
          name: asText(check.name, "unknown"),
          passed: isTrue(check.passed),
          output: asText(check.output, ""),
        };
      }),
      summary: asText(verdict.summary, ""),
      failures: toStringList(verdict.failures),
    };
  } catch {
    return {
      passed: false,
      checks: [],
      summary: "Failed to parse gate agent output",
      failures: ["Gate agent output was not valid YAML"],
    };
  }
}
|
|
438
|
+
|
|
439
|
+
// --- Main goal runner ---
|
|
440
|
+
|
|
441
|
+
/**
 * Drives the autonomous build loop for a goal and then runs the final gate.
 *
 * Phase 1 runs build iterations until the agent reports it is done, the loop
 * stalls (see `isConverging`), or `max_iterations` is reached. Phase 2 runs the
 * gate agent up to three times; after a failed verdict (except on the last
 * attempt) a fix iteration is run with the gate's findings. State is saved
 * to `stateFile` after every recorded step so a later call can resume.
 *
 * @param goal - Goal configuration.
 * @param options - `stateFile` overrides the default state path; `log` receives
 *   progress lines. `notify` and `notifyTopic` are accepted but not used here.
 * @returns The final run state. Its status is one of "stalled", "gate_passed",
 *   "gate_failed" or "max_iterations"; it is never left as "running".
 */
export async function runGoal(
  goal: GoalConfig,
  options: {
    stateFile?: string;
    log?: LogCallback;
    notify?: boolean;
    notifyTopic?: string;
  } = {}
): Promise<GoalRunState> {
  const stateFile = options.stateFile ?? DEFAULT_GOAL_STATE_FILE;
  const log = options.log ?? (() => {});
  const maxIterations = goal.max_iterations ?? DEFAULT_MAX_ITERATIONS;
  const convergenceThreshold = goal.convergence_threshold ?? DEFAULT_CONVERGENCE_THRESHOLD;
  const progress = new ProgressDisplay();

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  // Load or create state
  const runState: GoalRunState = loadGoalState(stateFile) ?? {
    goal: goal.goal,
    iterations: [],
    gate_results: [],
    status: "running",
    timestamp: new Date().toISOString(),
  };

  // A state file left by a finished run carries a terminal status. Reset it so
  // the resumed run goes through the gate again instead of silently skipping it.
  if (runState.status !== "running") {
    runState.status = "running";
  }

  // Next unused iteration number. Failed iterations are not recorded, so the
  // recorded numbers can have gaps; counting entries alone could reuse a number
  // and overwrite that iteration's files.
  const nextIterationNumber = (): number =>
    runState.iterations.reduce(
      (highest, s) => (s.iteration > highest ? s.iteration : highest),
      runState.iterations.length,
    ) + 1;

  const startIteration = nextIterationNumber();

  if (startIteration > 1) {
    log(`\x1b[1movernight: Resuming from iteration ${startIteration}\x1b[0m`);
  } else {
    log(`\x1b[1movernight: Starting goal loop\x1b[0m`);
    log(`\x1b[2mGoal: ${goal.goal.slice(0, 80)}${goal.goal.length > 80 ? "..." : ""}\x1b[0m`);
    log(`\x1b[2mMax iterations: ${maxIterations}, convergence threshold: ${convergenceThreshold}\x1b[0m`);
  }
  log("");

  for (let iteration = startIteration; iteration <= maxIterations; iteration++) {
    log(`\x1b[1m━━━ Iteration ${iteration}/${maxIterations} ━━━\x1b[0m`);

    // Load context from the last RECORDED iteration. The one numerically
    // before this may have failed and written nothing.
    const lastRecorded = runState.iterations[runState.iterations.length - 1];
    const prevNumber = lastRecorded?.iteration ?? iteration - 1;
    const prevState = prevNumber >= 1 ? loadPreviousIterationState(prevNumber) : null;
    const prevNarrative = prevNumber >= 1 ? loadPreviousNarrative(prevNumber) : null;

    // Check convergence
    if (!isConverging(runState.iterations, convergenceThreshold)) {
      log(`\x1b[33m⚠ Build loop stalled — remaining items unchanged for ${convergenceThreshold} iterations\x1b[0m`);
      runState.status = "stalled";
      saveGoalState(runState, stateFile);
      break;
    }

    // Build and run prompt
    const prompt = buildIterationPrompt(goal, iteration, prevState, prevNarrative);
    progress.start(`Iteration ${iteration}`);

    try {
      const { result } = await runClaudePrompt(prompt, goal, log, progress);
      progress.stop();

      if (!result) {
        log(`\x1b[31m✗ No output from build agent\x1b[0m`);
        continue;
      }

      // Parse output
      const { state: iterState, narrative } = parseIterationOutput(result, iteration);

      // Persist
      saveIterationState(iteration, iterState);
      saveIterationNarrative(iteration, narrative);
      runState.iterations.push(iterState);
      runState.timestamp = new Date().toISOString();
      saveGoalState(runState, stateFile);

      // Summary
      log(`\x1b[32m✓ Iteration ${iteration} complete\x1b[0m`);
      log(`  Completed: ${iterState.completed_items.length} items`);
      log(`  Remaining: ${iterState.remaining_items.length} items`);
      if (iterState.known_issues.length > 0) {
        log(`  Issues: ${iterState.known_issues.length}`);
      }

      // Check if agent reports done
      if (iterState.agent_done) {
        log(`\n\x1b[36m◆ Build agent reports goal is met — running final gate...\x1b[0m\n`);
        break;
      }
    } catch (e) {
      progress.stop();
      const message = describe(e);
      log(`\x1b[31m✗ Iteration ${iteration} failed: ${message}\x1b[0m`);

      if (message === "TIMEOUT") {
        log(`\x1b[33m  Continuing to next iteration...\x1b[0m`);
      }
      // Timeout or not, a failed iteration is skipped and the loop moves on.
      continue;
    }

    log("");
  }

  // --- Final gate ---
  if (runState.status === "running") {
    const maxGateAttempts = 3;

    for (let gateAttempt = 1; gateAttempt <= maxGateAttempts; gateAttempt++) {
      log(`\x1b[1m━━━ Final Gate (attempt ${gateAttempt}/${maxGateAttempts}) ━━━\x1b[0m`);

      const gatePrompt = buildGatePrompt(goal, runState.iterations);

      // Gate agent needs Bash for running verification commands
      const gateGoalConfig: GoalConfig = {
        ...goal,
        defaults: {
          ...goal.defaults,
          allowed_tools: [...(goal.defaults?.allowed_tools ?? DEFAULT_TOOLS), "Bash"],
        },
      };

      progress.start("Running final gate");

      try {
        const { result } = await runClaudePrompt(gatePrompt, gateGoalConfig, log, progress);
        progress.stop();

        if (!result) {
          log(`\x1b[31m✗ No output from gate agent\x1b[0m`);
          continue;
        }

        const gateResult = parseGateOutput(result);
        runState.gate_results.push(gateResult);
        saveGoalState(runState, stateFile);

        if (gateResult.passed) {
          log(`\x1b[32m✓ GATE PASSED\x1b[0m`);
          log(`  ${gateResult.summary}`);
          for (const check of gateResult.checks) {
            const icon = check.passed ? "\x1b[32m✓\x1b[0m" : "\x1b[31m✗\x1b[0m";
            log(`  ${icon} ${check.name}`);
          }
          runState.status = "gate_passed";
          saveGoalState(runState, stateFile);
          break;
        } else {
          log(`\x1b[31m✗ GATE FAILED\x1b[0m`);
          log(`  ${gateResult.summary}`);
          for (const failure of gateResult.failures) {
            log(`  \x1b[31m- ${failure}\x1b[0m`);
          }

          if (gateAttempt < maxGateAttempts) {
            // Loop back to build agent with gate failures
            log(`\n\x1b[36m◆ Looping back to build agent with gate failures...\x1b[0m\n`);

            const fixIteration = nextIterationNumber();
            const fixPrompt = buildGateFixPrompt(goal, gateResult, fixIteration);

            progress.start(`Fix iteration ${fixIteration}`);
            try {
              const { result: fixResult } = await runClaudePrompt(fixPrompt, goal, log, progress);
              progress.stop();

              if (fixResult) {
                const { state: fixState, narrative: fixNarrative } = parseIterationOutput(fixResult, fixIteration);
                saveIterationState(fixIteration, fixState);
                saveIterationNarrative(fixIteration, fixNarrative);
                runState.iterations.push(fixState);
                saveGoalState(runState, stateFile);

                log(`\x1b[32m✓ Fix iteration complete\x1b[0m`);
                log(`  Fixed: ${fixState.completed_items.length} items`);
              }
            } catch (e) {
              progress.stop();
              log(`\x1b[31m✗ Fix iteration failed: ${describe(e)}\x1b[0m`);
            }
          } else {
            runState.status = "gate_failed";
            saveGoalState(runState, stateFile);
          }
        }
      } catch (e) {
        progress.stop();
        log(`\x1b[31m✗ Gate failed: ${describe(e)}\x1b[0m`);
      }

      log("");
    }
  }

  // Still "running" here means the gate never settled the status: every
  // attempt errored, returned nothing, or the last attempt produced no verdict.
  if (runState.status === "running") {
    const lastState = runState.iterations[runState.iterations.length - 1];
    if (!lastState?.agent_done) {
      log(`\x1b[33m⚠ Reached max iterations (${maxIterations}) without completion\x1b[0m`);
      runState.status = "max_iterations";
    } else {
      // The agent claimed completion but nothing verified it; do not report
      // the run as still in progress.
      log(`\x1b[31m✗ Final gate did not produce a verdict\x1b[0m`);
      runState.status = "gate_failed";
    }
    saveGoalState(runState, stateFile);
  }

  return runState;
}
|
|
646
|
+
|
|
647
|
+
/**
 * Builds the prompt that sends a failed gate verdict back to the build agent.
 *
 * Lists the gate's failures, each check as PASS/FAIL with its output, and the
 * gate summary, followed by instructions and the YAML state block format the
 * agent must emit.
 *
 * @param goal - Goal configuration; only `goal.goal` is used.
 * @param gateResult - The failing verdict to report.
 * @param iteration - Not used in the prompt text.
 */
function buildGateFixPrompt(goal: GoalConfig, gateResult: GateResult, iteration: number): string {
  const failureLines = gateResult.failures.map((f) => `- ${f}`).join("\n");
  const checkLines = gateResult.checks
    .map((c) => `- ${c.passed ? "PASS" : "FAIL"}: ${c.name} — ${c.output}`)
    .join("\n");

  const lines = [
    "# Goal",
    "",
    `${goal.goal}`,
    "",
    "# Urgent: Fix Gate Failures",
    "",
    "The final verification gate FAILED. You must fix these issues:",
    "",
    "## Failures",
    "",
    failureLines,
    "",
    "## Check Results",
    "",
    checkLines,
    "",
    "## Gate Summary",
    "",
    `${gateResult.summary}`,
    "",
    "# Instructions",
    "",
    "Fix ALL of the failures listed above. Focus exclusively on making the gate pass. Do not add new features.",
    "",
    "When done, output your state update:",
    "",
    "```yaml",
    "completed_items:",
    '- "fixed: description of what you fixed"',
    "remaining_items:",
    '- "any remaining issues"',
    "known_issues:",
    '- "any issues you could not fix"',
    "files_modified:",
    '- "path/to/file.ts"',
    "agent_done: true",
    "```",
    "",
    "Then write a brief summary of what you fixed.",
  ];

  return lines.join("\n");
}
|
|
688
|
+
|
|
689
|
+
// --- Goal file parsing ---
|
|
690
|
+
|
|
691
|
+
/**
 * Reads and validates a goal YAML file.
 *
 * Prints an error and terminates the process with exit code 1 when the file
 * is not valid YAML or does not contain a non-empty string `goal` field. This
 * includes an empty file (which parses to `null`) and a non-string `goal`,
 * which would otherwise fail later when the goal text is used as a string.
 *
 * @param path - Path of the goal file.
 * @returns The parsed goal configuration.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export function parseGoalFile(path: string): GoalConfig {
  const content = readFileSync(path, "utf-8");

  let data: unknown;
  try {
    data = parseYaml(content);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    console.error(`\x1b[31mError parsing ${path}:\x1b[0m`);
    console.error(`  ${reason.split("\n")[0]}`);
    process.exit(1);
  }

  const goalText =
    typeof data === "object" && data !== null ? (data as { goal?: unknown }).goal : undefined;

  if (typeof goalText !== "string" || goalText.trim() === "") {
    console.error(`\x1b[31mError: goal.yaml must have a 'goal' field\x1b[0m`);
    process.exit(1);
  }

  return data as GoalConfig;
}
|