claude-overnight 0.5.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -105
- package/dist/index.js +480 -72
- package/dist/planner.d.ts +15 -2
- package/dist/planner.js +99 -22
- package/dist/types.d.ts +34 -0
- package/package.json +14 -8
package/README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# claude-overnight
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Run 10, 100, or 1000 Claude agents overnight. Come back to shipped work.
|
|
4
4
|
|
|
5
|
-
Describe what to build. Set a budget
|
|
5
|
+
Describe what to build. Set a budget. The tool plans, explores your codebase, breaks the objective into tasks, launches parallel agents in isolated git worktrees, iterates toward quality, and handles rate limits automatically. You press Run once, then go to sleep.
|
|
6
|
+
|
|
7
|
+
Built on the [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk). Works with Claude Opus, Sonnet, and Haiku.
|
|
6
8
|
|
|
7
9
|
## Install
|
|
8
10
|
|
|
@@ -10,18 +12,14 @@ Describe what to build. Set a budget — 10 agents, 100, 1000. A planner agent a
|
|
|
10
12
|
npm install -g claude-overnight
|
|
11
13
|
```
|
|
12
14
|
|
|
13
|
-
Requires Node.js >= 20 and Claude authentication (
|
|
14
|
-
|
|
15
|
-
## Usage
|
|
15
|
+
Requires Node.js >= 20 and Claude authentication (`claude auth login`, or set `ANTHROPIC_API_KEY`).
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
## Quick start
|
|
18
18
|
|
|
19
19
|
```bash
|
|
20
20
|
claude-overnight
|
|
21
21
|
```
|
|
22
22
|
|
|
23
|
-
A guided flow walks you through each step:
|
|
24
|
-
|
|
25
23
|
```
|
|
26
24
|
🌙 claude-overnight
|
|
27
25
|
────────────────────────────────────
|
|
@@ -29,98 +27,102 @@ A guided flow walks you through each step:
|
|
|
29
27
|
① What should the agents do?
|
|
30
28
|
> refactor auth, add tests, update docs
|
|
31
29
|
|
|
32
|
-
② Budget [10]:
|
|
30
|
+
② Budget [10]: 200
|
|
33
31
|
|
|
34
32
|
③ Worker model:
|
|
35
33
|
● Sonnet — Sonnet 4.6 · Best for everyday tasks
|
|
36
34
|
○ Opus — Opus 4.6 · Most capable
|
|
37
|
-
○ Haiku — Haiku 4.5 · Fastest
|
|
38
35
|
|
|
39
36
|
④ Usage:
|
|
40
|
-
●
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
37
|
+
● 90% · leave 10% for other work
|
|
38
|
+
|
|
39
|
+
╭──────────────────────────────────────────╮
|
|
40
|
+
│ sonnet · budget 200 · 5× · flex · 90% │
|
|
41
|
+
╰──────────────────────────────────────────╯
|
|
42
|
+
|
|
43
|
+
✓ 5 themes → review, press Run, walk away
|
|
44
|
+
|
|
45
|
+
◆ Thinking: 5 agents exploring... ← architects analyze your codebase
|
|
46
|
+
◆ Orchestrating plan... ← synthesizes 50 concrete tasks
|
|
47
|
+
◆ Wave 1 · 50 tasks ← fully autonomous from here
|
|
48
|
+
◆ Assessing... how close to amazing?
|
|
49
|
+
◆ Wave 2 · 30 tasks ← improvements from assessment
|
|
50
|
+
◆ Reflection: 2 agents reviewing ← deep quality audit
|
|
51
|
+
◆ Wave 3 · 20 tasks ← fixes from review findings
|
|
52
|
+
◆ Assessing... ✓ Vision met
|
|
46
53
|
```
|
|
47
54
|
|
|
48
|
-
|
|
55
|
+
You interact once (objective, budget, model, review themes), then everything runs autonomously — thinking, planning, executing, reflecting, steering. Rate-limited? It waits and retries. Crash? Resume where you left off.
|
|
49
56
|
|
|
50
|
-
|
|
57
|
+
## How it works
|
|
51
58
|
|
|
52
|
-
|
|
53
|
-
claude-overnight tasks.json
|
|
54
|
-
```
|
|
59
|
+
### 1. Thinking wave
|
|
55
60
|
|
|
56
|
-
|
|
61
|
+
For budgets larger than 3× the concurrency (i.e. > 15 at the default concurrency of 5), the tool launches **architect agents** that explore your codebase before any code is written. Each one gets a different research angle (architecture, data models, APIs, testing, etc.) and writes a structured design document. The number scales with budget and is capped at 10: with the default concurrency, 5 for budget=50 and 10 for budget=2000.
|
|
57
62
|
|
|
58
|
-
|
|
59
|
-
claude-overnight "fix auth bug in src/auth.ts" "add tests for user model"
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
## How the planner works
|
|
63
|
-
|
|
64
|
-
The planner always runs on the best available model (Opus) regardless of which model you pick for workers. This ensures high-quality task decomposition even when workers use a cheaper model.
|
|
63
|
+
### 2. Orchestration
|
|
65
64
|
|
|
66
|
-
|
|
65
|
+
An orchestrator agent reads all design documents and synthesizes concrete execution tasks — grounded in real files and patterns the architects found. No guesswork.
|
|
67
66
|
|
|
68
|
-
|
|
67
|
+
### 3. Iterative execution
|
|
69
68
|
|
|
70
|
-
|
|
71
|
-
⠋ identifying themes... → splits objective into N angles (< 30s)
|
|
72
|
-
✓ 10 themes → review themes, press Run, walk away
|
|
73
|
-
◆ Thinking: 10 agents exploring → each explores from its angle, writes a design doc
|
|
74
|
-
◆ Orchestrating plan... → reads all design docs, synthesizes execution tasks
|
|
75
|
-
◆ Wave 1 · 50 tasks → fully autonomous from here
|
|
76
|
-
◆ Steering... → adapts between waves, retries on rate limits
|
|
77
|
-
```
|
|
69
|
+
Tasks run in parallel (each agent in its own git worktree). After each wave, steering assesses: "how good is this?" — not "what's missing?" It can:
|
|
78
70
|
|
|
79
|
-
|
|
71
|
+
- **Execute** more tasks to build features, fix bugs, polish UX
|
|
72
|
+
- **Reflect** by spinning up 1-2 review agents for deep quality/architecture audits
|
|
73
|
+
- **Declare done** when the vision is met at high quality
|
|
80
74
|
|
|
81
|
-
|
|
75
|
+
### 4. Goal refinement
|
|
82
76
|
|
|
83
|
-
|
|
77
|
+
The tool starts with your broad objective but evolves its definition of "amazing" as it learns your codebase. Steering refines the goal after each wave. Late waves are informed by early discoveries.
|
|
84
78
|
|
|
85
|
-
###
|
|
79
|
+
### 5. Three-layer context
|
|
86
80
|
|
|
87
|
-
|
|
81
|
+
Long runs stay sharp because steering maintains three layers of memory:
|
|
88
82
|
|
|
89
|
-
**
|
|
83
|
+
- **Status** — a living project snapshot, updated every wave. Compressed, never truncated.
|
|
84
|
+
- **Milestones** — strategic snapshots archived every ~5 waves. Long-term memory.
|
|
85
|
+
- **Goal** — the evolving north star. What "amazing" means for this codebase.
|
|
90
86
|
|
|
91
|
-
|
|
87
|
+
## Run history and resume
|
|
92
88
|
|
|
93
|
-
|
|
89
|
+
Every run gets its own folder in `.claude-overnight/runs/`. Nothing is ever overwritten.
|
|
94
90
|
|
|
95
|
-
|
|
91
|
+
```
|
|
92
|
+
.claude-overnight/
|
|
93
|
+
runs/
|
|
94
|
+
2026-04-04T18-52-49/ ← run A (done, $200, 200 tasks)
|
|
95
|
+
run.json, status.md, goal.md, milestones/, sessions/
|
|
96
|
+
2026-04-05T10-30-00/ ← run B (crashed)
|
|
97
|
+
run.json, sessions/
|
|
98
|
+
```
|
|
96
99
|
|
|
97
|
-
|
|
100
|
+
If a run crashes, gets rate-limited, or you Ctrl+C:
|
|
98
101
|
|
|
99
|
-
|
|
102
|
+
```
|
|
103
|
+
⚠ Interrupted run
|
|
104
|
+
╭──────────────────────────────────────────────────╮
|
|
105
|
+
│ refactor auth, add tests, update docs │
|
|
106
|
+
│ 50/200 sessions · 3 waves · $69.16 │
|
|
107
|
+
│ 34 merged · 16 unmerged · 0 failed branches │
|
|
108
|
+
╰──────────────────────────────────────────────────╯
|
|
109
|
+
|
|
110
|
+
Resume │ Fresh │ Quit
|
|
111
|
+
```
|
|
100
112
|
|
|
101
|
-
|
|
113
|
+
On resume: unmerged branches auto-merge, the wave loop continues, all context is preserved.
|
|
102
114
|
|
|
103
|
-
**
|
|
115
|
+
**Knowledge carries forward** — new runs inherit knowledge from completed previous runs. Thinking agents and steering see what past runs built. Run 2 knows run 1 already built the auth system.
|
|
104
116
|
|
|
105
|
-
|
|
117
|
+
Add `.claude-overnight` to your `.gitignore`.
|
|
106
118
|
|
|
107
|
-
##
|
|
119
|
+
## Other usage modes
|
|
108
120
|
|
|
109
|
-
|
|
121
|
+
### Task file
|
|
110
122
|
|
|
123
|
+
```bash
|
|
124
|
+
claude-overnight tasks.json
|
|
111
125
|
```
|
|
112
|
-
④ Usage:
|
|
113
|
-
● Unlimited · full capacity, wait through rate limits
|
|
114
|
-
○ 90% · leave 10% for other work
|
|
115
|
-
○ 75% · conservative, plenty of headroom
|
|
116
|
-
○ 50% · use half, keep the rest
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
When utilization hits your cap, the swarm stops dispatching new tasks and lets active agents finish gracefully. This way you can run a big overnight job and still have capacity left for manual Claude usage.
|
|
120
|
-
|
|
121
|
-
Use `--usage-cap=90` on the command line, or `"usageCap": 90` in task files.
|
|
122
|
-
|
|
123
|
-
## Task file format
|
|
124
126
|
|
|
125
127
|
```json
|
|
126
128
|
{
|
|
@@ -135,71 +137,67 @@ Use `--usage-cap=90` on the command line, or `"usageCap": 90` in task files.
|
|
|
135
137
|
}
|
|
136
138
|
```
|
|
137
139
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
For multi-wave runs from a task file, add `objective` and `flexiblePlan`:
|
|
140
|
+
For multi-wave runs, add `objective` and `flexiblePlan`:
|
|
141
141
|
|
|
142
142
|
```json
|
|
143
143
|
{
|
|
144
|
-
"objective": "Modernize the auth system
|
|
144
|
+
"objective": "Modernize the auth system",
|
|
145
145
|
"flexiblePlan": true,
|
|
146
146
|
"tasks": ["Refactor auth middleware", "Add JWT validation"],
|
|
147
147
|
"usageCap": 90
|
|
148
148
|
}
|
|
149
149
|
```
|
|
150
150
|
|
|
151
|
-
|
|
151
|
+
### Inline
|
|
152
152
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
| `objective` | `string` | — | High-level goal for multi-wave steering (required when `flexiblePlan` is true) |
|
|
157
|
-
| `flexiblePlan` | `boolean` | `false` | Enable adaptive multi-wave planning from task files |
|
|
158
|
-
| `model` | `string` | prompted | Worker model (per-task overridable) |
|
|
159
|
-
| `concurrency` | `number` | `5` | Max parallel agents |
|
|
160
|
-
| `worktrees` | `boolean` | auto (git repo) | Isolate each agent in a git worktree |
|
|
161
|
-
| `permissionMode` | `"auto" \| "bypassPermissions" \| "default"` | `"auto"` | How agents handle dangerous operations |
|
|
162
|
-
| `cwd` | `string` | `process.cwd()` | Working directory |
|
|
163
|
-
| `allowedTools` | `string[]` | all | Restrict agent tools |
|
|
164
|
-
| `mergeStrategy` | `"yolo" \| "branch"` | `"yolo"` | Merge into HEAD or a new branch |
|
|
165
|
-
| `usageCap` | `number (0-100)` | unlimited | Stop at N% utilization (e.g. 90) |
|
|
153
|
+
```bash
|
|
154
|
+
claude-overnight "fix auth bug in src/auth.ts" "add tests for user model"
|
|
155
|
+
```
|
|
166
156
|
|
|
167
157
|
## CLI flags
|
|
168
158
|
|
|
169
159
|
| Flag | Default | Description |
|
|
170
160
|
|---|---|---|
|
|
171
|
-
| `--budget=N` | `10` | Total agent sessions
|
|
172
|
-
| `--concurrency=N` | `5` |
|
|
173
|
-
| `--model=NAME` | prompted | Worker model (planner
|
|
161
|
+
| `--budget=N` | `10` | Total agent sessions |
|
|
162
|
+
| `--concurrency=N` | `5` | Parallel agents |
|
|
163
|
+
| `--model=NAME` | prompted | Worker model (planner uses best available) |
|
|
174
164
|
| `--usage-cap=N` | unlimited | Stop at N% utilization |
|
|
175
|
-
| `--timeout=SECONDS` | `300` | Inactivity timeout
|
|
176
|
-
| `--no-flex` | — | Disable
|
|
165
|
+
| `--timeout=SECONDS` | `300` | Inactivity timeout per agent |
|
|
166
|
+
| `--no-flex` | — | Disable multi-wave steering |
|
|
177
167
|
| `--dry-run` | — | Show planned tasks without running |
|
|
178
|
-
| `-h, --help` | — | Help |
|
|
179
|
-
| `-v, --version` | — | Version |
|
|
180
168
|
|
|
181
|
-
|
|
169
|
+
## Task file fields
|
|
182
170
|
|
|
183
|
-
|
|
171
|
+
| Field | Type | Default | Description |
|
|
172
|
+
|---|---|---|---|
|
|
173
|
+
| `tasks` | `(string \| {prompt, cwd?, model?})[]` | required | Tasks to run |
|
|
174
|
+
| `objective` | `string` | — | High-level goal for steering |
|
|
175
|
+
| `flexiblePlan` | `boolean` | `false` | Enable multi-wave planning |
|
|
176
|
+
| `model` | `string` | prompted | Worker model |
|
|
177
|
+
| `concurrency` | `number` | `5` | Parallel agents |
|
|
178
|
+
| `worktrees` | `boolean` | auto | Git worktree isolation |
|
|
179
|
+
| `permissionMode` | `"auto" \| "bypassPermissions" \| "default"` | `"auto"` | Permission handling |
|
|
180
|
+
| `mergeStrategy` | `"yolo" \| "branch"` | `"yolo"` | Merge into HEAD or new branch |
|
|
181
|
+
| `usageCap` | `number (0-100)` | unlimited | Stop at N% utilization |
|
|
184
182
|
|
|
185
|
-
|
|
183
|
+
## Rate limits
|
|
186
184
|
|
|
187
|
-
|
|
188
|
-
- **Hard block**: API returns a reset timestamp — swarm pauses and resumes exactly when the window opens.
|
|
189
|
-
- **Soft throttle**: at >75% utilization, dispatch slows to avoid hitting the limit.
|
|
190
|
-
- **Retry with backoff**: transient errors (429, overloaded, connection reset) retry with exponential backoff.
|
|
191
|
-
- **Usage cap**: set a ceiling and the swarm stops dispatching when it's reached — active agents finish, no new ones start.
|
|
185
|
+
Built for unattended runs lasting hours or days.
|
|
192
186
|
|
|
193
|
-
|
|
187
|
+
- **Hard block**: pauses until the rate limit window resets, then resumes
|
|
188
|
+
- **Soft throttle**: slows dispatch at >75% utilization
|
|
189
|
+
- **Retry with backoff**: transient errors (429, overloaded) retry automatically
|
|
190
|
+
- **Usage cap**: set a ceiling, active agents finish, no new ones start
|
|
191
|
+
- **Planner retries**: steering and orchestration also retry on rate limits (30s/60s/120s backoff)
|
|
194
192
|
|
|
195
193
|
## Worktrees and merging
|
|
196
194
|
|
|
197
|
-
Each agent gets an isolated git worktree
|
|
195
|
+
Each agent gets an isolated git worktree (`swarm/task-N` branch). Changes auto-commit. After all agents complete, branches merge back.
|
|
198
196
|
|
|
199
|
-
- `"yolo"` (default): merges
|
|
200
|
-
- `"branch"`: creates a `swarm/run-{timestamp}` branch
|
|
197
|
+
- `"yolo"` (default): merges into your current branch
|
|
198
|
+
- `"branch"`: creates a new `swarm/run-{timestamp}` branch
|
|
201
199
|
|
|
202
|
-
|
|
200
|
+
Conflicts retry with `-X theirs`. Unresolved branches are preserved for manual merge.
|
|
203
201
|
|
|
204
202
|
## Exit codes
|
|
205
203
|
|
|
@@ -208,3 +206,7 @@ Merge conflicts retry with `-X theirs`. If that fails, the branch is preserved f
|
|
|
208
206
|
| `0` | All tasks succeeded |
|
|
209
207
|
| `1` | Some tasks failed |
|
|
210
208
|
| `2` | All failed or none completed |
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { readFileSync, existsSync, mkdirSync, readdirSync, rmSync } from "fs";
|
|
2
|
+
import { readFileSync, existsSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "fs";
|
|
3
3
|
import { resolve, dirname, join } from "path";
|
|
4
4
|
import { fileURLToPath } from "url";
|
|
5
5
|
import { execSync } from "child_process";
|
|
@@ -7,7 +7,7 @@ import { createInterface } from "readline";
|
|
|
7
7
|
import chalk from "chalk";
|
|
8
8
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
9
9
|
import { Swarm } from "./swarm.js";
|
|
10
|
-
import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks, orchestrate } from "./planner.js";
|
|
10
|
+
import { planTasks, refinePlan, detectModelTier, steerWave, identifyThemes, buildThinkingTasks, buildReflectionTasks, orchestrate } from "./planner.js";
|
|
11
11
|
import { startRenderLoop, renderSummary } from "./ui.js";
|
|
12
12
|
// ── CLI flag parsing ──
|
|
13
13
|
function parseCliFlags(argv) {
|
|
@@ -270,7 +270,7 @@ function showPlan(tasks) {
|
|
|
270
270
|
}
|
|
271
271
|
console.log(chalk.dim(` ${"─".repeat(ruleLen)}\n`));
|
|
272
272
|
}
|
|
273
|
-
function
|
|
273
|
+
function readMdDir(dir) {
|
|
274
274
|
try {
|
|
275
275
|
const files = readdirSync(dir).filter(f => f.endsWith(".md")).sort();
|
|
276
276
|
return files.map(f => {
|
|
@@ -282,6 +282,191 @@ function readDesignDocs(dir) {
|
|
|
282
282
|
return "";
|
|
283
283
|
}
|
|
284
284
|
}
|
|
285
|
+
/**
 * Gather everything persisted for a run into one object.
 *
 * `goal.md` and `status.md` are read from `runDir`; a file that cannot be
 * read contributes an empty string. The `designs`, `reflections` and
 * `milestones` sub-directories are loaded through `readMdDir`.
 *
 * @param runDir Directory of the run.
 * @param previousRuns Value passed through unchanged as `previousRuns`.
 * @returns `{ designs, reflections, milestones, status, goal, previousRuns }`.
 */
function readRunMemory(runDir, previousRuns) {
    // Best-effort read: any failure simply yields "".
    const readOrEmpty = (fileName) => {
        try {
            return readFileSync(join(runDir, fileName), "utf-8");
        }
        catch {
            return "";
        }
    };
    const goal = readOrEmpty("goal.md");
    const status = readOrEmpty("status.md");
    const designs = readMdDir(join(runDir, "designs"));
    const reflections = readMdDir(join(runDir, "reflections"));
    const milestones = readMdDir(join(runDir, "milestones"));
    return { designs, reflections, milestones, status, goal, previousRuns };
}
|
|
304
|
+
/**
 * Overwrite `status.md` inside `baseDir` with the given text (UTF-8).
 *
 * @param baseDir Directory that holds `status.md`.
 * @param status Full replacement content of the file.
 */
function writeStatus(baseDir, status) {
    const statusFile = join(baseDir, "status.md");
    writeFileSync(statusFile, status, "utf-8");
}
|
|
307
|
+
/**
 * Persist a run's state as pretty-printed JSON (2-space indent) in
 * `<runDir>/run.json`, creating `runDir` and any missing parents first.
 *
 * @param runDir Directory of the run.
 * @param state JSON-serialisable state object.
 */
function saveRunState(runDir, state) {
    mkdirSync(runDir, { recursive: true });
    const stateFile = join(runDir, "run.json");
    const serialized = JSON.stringify(state, null, 2);
    writeFileSync(stateFile, serialized, "utf-8");
}
|
|
311
|
+
/**
 * Load the saved state of a run from `<runDir>/run.json`.
 *
 * @param runDir Directory of the run.
 * @returns The parsed state object, or `null` when the file is missing,
 *   unreadable, not valid JSON, or does not contain a JSON object.
 */
function loadRunState(runDir) {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(join(runDir, "run.json"), "utf-8"));
    }
    catch {
        return null;
    }
    // Callers read fields such as `phase` straight off the result. A run.json
    // that holds a bare number, string, array or `null` is not a usable state,
    // so report it the same way as a missing file instead of handing it on.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed))
        return null;
    return parsed;
}
|
|
319
|
+
/**
 * Locate the most recent unfinished run under `<rootDir>/runs`.
 *
 * Entry names are sorted in descending order and scanned from the top; the
 * first entry whose saved state loads and has a `phase` other than "done"
 * is returned.
 *
 * @param rootDir Directory that contains the `runs` folder.
 * @returns `{ dir, state }` for that run, or `null` when there is none or
 *   the runs directory cannot be read.
 */
function findIncompleteRun(rootDir) {
    const runsDir = join(rootDir, "runs");
    try {
        const newestFirst = readdirSync(runsDir).sort().reverse();
        for (const name of newestFirst) {
            const dir = join(runsDir, name);
            const state = loadRunState(dir);
            if (!state || state.phase === "done")
                continue;
            return { dir, state };
        }
    }
    catch {
        // Missing or unreadable runs directory: nothing to resume.
    }
    return null;
}
|
|
333
|
+
/**
 * Build a markdown digest of completed earlier runs.
 *
 * Entries of `<rootDir>/runs` are scanned in descending name order; only runs
 * whose saved state has `phase === "done"` are used, and at most five of them.
 * Each run contributes a heading (date derived from the folder name, task
 * count, and cost when positive), its `status.md` text, and the first 500
 * characters of its `goal.md` when present.
 *
 * @param rootDir Directory that contains the `runs` folder.
 * @returns The sections joined by blank lines, or "" when nothing could be read.
 */
function readPreviousRunKnowledge(rootDir) {
    const runsDir = join(rootDir, "runs");
    // Best-effort read: a missing file contributes "".
    const readOrEmpty = (path) => {
        try {
            return readFileSync(path, "utf-8");
        }
        catch {
            return "";
        }
    };
    try {
        const newestFirst = readdirSync(runsDir).sort().reverse();
        const sections = [];
        for (let i = 0; i < newestFirst.length && sections.length < 5; i++) {
            const name = newestFirst[i];
            const state = loadRunState(join(runsDir, name));
            const finished = state && state.phase === "done";
            if (!finished)
                continue;
            const status = readOrEmpty(join(runsDir, name, "status.md"));
            const goal = readOrEmpty(join(runsDir, name, "goal.md"));
            const date = name.replace("T", " ").slice(0, 19);
            const cost = state.accCost > 0 ? ` · $${state.accCost.toFixed(2)}` : "";
            const heading = `### Run ${date} (${state.accCompleted} tasks${cost})`;
            const body = status || "(no status recorded)";
            const goalLine = goal ? `Goal: ${goal.slice(0, 500)}` : "";
            sections.push(`${heading}\n${body}\n${goalLine}`);
        }
        return sections.join("\n\n");
    }
    catch {
        return "";
    }
}
|
|
365
|
+
/**
 * Create a fresh, uniquely named directory for a new run under
 * `<rootDir>/runs`, together with its `designs`, `reflections`, `milestones`
 * and `sessions` sub-directories.
 *
 * The folder name is the current UTC time with second resolution, e.g.
 * `2026-04-04T18-52-49`. If that name is already taken (two runs started
 * within the same second), `-2`, `-3`, ... is appended so an existing run
 * folder is never reused.
 *
 * @param rootDir Directory that contains (or will contain) the `runs` folder.
 * @returns Path of the newly created run directory.
 */
function createRunDir(rootDir) {
    const ts = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
    const runsDir = join(rootDir, "runs");
    mkdirSync(runsDir, { recursive: true });
    let runDir = join(runsDir, ts);
    for (let attempt = 2;; attempt++) {
        try {
            // Non-recursive mkdir fails with EEXIST when the name is taken,
            // which makes claiming the folder atomic.
            mkdirSync(runDir);
            break;
        }
        catch (err) {
            const taken = typeof err === "object" && err !== null && "code" in err && err.code === "EEXIST";
            if (!taken)
                throw err;
            runDir = join(runsDir, `${ts}-${attempt}`);
        }
    }
    for (const sub of ["designs", "reflections", "milestones", "sessions"])
        mkdirSync(join(runDir, sub), { recursive: true });
    return runDir;
}
|
|
374
|
+
/**
 * Write a JSON record of one wave to `<baseDir>/sessions/wave-<waveNum>.json`.
 *
 * The record holds the wave number, its kind, one entry per agent in
 * `swarm.agents`, and `swarm.totalCostUsd` as `totalCost`. An agent's
 * `duration` is `finishedAt - startedAt`, or 0 when either value is falsy.
 *
 * @param baseDir Run directory; the `sessions` folder is created if missing.
 * @param waveNum Wave number used in the file name and the record.
 * @param kind Wave kind stored verbatim in the record.
 * @param swarm Swarm whose agents and total cost are recorded.
 */
function saveWaveSession(baseDir, waveNum, kind, swarm) {
    const sessionsDir = join(baseDir, "sessions");
    mkdirSync(sessionsDir, { recursive: true });
    const describeAgent = (agent) => {
        const timed = agent.finishedAt && agent.startedAt;
        return {
            id: agent.id,
            prompt: agent.task.prompt,
            status: agent.status,
            error: agent.error,
            cost: agent.costUsd,
            toolCalls: agent.toolCalls,
            filesChanged: agent.filesChanged,
            duration: timed ? agent.finishedAt - agent.startedAt : 0,
            branch: agent.branch,
        };
    };
    const record = {
        wave: waveNum,
        kind,
        agents: swarm.agents.map(agent => describeAgent(agent)),
        totalCost: swarm.totalCostUsd,
    };
    const target = join(sessionsDir, `wave-${waveNum}.json`);
    writeFileSync(target, JSON.stringify(record, null, 2), "utf-8");
}
|
|
393
|
+
/**
 * Append one record per branch-owning agent of `swarm` to `branches`, then
 * apply the swarm's merge results to those records.
 *
 * Agents without a `branch` are skipped. A new record starts as "unmerged"
 * when the agent's status is "done" and as "failed" otherwise; the prompt is
 * truncated to 200 characters and missing `filesChanged` / `costUsd` default
 * to 0. Each entry of `swarm.mergeResults` then sets the matching record to
 * "merged" or "merge-failed".
 *
 * @param swarm Object exposing `agents` and `mergeResults`.
 * @param branches Accumulating list of branch records; mutated in place.
 */
function recordBranches(swarm, branches) {
    for (const agent of swarm.agents) {
        if (!agent.branch)
            continue;
        branches.push({
            branch: agent.branch,
            taskPrompt: agent.task.prompt.slice(0, 200),
            status: agent.status === "done" ? "unmerged" : "failed",
            filesChanged: agent.filesChanged ?? 0,
            costUsd: agent.costUsd ?? 0,
        });
    }
    // Apply merge results to the MOST RECENT record with that branch name.
    // `branches` may already hold records from earlier calls; if a branch
    // name repeats (NOTE(review): presumably possible across waves — confirm
    // against the swarm's branch naming), a first-match lookup would update
    // the stale record and leave the one just added untouched.
    for (const result of swarm.mergeResults) {
        for (let i = branches.length - 1; i >= 0; i--) {
            if (branches[i].branch === result.branch) {
                branches[i].status = result.ok ? "merged" : "merge-failed";
                break;
            }
        }
    }
}
|
|
412
|
+
/**
 * Merge every recorded branch that is still "unmerged" and changed at least
 * one file into the repository at `cwd`.
 *
 * Each branch is first merged with `git merge --no-edit`. If that fails, the
 * merge is aborted and retried with `-X theirs`. If the retry fails too, the
 * merge is aborted again and the record is marked "merge-failed". Successful
 * merges mark the record "merged". Progress is reported through `onLog`.
 *
 * @param cwd Working directory in which git is run.
 * @param branches Branch records; their `status` is updated in place.
 * @param onLog Callback receiving one progress line per event.
 */
function autoMergeBranches(cwd, branches, onLog) {
    const pending = branches.filter(b => b.status === "unmerged" && b.filesChanged > 0);
    if (pending.length === 0)
        return;
    const run = (command) => execSync(command, { cwd, encoding: "utf-8", stdio: "pipe" });
    // Abort failures are ignored: there may be no merge in progress.
    const abortQuietly = () => {
        try {
            run("git merge --abort");
        }
        catch { }
    };
    // Runs one merge command; on success marks the record and logs it.
    const attempt = (br, command, note) => {
        try {
            run(command);
            br.status = "merged";
            onLog(` ✓ ${br.branch} ${note}`);
            return true;
        }
        catch {
            return false;
        }
    };
    onLog(`Merging ${pending.length} unmerged branches...`);
    for (const br of pending) {
        if (attempt(br, `git merge --no-edit "${br.branch}"`, `(${br.filesChanged} files)`))
            continue;
        abortQuietly();
        if (attempt(br, `git merge --no-edit -X theirs "${br.branch}"`, "(auto-resolved)"))
            continue;
        abortQuietly();
        br.status = "merge-failed";
        onLog(` ✗ ${br.branch} (conflict — preserved for manual merge)`);
    }
}
|
|
444
|
+
/**
 * Snapshot the current `status.md` of a run as a milestone file.
 *
 * Does nothing when `status.md` is missing or contains only whitespace.
 * Otherwise writes `<baseDir>/milestones/wave-<waveNum>.md`, made of a heading
 * carrying the wave number and the current UTC time followed by the status
 * text.
 *
 * @param baseDir Run directory.
 * @param waveNum Wave number used in the file name and heading.
 */
function archiveMilestone(baseDir, waveNum) {
    const statusFile = join(baseDir, "status.md");
    const snapshot = existsSync(statusFile) ? readFileSync(statusFile, "utf-8") : "";
    if (snapshot.trim() === "")
        return;
    const milestonesDir = join(baseDir, "milestones");
    mkdirSync(milestonesDir, { recursive: true });
    const stamp = new Date().toISOString().slice(0, 19).replace("T", " ");
    const heading = `# Milestone — Wave ${waveNum} (${stamp})`;
    const target = join(milestonesDir, `wave-${waveNum}.md`);
    writeFileSync(target, `${heading}\n\n${snapshot}`, "utf-8");
}
|
|
456
|
+
/**
 * Append a timestamped update section to `<baseDir>/goal.md`.
 *
 * The existing file content (or "" when it cannot be read) is extended with
 * a `## Update — <UTC time>` section holding `update`. When the result
 * exceeds 4000 characters, only its first 1000 and last 3000 characters are
 * kept, joined by an ellipsis marker, so the file stays bounded.
 *
 * @param baseDir Run directory.
 * @param update Text of the new goal update.
 */
function writeGoalUpdate(baseDir, update) {
    const goalFile = join(baseDir, "goal.md");
    let previous;
    try {
        previous = readFileSync(goalFile, "utf-8");
    }
    catch {
        previous = "";
    }
    const stamp = new Date().toISOString().slice(0, 19).replace("T", " ");
    const combined = `${previous}\n\n## Update — ${stamp}\n${update}`;
    let bounded = combined;
    if (combined.length > 4000) {
        // Keep the opening of the file plus the most recent updates.
        const head = combined.slice(0, 1000);
        const tail = combined.slice(-3000);
        bounded = head + "\n\n...\n\n" + tail;
    }
    writeFileSync(goalFile, bounded, "utf-8");
}
|
|
285
470
|
const BRAILLE = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
|
|
286
471
|
function makeProgressLog() {
|
|
287
472
|
let frame = 0;
|
|
@@ -386,6 +571,77 @@ async function main() {
|
|
|
386
571
|
}
|
|
387
572
|
if (noTTY)
|
|
388
573
|
console.log(chalk.dim(" Non-interactive mode — using defaults\n"));
|
|
574
|
+
// ── Show run history ──
|
|
575
|
+
const rootDir = join(cwd, ".claude-overnight");
|
|
576
|
+
const runsDir = join(rootDir, "runs");
|
|
577
|
+
let completedRuns = [];
|
|
578
|
+
try {
|
|
579
|
+
const dirs = readdirSync(runsDir).sort().reverse();
|
|
580
|
+
for (const d of dirs) {
|
|
581
|
+
const s = loadRunState(join(runsDir, d));
|
|
582
|
+
if (s && s.phase === "done")
|
|
583
|
+
completedRuns.push({ dir: join(runsDir, d), state: s });
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
catch { }
|
|
587
|
+
if (completedRuns.length > 0 && !noTTY) {
|
|
588
|
+
console.log(chalk.dim(`\n ${completedRuns.length} previous run${completedRuns.length > 1 ? "s" : ""}`));
|
|
589
|
+
for (const r of completedRuns.slice(0, 3)) {
|
|
590
|
+
const date = r.state.startedAt?.slice(0, 10) || "unknown";
|
|
591
|
+
const obj = r.state.objective?.slice(0, 40) || "";
|
|
592
|
+
const cost = r.state.accCost > 0 ? ` · $${r.state.accCost.toFixed(0)}` : "";
|
|
593
|
+
console.log(chalk.dim(` ${date} · ${r.state.accCompleted} tasks${cost}${obj ? ` · ${obj}` : ""}${obj.length >= 40 ? "…" : ""}`));
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
// ── Resume detection ──
|
|
597
|
+
let resuming = false;
|
|
598
|
+
let resumeState = null;
|
|
599
|
+
let resumeRunDir;
|
|
600
|
+
const incomplete = findIncompleteRun(rootDir);
|
|
601
|
+
if (incomplete && incomplete.state.cwd === cwd && !noTTY && tasks.length === 0) {
|
|
602
|
+
const prev = incomplete.state;
|
|
603
|
+
const merged = prev.branches.filter(b => b.status === "merged").length;
|
|
604
|
+
const unmerged = prev.branches.filter(b => b.status === "unmerged").length;
|
|
605
|
+
const failed = prev.branches.filter(b => b.status === "failed" || b.status === "merge-failed").length;
|
|
606
|
+
const obj = prev.objective?.slice(0, 50) || "";
|
|
607
|
+
// Read last status for context
|
|
608
|
+
let lastStatus = "";
|
|
609
|
+
try {
|
|
610
|
+
lastStatus = readFileSync(join(incomplete.dir, "status.md"), "utf-8").trim().slice(0, 120);
|
|
611
|
+
}
|
|
612
|
+
catch { }
|
|
613
|
+
console.log(chalk.yellow(`\n ⚠ Interrupted run`));
|
|
614
|
+
const boxLines = [
|
|
615
|
+
`${obj}${obj.length >= 50 ? "…" : ""}`,
|
|
616
|
+
`${prev.accCompleted}/${prev.budget} sessions · ${prev.waveNum + 1} waves · $${prev.accCost.toFixed(2)}`,
|
|
617
|
+
];
|
|
618
|
+
if (lastStatus)
|
|
619
|
+
boxLines.push(lastStatus);
|
|
620
|
+
if (merged + unmerged + failed > 0)
|
|
621
|
+
boxLines.push(`${merged} merged · ${unmerged} unmerged · ${failed} failed branches`);
|
|
622
|
+
const boxW = Math.max(...boxLines.map(l => l.length)) + 4;
|
|
623
|
+
console.log(chalk.dim(` ╭${"─".repeat(boxW)}╮`));
|
|
624
|
+
for (const line of boxLines)
|
|
625
|
+
console.log(chalk.dim(" │") + ` ${line.padEnd(boxW - 2)}` + chalk.dim("│"));
|
|
626
|
+
console.log(chalk.dim(` ╰${"─".repeat(boxW)}╯`));
|
|
627
|
+
const action = await selectKey("", [
|
|
628
|
+
{ key: "r", desc: "esume" },
|
|
629
|
+
{ key: "f", desc: "resh" },
|
|
630
|
+
{ key: "q", desc: "uit" },
|
|
631
|
+
]);
|
|
632
|
+
if (action === "q") {
|
|
633
|
+
process.exit(0);
|
|
634
|
+
}
|
|
635
|
+
if (action === "r") {
|
|
636
|
+
resuming = true;
|
|
637
|
+
resumeState = prev;
|
|
638
|
+
resumeRunDir = incomplete.dir;
|
|
639
|
+
if (unmerged > 0) {
|
|
640
|
+
console.log("");
|
|
641
|
+
autoMergeBranches(cwd, prev.branches, (msg) => console.log(chalk.dim(` ${msg}`)));
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
}
|
|
389
645
|
// ── Interactive flow: Objective → Budget → Model → Usage cap → Plan → Review ──
|
|
390
646
|
let workerModel;
|
|
391
647
|
let plannerModel;
|
|
@@ -463,6 +719,8 @@ async function main() {
|
|
|
463
719
|
parts.push("flex");
|
|
464
720
|
if (usageCap != null)
|
|
465
721
|
parts.push(`cap ${Math.round(usageCap * 100)}%`);
|
|
722
|
+
if (completedRuns.length > 0)
|
|
723
|
+
parts.push(`${completedRuns.length} prior`);
|
|
466
724
|
const inner = parts.join(chalk.dim(" · "));
|
|
467
725
|
const innerLen = parts.join(" · ").length;
|
|
468
726
|
console.log(chalk.dim(`\n ╭${"─".repeat(innerLen + 4)}╮`));
|
|
@@ -506,13 +764,17 @@ async function main() {
|
|
|
506
764
|
console.log(chalk.dim(` ${workerModel} concurrency=${concurrency} worktrees=${useWorktrees} merge=${mergeStrategy} perms=${permissionMode}${capStr}`));
|
|
507
765
|
}
|
|
508
766
|
// ── Flex mode: adaptive multi-wave planning ──
|
|
509
|
-
|
|
767
|
+
let flex = !argv.includes("--no-flex") && (fileCfg?.flexiblePlan ?? objective != null) && objective != null && (budget ?? 10) > 2;
|
|
510
768
|
const agentTimeoutMs = cliFlags.timeout ? parseFloat(cliFlags.timeout) * 1000 : undefined;
|
|
511
769
|
let thinkingUsed = 0;
|
|
512
770
|
let thinkingCost = 0, thinkingIn = 0, thinkingOut = 0, thinkingTools = 0;
|
|
513
|
-
let
|
|
771
|
+
let thinkingHistory;
|
|
772
|
+
// Create run directory early so thinking wave can use it
|
|
773
|
+
const runDir = resuming && resumeRunDir ? resumeRunDir : createRunDir(rootDir);
|
|
774
|
+
const previousKnowledge = readPreviousRunKnowledge(rootDir);
|
|
514
775
|
// ── Plan phase (interactive: review loop, non-interactive: auto-plan or skip) ──
|
|
515
776
|
const needsPlan = tasks.length === 0;
|
|
777
|
+
const designDir = join(runDir, "designs");
|
|
516
778
|
if (needsPlan) {
|
|
517
779
|
if (noTTY) {
|
|
518
780
|
console.error(chalk.red(" No tasks provided and stdin is not a TTY. Provide tasks via args or a .json file."));
|
|
@@ -522,7 +784,6 @@ async function main() {
|
|
|
522
784
|
const planRestore = () => process.stdout.write("\x1B[?25h");
|
|
523
785
|
const useThinking = flex && (budget ?? 10) > concurrency * 3;
|
|
524
786
|
const thinkingCount = useThinking ? Math.min(Math.max(concurrency, Math.ceil((budget ?? 10) * 0.005)), 10) : 0;
|
|
525
|
-
const designDir = join(cwd, ".claude-overnight", "designs");
|
|
526
787
|
try {
|
|
527
788
|
if (useThinking) {
|
|
528
789
|
// Phase 1: Quick theme identification → review → then autonomous
|
|
@@ -580,7 +841,7 @@ async function main() {
|
|
|
580
841
|
process.stdout.write("\x1B[?25l");
|
|
581
842
|
// Phase 2: Thinking wave
|
|
582
843
|
mkdirSync(designDir, { recursive: true });
|
|
583
|
-
const thinkingTasks = buildThinkingTasks(objective, themes, designDir, plannerModel);
|
|
844
|
+
const thinkingTasks = buildThinkingTasks(objective, themes, designDir, plannerModel, previousKnowledge || undefined);
|
|
584
845
|
console.log(chalk.cyan(`\n ◆ Thinking: ${thinkingTasks.length} agents exploring...\n`));
|
|
585
846
|
const thinkingSwarm = new Swarm({
|
|
586
847
|
tasks: thinkingTasks, concurrency, cwd,
|
|
@@ -604,13 +865,24 @@ async function main() {
|
|
|
604
865
|
thinkingIn = thinkingSwarm.totalInputTokens;
|
|
605
866
|
thinkingOut = thinkingSwarm.totalOutputTokens;
|
|
606
867
|
thinkingTools = thinkingSwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
868
|
+
// Record thinking wave so steering knows what happened
|
|
869
|
+
thinkingHistory = {
|
|
870
|
+
wave: -1,
|
|
871
|
+
kind: "think",
|
|
872
|
+
tasks: thinkingSwarm.agents.map(a => ({
|
|
873
|
+
prompt: a.task.prompt.slice(0, 200),
|
|
874
|
+
status: a.status,
|
|
875
|
+
filesChanged: a.filesChanged,
|
|
876
|
+
error: a.error,
|
|
877
|
+
})),
|
|
878
|
+
};
|
|
607
879
|
// Phase 3: Orchestrate from design docs
|
|
608
|
-
|
|
609
|
-
if (
|
|
880
|
+
const designs = readMdDir(designDir);
|
|
881
|
+
if (designs) {
|
|
610
882
|
const orchBudget = Math.min(50, Math.max(concurrency, Math.ceil(((budget ?? 10) - thinkingUsed) * 0.5)));
|
|
611
883
|
const flexNote = `This is wave 1 of an adaptive multi-wave run (total budget: ${(budget ?? 10) - thinkingUsed}). Plan the highest-impact foundational work first. Future waves will iterate based on what's learned.`;
|
|
612
884
|
console.log(chalk.cyan(`\n ◆ Orchestrating plan...\n`));
|
|
613
|
-
tasks = await orchestrate(objective,
|
|
885
|
+
tasks = await orchestrate(objective, designs, cwd, plannerModel, workerModel, permissionMode, orchBudget, concurrency, makeProgressLog(), flexNote);
|
|
614
886
|
process.stdout.write(`\x1B[2K\r ${chalk.green(`\u2713 ${tasks.length} tasks`)}\n\n`);
|
|
615
887
|
}
|
|
616
888
|
else {
|
|
@@ -713,14 +985,62 @@ async function main() {
|
|
|
713
985
|
process.stdout.write("\x1B[?25l");
|
|
714
986
|
const restore = () => process.stdout.write("\x1B[?25h\n");
|
|
715
987
|
const runStartedAt = Date.now();
|
|
716
|
-
// Wave-loop state
|
|
988
|
+
// Wave-loop state — either fresh or resumed
|
|
989
|
+
mkdirSync(join(runDir, "reflections"), { recursive: true });
|
|
990
|
+
mkdirSync(join(runDir, "milestones"), { recursive: true });
|
|
991
|
+
mkdirSync(join(runDir, "sessions"), { recursive: true });
|
|
717
992
|
let currentSwarm;
|
|
718
|
-
let remaining
|
|
719
|
-
let currentTasks
|
|
720
|
-
let waveNum
|
|
993
|
+
let remaining;
|
|
994
|
+
let currentTasks;
|
|
995
|
+
let waveNum;
|
|
721
996
|
const waveHistory = [];
|
|
722
|
-
let accCost
|
|
997
|
+
let accCost, accCompleted, accFailed, accTools;
|
|
998
|
+
let accIn = 0, accOut = 0;
|
|
723
999
|
let lastCapped = false, lastAborted = false;
|
|
1000
|
+
let lastWaveKind;
|
|
1001
|
+
let reflectionBudgetUsed;
|
|
1002
|
+
const branches = [];
|
|
1003
|
+
if (resuming && resumeState) {
|
|
1004
|
+
// Restore ALL config from saved state
|
|
1005
|
+
remaining = resumeState.remaining;
|
|
1006
|
+
currentTasks = resumeState.currentTasks;
|
|
1007
|
+
waveNum = resumeState.waveNum;
|
|
1008
|
+
accCost = resumeState.accCost;
|
|
1009
|
+
accCompleted = resumeState.accCompleted;
|
|
1010
|
+
accFailed = resumeState.accFailed;
|
|
1011
|
+
accTools = 0;
|
|
1012
|
+
lastWaveKind = resumeState.lastWaveKind;
|
|
1013
|
+
reflectionBudgetUsed = resumeState.reflectionBudgetUsed;
|
|
1014
|
+
branches.push(...resumeState.branches);
|
|
1015
|
+
objective = resumeState.objective;
|
|
1016
|
+
workerModel = resumeState.workerModel;
|
|
1017
|
+
plannerModel = resumeState.plannerModel;
|
|
1018
|
+
budget = resumeState.budget;
|
|
1019
|
+
concurrency = resumeState.concurrency;
|
|
1020
|
+
flex = resumeState.flex;
|
|
1021
|
+
usageCap = resumeState.usageCap;
|
|
1022
|
+
console.log(chalk.green(`\n ✓ Resumed`) + chalk.dim(` · wave ${waveNum + 1} · ${remaining} remaining · $${accCost.toFixed(2)} spent\n`));
|
|
1023
|
+
}
|
|
1024
|
+
else {
|
|
1025
|
+
// Fresh run
|
|
1026
|
+
if (objective && !existsSync(join(runDir, "goal.md"))) {
|
|
1027
|
+
writeFileSync(join(runDir, "goal.md"), `## Original Objective\n${objective}`, "utf-8");
|
|
1028
|
+
}
|
|
1029
|
+
remaining = (budget ?? tasks.length) - thinkingUsed;
|
|
1030
|
+
currentTasks = tasks;
|
|
1031
|
+
waveNum = 0;
|
|
1032
|
+
if (thinkingHistory)
|
|
1033
|
+
waveHistory.push(thinkingHistory);
|
|
1034
|
+
accCost = thinkingCost;
|
|
1035
|
+
accCompleted = 0;
|
|
1036
|
+
accFailed = 0;
|
|
1037
|
+
accTools = thinkingTools;
|
|
1038
|
+
accIn = thinkingIn;
|
|
1039
|
+
accOut = thinkingOut;
|
|
1040
|
+
lastWaveKind = "execute";
|
|
1041
|
+
reflectionBudgetUsed = 0;
|
|
1042
|
+
}
|
|
1043
|
+
const maxReflectionBudget = Math.max(2, Math.ceil((budget ?? 10) * 0.05));
|
|
724
1044
|
// For flex + branch strategy: create one target branch, waves merge via yolo into it
|
|
725
1045
|
let runBranch;
|
|
726
1046
|
let originalRef;
|
|
@@ -791,8 +1111,18 @@ async function main() {
|
|
|
791
1111
|
remaining -= swarm.completed + swarm.failed;
|
|
792
1112
|
lastCapped = swarm.cappedOut;
|
|
793
1113
|
lastAborted = swarm.aborted;
|
|
1114
|
+
recordBranches(swarm, branches);
|
|
1115
|
+
saveWaveSession(runDir, waveNum, lastWaveKind, swarm);
|
|
1116
|
+
saveRunState(runDir, {
|
|
1117
|
+
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective, budget: budget ?? tasks.length,
|
|
1118
|
+
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
1119
|
+
usageCap, flex, useWorktrees, mergeStrategy, waveNum, currentTasks,
|
|
1120
|
+
lastWaveKind, reflectionBudgetUsed, accCost, accCompleted, accFailed,
|
|
1121
|
+
branches, phase: "steering", startedAt: new Date(runStartedAt).toISOString(), cwd,
|
|
1122
|
+
});
|
|
794
1123
|
waveHistory.push({
|
|
795
1124
|
wave: waveNum,
|
|
1125
|
+
kind: lastWaveKind,
|
|
796
1126
|
tasks: swarm.agents.map(a => ({
|
|
797
1127
|
prompt: a.task.prompt,
|
|
798
1128
|
status: a.status,
|
|
@@ -802,30 +1132,116 @@ async function main() {
|
|
|
802
1132
|
});
|
|
803
1133
|
if (!flex || remaining <= 0 || swarm.aborted || swarm.cappedOut)
|
|
804
1134
|
break;
|
|
805
|
-
// ── Steer next
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
1135
|
+
// ── Steer: assess quality and decide next action ──
|
|
1136
|
+
// May loop through reflect→re-steer cycles before producing execution tasks
|
|
1137
|
+
let steerDone = false;
|
|
1138
|
+
let steerAttempts = 0;
|
|
1139
|
+
while (!steerDone && remaining > 0 && !stopping && steerAttempts < 4) {
|
|
1140
|
+
steerAttempts++;
|
|
1141
|
+
console.log(chalk.cyan(`\n ◆ Assessing...\n`));
|
|
1142
|
+
process.stdout.write("\x1B[?25l");
|
|
1143
|
+
try {
|
|
1144
|
+
const memory = readRunMemory(runDir, previousKnowledge || undefined);
|
|
1145
|
+
const steer = await steerWave(objective, waveHistory, remaining, cwd, plannerModel, workerModel, permissionMode, concurrency, makeProgressLog(), memory);
|
|
1146
|
+
process.stdout.write(`\x1B[2K\r`);
|
|
1147
|
+
process.stdout.write("\x1B[?25h");
|
|
1148
|
+
// Persist context layers
|
|
1149
|
+
if (steer.statusUpdate)
|
|
1150
|
+
writeStatus(runDir, steer.statusUpdate);
|
|
1151
|
+
if (steer.goalUpdate) {
|
|
1152
|
+
writeGoalUpdate(runDir, steer.goalUpdate);
|
|
1153
|
+
console.log(chalk.dim(` Goal refined: ${steer.goalUpdate.slice(0, 100)}\n`));
|
|
1154
|
+
}
|
|
1155
|
+
// Archive milestone every ~5 execution waves
|
|
1156
|
+
const execWaves = waveHistory.filter(w => w.kind === "execute").length;
|
|
1157
|
+
if (execWaves > 0 && execWaves % 5 === 0)
|
|
1158
|
+
archiveMilestone(runDir, waveNum);
|
|
1159
|
+
if (steer.done || steer.action === "done") {
|
|
1160
|
+
console.log(chalk.green(` \u2713 ${steer.reasoning}\n`));
|
|
1161
|
+
steerDone = true;
|
|
1162
|
+
remaining = 0; // exit outer loop too
|
|
1163
|
+
break;
|
|
1164
|
+
}
|
|
1165
|
+
if (steer.action === "reflect") {
|
|
1166
|
+
// Safety: no consecutive reflections, budget cap
|
|
1167
|
+
const canReflect = lastWaveKind !== "reflect" && reflectionBudgetUsed + 2 <= maxReflectionBudget;
|
|
1168
|
+
if (!canReflect) {
|
|
1169
|
+
console.log(chalk.dim(` ${steer.reasoning}`));
|
|
1170
|
+
console.log(chalk.yellow(` Reflection skipped (${lastWaveKind === "reflect" ? "consecutive" : "budget cap"}) — re-assessing\n`));
|
|
1171
|
+
lastWaveKind = "execute"; // allow next steer to see non-reflect
|
|
1172
|
+
continue; // re-steer in this inner loop
|
|
1173
|
+
}
|
|
1174
|
+
// Run reflection wave
|
|
1175
|
+
console.log(chalk.dim(` ${steer.reasoning}`));
|
|
1176
|
+
console.log(chalk.cyan(`\n ◆ Reflection: 2 agents reviewing...\n`));
|
|
1177
|
+
const reflectionDir = join(runDir, "reflections");
|
|
1178
|
+
waveNum++;
|
|
1179
|
+
const reflTasks = buildReflectionTasks(objective, memory.goal, reflectionDir, waveNum, plannerModel);
|
|
1180
|
+
const reflSwarm = new Swarm({
|
|
1181
|
+
tasks: reflTasks, concurrency: 2, cwd,
|
|
1182
|
+
model: plannerModel, permissionMode,
|
|
1183
|
+
useWorktrees: false, mergeStrategy: "yolo",
|
|
1184
|
+
agentTimeoutMs, usageCap,
|
|
1185
|
+
});
|
|
1186
|
+
currentSwarm = reflSwarm;
|
|
1187
|
+
const stopReflRender = startRenderLoop(reflSwarm);
|
|
1188
|
+
try {
|
|
1189
|
+
await reflSwarm.run();
|
|
1190
|
+
}
|
|
1191
|
+
finally {
|
|
1192
|
+
stopReflRender();
|
|
1193
|
+
}
|
|
1194
|
+
console.log(renderSummary(reflSwarm));
|
|
1195
|
+
accCost += reflSwarm.totalCostUsd;
|
|
1196
|
+
accIn += reflSwarm.totalInputTokens;
|
|
1197
|
+
accOut += reflSwarm.totalOutputTokens;
|
|
1198
|
+
accCompleted += reflSwarm.completed;
|
|
1199
|
+
accFailed += reflSwarm.failed;
|
|
1200
|
+
accTools += reflSwarm.agents.reduce((sum, a) => sum + a.toolCalls, 0);
|
|
1201
|
+
remaining -= reflSwarm.completed + reflSwarm.failed;
|
|
1202
|
+
reflectionBudgetUsed += reflSwarm.completed + reflSwarm.failed;
|
|
1203
|
+
waveHistory.push({
|
|
1204
|
+
wave: waveNum,
|
|
1205
|
+
kind: "reflect",
|
|
1206
|
+
tasks: reflSwarm.agents.map(a => ({ prompt: a.task.prompt, status: a.status, filesChanged: a.filesChanged, error: a.error })),
|
|
1207
|
+
});
|
|
1208
|
+
lastWaveKind = "reflect";
|
|
1209
|
+
continue; // re-steer with reflection artifacts
|
|
1210
|
+
}
|
|
1211
|
+
// action === "execute"
|
|
1212
|
+
if (steer.tasks.length === 0) {
|
|
1213
|
+
console.log(chalk.green(` \u2713 ${steer.reasoning}\n`));
|
|
1214
|
+
remaining = 0;
|
|
1215
|
+
break;
|
|
1216
|
+
}
|
|
1217
|
+
console.log(chalk.dim(` ${steer.reasoning}\n`));
|
|
1218
|
+
currentTasks = steer.tasks;
|
|
1219
|
+
lastWaveKind = "execute";
|
|
1220
|
+
steerDone = true; // exit inner loop, outer loop runs the tasks
|
|
1221
|
+
}
|
|
1222
|
+
catch (err) {
|
|
1223
|
+
process.stdout.write("\x1B[?25h");
|
|
1224
|
+
console.log(chalk.yellow(` Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
|
|
1225
|
+
remaining = 0;
|
|
814
1226
|
break;
|
|
815
1227
|
}
|
|
816
|
-
console.log(chalk.dim(` ${steer.reasoning}\n`));
|
|
817
|
-
currentTasks = steer.tasks;
|
|
818
|
-
waveNum++;
|
|
819
|
-
}
|
|
820
|
-
catch (err) {
|
|
821
|
-
process.stdout.write("\x1B[?25h");
|
|
822
|
-
console.log(chalk.yellow(` Steering failed: ${err.message?.slice(0, 80)} \u2014 stopping\n`));
|
|
823
|
-
break;
|
|
824
1228
|
}
|
|
1229
|
+
waveNum++;
|
|
1230
|
+
}
|
|
1231
|
+
// Mark run as done — keep sessions/milestones/status/goal, clean transient files
|
|
1232
|
+
saveRunState(runDir, {
|
|
1233
|
+
id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: budget ?? tasks.length,
|
|
1234
|
+
remaining, workerModel, plannerModel, concurrency, permissionMode,
|
|
1235
|
+
usageCap, flex, useWorktrees, mergeStrategy, waveNum, currentTasks: [],
|
|
1236
|
+
lastWaveKind, reflectionBudgetUsed, accCost, accCompleted, accFailed,
|
|
1237
|
+
branches, phase: "done", startedAt: new Date(runStartedAt).toISOString(), cwd,
|
|
1238
|
+
});
|
|
1239
|
+
try {
|
|
1240
|
+
rmSync(join(runDir, "designs"), { recursive: true, force: true });
|
|
825
1241
|
}
|
|
826
|
-
|
|
1242
|
+
catch { }
|
|
827
1243
|
try {
|
|
828
|
-
rmSync(join(
|
|
1244
|
+
rmSync(join(runDir, "reflections"), { recursive: true, force: true });
|
|
829
1245
|
}
|
|
830
1246
|
catch { }
|
|
831
1247
|
// Switch back if we created a run branch
|
|
@@ -837,48 +1253,40 @@ async function main() {
|
|
|
837
1253
|
}
|
|
838
1254
|
// ── Final summary ──
|
|
839
1255
|
const waves = waveNum + 1;
|
|
840
|
-
const
|
|
841
|
-
const
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
const costText = accCost > 0 ? chalk.dim(` · $${accCost.toFixed(3)}`) : "";
|
|
845
|
-
const wavePart = waves > 1 ? chalk.dim(`${waves} waves · `) : "";
|
|
1256
|
+
const elapsed = Math.round((Date.now() - runStartedAt) / 1000);
|
|
1257
|
+
const elapsedStr = elapsed < 60 ? `${elapsed}s` : elapsed < 3600 ? `${Math.floor(elapsed / 60)}m ${elapsed % 60}s` : `${Math.floor(elapsed / 3600)}h ${Math.floor((elapsed % 3600) / 60)}m`;
|
|
1258
|
+
const totalMerged = branches.filter(b => b.status === "merged").length;
|
|
1259
|
+
const totalConflicts = branches.filter(b => b.status === "merge-failed").length;
|
|
846
1260
|
console.log(chalk.dim(`\n ${"─".repeat(36)}`));
|
|
847
|
-
console.log(` ${chalk.green("✓")} ${chalk.bold("Complete")}
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
1261
|
+
console.log(` ${accFailed === 0 ? chalk.green("✓") : chalk.yellow("⚠")} ${chalk.bold("Complete")}\n`);
|
|
1262
|
+
const boxLines = [];
|
|
1263
|
+
const statusLine = accFailed > 0 ? `${accCompleted} done · ${accFailed} failed` : `${accCompleted} done`;
|
|
1264
|
+
boxLines.push(`${waves} wave${waves > 1 ? "s" : ""} · ${statusLine} · $${accCost.toFixed(2)}`);
|
|
1265
|
+
boxLines.push(`${elapsedStr} · ${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out · ${accTools} tools`);
|
|
1266
|
+
if (totalMerged > 0 || totalConflicts > 0)
|
|
1267
|
+
boxLines.push(`${totalMerged} merged${totalConflicts > 0 ? ` · ${totalConflicts} conflicts` : ""}`);
|
|
1268
|
+
if (reflectionBudgetUsed > 0)
|
|
1269
|
+
boxLines.push(`${reflectionBudgetUsed} reflection agents`);
|
|
1270
|
+
if (lastCapped)
|
|
1271
|
+
boxLines.push(chalk.yellow(`Capped at ${usageCap != null ? Math.round(usageCap * 100) : 100}%`));
|
|
1272
|
+
const boxW = Math.max(...boxLines.map(l => l.replace(/\x1B\[[0-9;]*m/g, "").length)) + 4;
|
|
1273
|
+
console.log(chalk.dim(` ╭${"─".repeat(boxW)}╮`));
|
|
1274
|
+
for (const line of boxLines) {
|
|
1275
|
+
const plainLen = line.replace(/\x1B\[[0-9;]*m/g, "").length;
|
|
1276
|
+
console.log(chalk.dim(" │") + ` ${line}${" ".repeat(Math.max(0, boxW - 2 - plainLen))}` + chalk.dim("│"));
|
|
1277
|
+
}
|
|
1278
|
+
console.log(chalk.dim(` ╰${"─".repeat(boxW)}╯`));
|
|
1279
|
+
if (totalConflicts > 0) {
|
|
1280
|
+
const conflictBranches = branches.filter(b => b.status === "merge-failed");
|
|
1281
|
+
console.log(chalk.red(`\n Unresolved conflicts:`));
|
|
1282
|
+
for (const c of conflictBranches)
|
|
1283
|
+
console.log(chalk.red(` ${c.branch}`));
|
|
1284
|
+
console.log(chalk.dim(" git merge <branch> to resolve"));
|
|
857
1285
|
}
|
|
858
|
-
const elapsed = Math.round((Date.now() - runStartedAt) / 1000);
|
|
859
|
-
const elapsedStr = elapsed < 60 ? `${elapsed}s` : `${Math.floor(elapsed / 60)}m ${elapsed % 60}s`;
|
|
860
|
-
console.log(chalk.dim(` ${elapsedStr} ${fmtTokens(accIn)} in / ${fmtTokens(accOut)} out ${accTools} tool calls`));
|
|
861
1286
|
if (runBranch) {
|
|
862
|
-
console.log(chalk.dim(
|
|
863
|
-
}
|
|
864
|
-
else if (currentSwarm?.mergeResults && currentSwarm.mergeResults.length > 0) {
|
|
865
|
-
const merged = currentSwarm.mergeResults.filter((r) => r.ok);
|
|
866
|
-
const autoResolved = merged.filter((r) => r.autoResolved).length;
|
|
867
|
-
const conflicts = currentSwarm.mergeResults.filter((r) => !r.ok);
|
|
868
|
-
const target = currentSwarm.mergeBranch || "HEAD";
|
|
869
|
-
if (merged.length > 0) {
|
|
870
|
-
const extra = autoResolved > 0 ? chalk.yellow(` (${autoResolved} auto-resolved)`) : "";
|
|
871
|
-
console.log(chalk.green(` Merged ${merged.length} branch(es) into ${target}`) + extra);
|
|
872
|
-
}
|
|
873
|
-
if (currentSwarm.mergeBranch)
|
|
874
|
-
console.log(chalk.dim(` Branch: ${currentSwarm.mergeBranch} \u2014 create a PR or: git merge ${currentSwarm.mergeBranch}`));
|
|
875
|
-
if (conflicts.length > 0) {
|
|
876
|
-
console.log(chalk.red(` ${conflicts.length} unresolved conflict(s):`));
|
|
877
|
-
for (const c of conflicts)
|
|
878
|
-
console.log(chalk.red(` ${c.branch}`));
|
|
879
|
-
console.log(chalk.dim(" Merge manually: git merge <branch>"));
|
|
880
|
-
}
|
|
1287
|
+
console.log(chalk.dim(`\n Branch: ${runBranch} — git merge ${runBranch}`));
|
|
881
1288
|
}
|
|
1289
|
+
console.log(chalk.dim(` Run: ${runDir}`));
|
|
882
1290
|
if (currentSwarm?.logFile)
|
|
883
1291
|
console.log(chalk.dim(` Log: ${currentSwarm.logFile}`));
|
|
884
1292
|
console.log("");
|
package/dist/planner.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { Task, PermMode } from "./types.js";
|
|
2
2
|
export interface WaveSummary {
|
|
3
3
|
wave: number;
|
|
4
|
+
kind: "execute" | "reflect" | "think";
|
|
4
5
|
tasks: {
|
|
5
6
|
prompt: string;
|
|
6
7
|
status: string;
|
|
@@ -10,14 +11,26 @@ export interface WaveSummary {
|
|
|
10
11
|
}
|
|
11
12
|
export interface SteerResult {
|
|
12
13
|
done: boolean;
|
|
14
|
+
action: "execute" | "reflect" | "done";
|
|
13
15
|
tasks: Task[];
|
|
14
16
|
reasoning: string;
|
|
17
|
+
goalUpdate?: string;
|
|
18
|
+
statusUpdate?: string;
|
|
19
|
+
}
|
|
20
|
+
export interface RunMemory {
|
|
21
|
+
designs: string;
|
|
22
|
+
reflections: string;
|
|
23
|
+
milestones: string;
|
|
24
|
+
status: string;
|
|
25
|
+
goal: string;
|
|
26
|
+
previousRuns?: string;
|
|
15
27
|
}
|
|
16
28
|
export type ModelTier = "opus" | "sonnet" | "haiku" | "unknown";
|
|
17
29
|
export declare function detectModelTier(model: string): ModelTier;
|
|
18
30
|
export declare function planTasks(objective: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void, flexNote?: string): Promise<Task[]>;
|
|
19
31
|
export declare function identifyThemes(objective: string, count: number, model: string, permissionMode: PermMode): Promise<string[]>;
|
|
20
|
-
export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string): Task[];
|
|
32
|
+
export declare function buildThinkingTasks(objective: string, themes: string[], designDir: string, plannerModel: string, previousKnowledge?: string): Task[];
|
|
33
|
+
export declare function buildReflectionTasks(objective: string, goal: string, reflectionDir: string, waveNum: number, plannerModel: string): Task[];
|
|
21
34
|
export declare function orchestrate(objective: string, designDocs: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number, concurrency: number, onLog: (text: string) => void, flexNote?: string): Promise<Task[]>;
|
|
22
35
|
export declare function refinePlan(objective: string, previousTasks: Task[], feedback: string, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, budget: number | undefined, concurrency: number, onLog: (text: string) => void): Promise<Task[]>;
|
|
23
|
-
export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, concurrency: number, onLog: (text: string) => void,
|
|
36
|
+
export declare function steerWave(objective: string, history: WaveSummary[], remainingBudget: number, cwd: string, plannerModel: string, workerModel: string, permissionMode: PermMode, concurrency: number, onLog: (text: string) => void, runMemory?: RunMemory): Promise<SteerResult>;
|
package/dist/planner.js
CHANGED
|
@@ -361,13 +361,14 @@ Return ONLY a JSON object: {"themes": ["angle description", ...]}`,
|
|
|
361
361
|
const fallback = ["architecture, patterns, and conventions", "data models, state, and persistence", "user-facing flows, components, and UX", "APIs, integrations, and services", "testing, quality, and error handling", "security, performance, and infrastructure", "build, deployment, and configuration", "documentation and developer experience"];
|
|
362
362
|
return Array.from({ length: count }, (_, i) => fallback[i % fallback.length]);
|
|
363
363
|
}
|
|
364
|
-
export function buildThinkingTasks(objective, themes, designDir, plannerModel) {
|
|
364
|
+
export function buildThinkingTasks(objective, themes, designDir, plannerModel, previousKnowledge) {
|
|
365
|
+
const prevBlock = previousKnowledge ? `\nKNOWLEDGE FROM PREVIOUS RUNS:\n${previousKnowledge}\n\nBuild on this — don't re-discover what's already known.\n` : "";
|
|
365
366
|
return themes.map((theme, i) => ({
|
|
366
367
|
id: `think-${i}`,
|
|
367
368
|
prompt: `You are a senior architect exploring a codebase to design a solution.
|
|
368
369
|
|
|
369
370
|
OVERALL OBJECTIVE: ${objective}
|
|
370
|
-
|
|
371
|
+
${prevBlock}
|
|
371
372
|
YOUR FOCUS: ${theme}
|
|
372
373
|
|
|
373
374
|
Explore the codebase thoroughly using Read, Glob, and Grep. Then write a design document to ${designDir}/focus-${i}.md with these sections:
|
|
@@ -389,6 +390,44 @@ Be thorough — your findings drive the execution plan.`,
|
|
|
389
390
|
model: plannerModel,
|
|
390
391
|
}));
|
|
391
392
|
}
|
|
393
|
+
export function buildReflectionTasks(objective, goal, reflectionDir, waveNum, plannerModel) {
|
|
394
|
+
const goalBlock = goal ? `\nEVOLVED GOAL:\n${goal}\n` : "";
|
|
395
|
+
return [
|
|
396
|
+
{
|
|
397
|
+
id: "review-0",
|
|
398
|
+
prompt: `You are a senior code reviewer performing a deep quality audit.
|
|
399
|
+
|
|
400
|
+
OBJECTIVE: ${objective}
|
|
401
|
+
${goalBlock}
|
|
402
|
+
Read the codebase thoroughly. Assess:
|
|
403
|
+
- **Correctness**: Bugs, missing error handling, broken flows?
|
|
404
|
+
- **Architecture**: Clean design? Unnecessary or missing abstractions?
|
|
405
|
+
- **Code quality**: Readability, naming, duplication, dead code?
|
|
406
|
+
- **Completeness**: What's missing vs. the objective? Half-done work?
|
|
407
|
+
- **Polish**: Edge cases, error messages, loading states?
|
|
408
|
+
|
|
409
|
+
Write findings to ${reflectionDir}/wave-${waveNum}-quality.md.
|
|
410
|
+
End with a ## Verdict: is this closer to "good enough" or "amazing"? What would make the biggest difference?`,
|
|
411
|
+
model: plannerModel,
|
|
412
|
+
},
|
|
413
|
+
{
|
|
414
|
+
id: "review-1",
|
|
415
|
+
prompt: `You are a UX and integration reviewer.
|
|
416
|
+
|
|
417
|
+
OBJECTIVE: ${objective}
|
|
418
|
+
${goalBlock}
|
|
419
|
+
Read the codebase. Assess:
|
|
420
|
+
- **UX coherence**: Do user-facing flows make sense end-to-end? Consistent experience?
|
|
421
|
+
- **Integration**: Do pieces fit together? Seams, inconsistencies, broken contracts?
|
|
422
|
+
- **Testing**: Meaningful coverage? Testing the right things?
|
|
423
|
+
- **Gaps**: Unhandled use cases? What would surprise a user?
|
|
424
|
+
|
|
425
|
+
Write findings to ${reflectionDir}/wave-${waveNum}-ux.md.
|
|
426
|
+
End with ## Priorities: rank the top 3 things that would most improve the result.`,
|
|
427
|
+
model: plannerModel,
|
|
428
|
+
},
|
|
429
|
+
];
|
|
430
|
+
}
|
|
392
431
|
export async function orchestrate(objective, designDocs, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote) {
|
|
393
432
|
const capability = modelCapabilityBlock(workerModel);
|
|
394
433
|
const flexLine = flexNote ? `\n\n${flexNote}` : "";
|
|
@@ -549,34 +588,67 @@ async function extractTaskJson(raw, retry) {
|
|
|
549
588
|
throw new Error("Planner did not return valid task JSON after retry");
|
|
550
589
|
}
|
|
551
590
|
// ── Wave steering ──
|
|
552
|
-
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog,
|
|
591
|
+
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog, runMemory) {
|
|
553
592
|
const capability = modelCapabilityBlock(workerModel);
|
|
554
|
-
|
|
593
|
+
// Three-layer context: status (current), milestones (strategic), recent waves (tactical)
|
|
594
|
+
const recentWaves = history.slice(-3);
|
|
595
|
+
const recentText = recentWaves.length > 0 ? recentWaves.map(w => {
|
|
596
|
+
const tag = w.kind === "reflect" ? " (reflection)" : w.kind === "think" ? " (thinking)" : "";
|
|
555
597
|
const lines = w.tasks.map(t => {
|
|
556
598
|
const files = t.filesChanged ? ` (${t.filesChanged} files)` : "";
|
|
557
599
|
const err = t.error ? ` — ${t.error}` : "";
|
|
558
600
|
return ` - [${t.status}] ${t.prompt.slice(0, 120)}${files}${err}`;
|
|
559
601
|
}).join("\n");
|
|
560
|
-
return `Wave ${w.wave + 1}:\n${lines}`;
|
|
561
|
-
}).join("\n\n");
|
|
562
|
-
const
|
|
602
|
+
return `Wave ${w.wave + 1}${tag}:\n${lines}`;
|
|
603
|
+
}).join("\n\n") : "(first wave)";
|
|
604
|
+
const lastWasReflection = history.length > 0 && history[history.length - 1].kind === "reflect";
|
|
605
|
+
const noReflectHint = lastWasReflection ? `\nIMPORTANT: The previous wave was a reflection. You MUST choose "execute" or "done" — not "reflect" again.\n` : "";
|
|
606
|
+
const cap = (s, max) => s.length > max ? s.slice(0, max) + "\n...(truncated)" : s;
|
|
607
|
+
const statusBlock = runMemory?.status ? `\nCurrent project status:\n${runMemory.status}\n` : "";
|
|
608
|
+
const milestoneBlock = runMemory?.milestones ? `\nMilestone snapshots:\n${cap(runMemory.milestones, 4000)}\n` : "";
|
|
609
|
+
const designBlock = runMemory?.designs ? `\nArchitectural research:\n${cap(runMemory.designs, 4000)}\n` : "";
|
|
610
|
+
const reflectionBlock = runMemory?.reflections ? `\nLatest quality reports:\n${cap(runMemory.reflections, 3000)}\n` : "";
|
|
611
|
+
const goalBlock = runMemory?.goal ? `\nNorth star — what "amazing" means:\n${runMemory.goal}\n` : "";
|
|
612
|
+
const prevRunBlock = runMemory?.previousRuns ? `\nKnowledge from previous runs:\n${cap(runMemory.previousRuns, 3000)}\n` : "";
|
|
613
|
+
const prompt = `You are the quality director for an autonomous multi-wave agent system. Your job is to push the work toward "amazing," not just "done."
|
|
563
614
|
|
|
564
615
|
Objective: ${objective}
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
${
|
|
568
|
-
${
|
|
616
|
+
${goalBlock}${statusBlock}${milestoneBlock}${prevRunBlock}
|
|
617
|
+
Recent waves:
|
|
618
|
+
${recentText}
|
|
619
|
+
${designBlock}${reflectionBlock}
|
|
569
620
|
Remaining budget: ${remainingBudget} agent sessions. ${concurrency} agents run in parallel — tasks must touch DIFFERENT files.
|
|
570
621
|
${capability}
|
|
622
|
+
Total waves completed: ${history.length}
|
|
623
|
+
|
|
624
|
+
Read the codebase. Assess: how close is this to the VISION? Not "what's missing" — "how good is what we built?"
|
|
571
625
|
|
|
572
|
-
|
|
573
|
-
- Is the objective fully met? → {"done": true, "reasoning": "..."}
|
|
574
|
-
- More work needed? Plan the next wave → {"done": false, "reasoning": "what needs doing and why", "tasks": [{"prompt": "..."}]}
|
|
626
|
+
Then choose ONE action:
|
|
575
627
|
|
|
576
|
-
|
|
628
|
+
**"reflect"** — Spin up 1-2 review agents for a deep quality audit. Choose when:
|
|
629
|
+
- Substantial new code shipped and hasn't been reviewed
|
|
630
|
+
- You're unsure about quality and need expert eyes
|
|
631
|
+
- A subsystem just "completed" and deserves verification
|
|
577
632
|
|
|
578
|
-
|
|
579
|
-
|
|
633
|
+
**"execute"** — Plan the next batch of tasks. Choose when:
|
|
634
|
+
- You know what needs doing (from reviews or your own assessment)
|
|
635
|
+
- There are clear gaps, bugs, or improvements to make
|
|
636
|
+
|
|
637
|
+
**"done"** — The objective is met at high quality. Choose when:
|
|
638
|
+
- The code works correctly and handles edge cases
|
|
639
|
+
- The architecture is clean and pieces fit together
|
|
640
|
+
- Further work would be diminishing returns
|
|
641
|
+
${noReflectHint}
|
|
642
|
+
Respond with ONLY a JSON object (no markdown fences):
|
|
643
|
+
{
|
|
644
|
+
"action": "execute" | "reflect" | "done",
|
|
645
|
+
"done": true/false,
|
|
646
|
+
"reasoning": "your assessment and why you chose this action",
|
|
647
|
+
"goalUpdate": "optional — refine what 'amazing' means as you learn more",
|
|
648
|
+
"statusUpdate": "REQUIRED — write a concise project status: what's built, what works, what's rough, quality level, key gaps. This replaces the previous status and is your memory for future waves.",
|
|
649
|
+
"tasks": [{"prompt": "..."}]
|
|
650
|
+
}`;
|
|
651
|
+
onLog("Assessing...");
|
|
580
652
|
const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
|
|
581
653
|
const parsed = await (async () => {
|
|
582
654
|
const first = attemptJsonParse(resultText);
|
|
@@ -585,21 +657,26 @@ Respond with ONLY a JSON object (no markdown fences).`;
|
|
|
585
657
|
onLog("Retrying...");
|
|
586
658
|
let retryText = "";
|
|
587
659
|
for await (const msg of query({
|
|
588
|
-
prompt: `Output ONLY a JSON object: {"done":true/false,"reasoning":"...","tasks":[{"prompt":"..."}]}`,
|
|
660
|
+
prompt: `Output ONLY a JSON object: {"action":"execute"|"reflect"|"done","done":true/false,"reasoning":"...","tasks":[{"prompt":"..."}]}`,
|
|
589
661
|
options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
|
|
590
662
|
})) {
|
|
591
663
|
if (msg.type === "result" && msg.subtype === "success")
|
|
592
664
|
retryText = msg.result || "";
|
|
593
665
|
}
|
|
594
|
-
return attemptJsonParse(retryText) ?? { done: true, reasoning: "Could not parse steering response" };
|
|
666
|
+
return attemptJsonParse(retryText) ?? { action: "done", done: true, reasoning: "Could not parse steering response" };
|
|
595
667
|
})();
|
|
596
|
-
|
|
597
|
-
|
|
668
|
+
const action = parsed.action || (parsed.done ? "done" : "execute");
|
|
669
|
+
const statusUpdate = parsed.statusUpdate || undefined;
|
|
670
|
+
if (action === "done") {
|
|
671
|
+
return { done: true, action: "done", tasks: [], reasoning: parsed.reasoning || "Objective complete", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
672
|
+
}
|
|
673
|
+
if (action === "reflect") {
|
|
674
|
+
return { done: false, action: "reflect", tasks: [], reasoning: parsed.reasoning || "Quality audit needed", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
598
675
|
}
|
|
599
676
|
let tasks = (parsed.tasks || []).map((t, i) => ({
|
|
600
677
|
id: String(i),
|
|
601
678
|
prompt: typeof t === "string" ? t : t.prompt,
|
|
602
679
|
}));
|
|
603
680
|
tasks = postProcess(tasks, remainingBudget, onLog);
|
|
604
|
-
return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "" };
|
|
681
|
+
return { done: tasks.length === 0, action: tasks.length === 0 ? "done" : "execute", tasks, reasoning: parsed.reasoning || "", goalUpdate: parsed.goalUpdate, statusUpdate };
|
|
605
682
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -95,3 +95,37 @@ export type SwarmPhase = "planning" | "running" | "merging" | "done";
|
|
|
95
95
|
* - "branch": Create a new branch, merge everything there (main untouched).
|
|
96
96
|
*/
|
|
97
97
|
export type MergeStrategy = "yolo" | "branch";
|
|
98
|
+
/** Tracks a git branch created by an agent. */
|
|
99
|
+
export interface BranchRecord {
|
|
100
|
+
branch: string;
|
|
101
|
+
taskPrompt: string;
|
|
102
|
+
status: "merged" | "unmerged" | "failed" | "merge-failed";
|
|
103
|
+
filesChanged: number;
|
|
104
|
+
costUsd: number;
|
|
105
|
+
}
|
|
106
|
+
/** Persisted run state for crash recovery and resume. */
|
|
107
|
+
export interface RunState {
|
|
108
|
+
id: string;
|
|
109
|
+
objective: string;
|
|
110
|
+
budget: number;
|
|
111
|
+
remaining: number;
|
|
112
|
+
workerModel: string;
|
|
113
|
+
plannerModel: string;
|
|
114
|
+
concurrency: number;
|
|
115
|
+
permissionMode: PermMode;
|
|
116
|
+
usageCap?: number;
|
|
117
|
+
flex: boolean;
|
|
118
|
+
useWorktrees: boolean;
|
|
119
|
+
mergeStrategy: MergeStrategy;
|
|
120
|
+
waveNum: number;
|
|
121
|
+
currentTasks: Task[];
|
|
122
|
+
lastWaveKind: "execute" | "reflect" | "think";
|
|
123
|
+
reflectionBudgetUsed: number;
|
|
124
|
+
accCost: number;
|
|
125
|
+
accCompleted: number;
|
|
126
|
+
accFailed: number;
|
|
127
|
+
branches: BranchRecord[];
|
|
128
|
+
phase: "executing" | "steering" | "reflecting" | "done";
|
|
129
|
+
startedAt: string;
|
|
130
|
+
cwd: string;
|
|
131
|
+
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-overnight",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Run 10, 100, or 1000 Claude agents overnight. Parallel autonomous AI coding with thinking waves, iterative quality steering, crash recovery, and rate limit handling. Built on the Claude Agent SDK.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"claude-overnight": "dist/index.js"
|
|
@@ -28,14 +28,20 @@
|
|
|
28
28
|
},
|
|
29
29
|
"keywords": [
|
|
30
30
|
"claude",
|
|
31
|
-
"
|
|
32
|
-
"agents",
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
31
|
+
"claude-agent-sdk",
|
|
32
|
+
"ai-agents",
|
|
33
|
+
"ai-coding",
|
|
34
|
+
"parallel-agents",
|
|
35
|
+
"autonomous-coding",
|
|
36
|
+
"swarm",
|
|
36
37
|
"overnight",
|
|
37
38
|
"cli",
|
|
38
|
-
"orchestration"
|
|
39
|
+
"orchestration",
|
|
40
|
+
"multi-agent",
|
|
41
|
+
"code-generation",
|
|
42
|
+
"anthropic",
|
|
43
|
+
"worktrees",
|
|
44
|
+
"iterative"
|
|
39
45
|
],
|
|
40
46
|
"engines": {
|
|
41
47
|
"node": ">=20"
|