claude-overnight 1.16.16 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QUICKSHEET_PLAYWRIGHT.md +1 -1
- package/README.md +61 -55
- package/dist/bin.js +1 -1
- package/dist/cli.d.ts +1 -1
- package/dist/cli.js +4 -4
- package/dist/index.js +87 -46
- package/dist/merge.d.ts +1 -1
- package/dist/merge.js +7 -7
- package/dist/planner-query.d.ts +1 -1
- package/dist/planner-query.js +8 -8
- package/dist/planner.d.ts +1 -1
- package/dist/planner.js +21 -21
- package/dist/providers.d.ts +3 -1
- package/dist/providers.js +5 -3
- package/dist/render.d.ts +1 -1
- package/dist/render.js +3 -3
- package/dist/run.d.ts +21 -14
- package/dist/run.js +177 -74
- package/dist/state.d.ts +2 -2
- package/dist/state.js +9 -9
- package/dist/steering.d.ts +1 -1
- package/dist/steering.js +21 -21
- package/dist/swarm.d.ts +3 -3
- package/dist/swarm.js +38 -24
- package/dist/types.d.ts +47 -14
- package/dist/types.js +2 -2
- package/dist/ui.d.ts +4 -4
- package/dist/ui.js +4 -4
- package/package.json +1 -1
- package/plugins/claude-overnight/.claude-plugin/plugin.json +2 -2
- package/plugins/claude-overnight/skills/claude-overnight/SKILL.md +24 -19
package/QUICKSHEET_PLAYWRIGHT.md
CHANGED
|
@@ -62,4 +62,4 @@ npx ctx7@latest library playwright "parallel browser instances isolation"
|
|
|
62
62
|
npx ctx7@latest docs <libraryId> "parallel browser instances"
|
|
63
63
|
```
|
|
64
64
|
|
|
65
|
-
**Note:** ctx7 requires authentication (`npx ctx7@latest login` or `CONTEXT7_API_KEY` env var). If unauthenticated, lookups will fail
|
|
65
|
+
**Note:** ctx7 requires authentication (`npx ctx7@latest login` or `CONTEXT7_API_KEY` env var). If unauthenticated, lookups will fail -- agents should fall back to training data.
|
package/README.md
CHANGED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
# claude-overnight
|
|
2
2
|
|
|
3
|
-
**A background lane for your Claude Max plan.** Runs a capped swarm of Claude Agent SDK sessions in isolated git worktrees
|
|
3
|
+
**A background lane for your Claude Max plan.** Runs a capped swarm of Claude Agent SDK sessions in isolated git worktrees -- stops at a usage cap you set, so your interactive Claude Code always has headroom. Rate-limited? It waits. Crash? It resumes with full context.
|
|
4
4
|
|
|
5
5
|
Your Max plan rate limits eat interactive coding time. One deep refactor and the 5-hour window is gone before lunch. `claude-overnight` runs background agent sessions up to the percentage cap you pick (90% is typical), leaving the rest free for your own Claude Code session. Hand it an objective and a session budget, walk away, review the diff when the run ends.
|
|
6
6
|
|
|
7
|
-
Isolated by default. Every agent runs in its own git worktree on its own branch, so a misbehaving agent can't trash your working tree. You choose what agents can do before the run starts
|
|
7
|
+
Isolated by default. Every agent runs in its own git worktree on its own branch, so a misbehaving agent can't trash your working tree. You choose what agents can do before the run starts -- no surprise escalation mid-flight. Unmerged branches are preserved for manual review, never discarded. Built on the [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk) -- not a Claude Code replacement, but a background lane that runs alongside it.
|
|
8
8
|
|
|
9
|
-
Different shape from hosted agent harnesses like [Claude Managed Agents](https://platform.claude.com/docs/en/managed-agents/overview): instead of one agent in one cloud container billed separately, you get many parallel sessions on your own machine, in your real repo, against your own Max plan (or API key). Works with Claude Opus, Sonnet, and Haiku
|
|
9
|
+
Different shape from hosted agent harnesses like [Claude Managed Agents](https://platform.claude.com/docs/en/managed-agents/overview): instead of one agent in one cloud container billed separately, you get many parallel sessions on your own machine, in your real repo, against your own Max plan (or API key). Works with Claude Opus, Sonnet, and Haiku -- or pair an Anthropic planner with a cheaper executor on Qwen, OpenRouter, or any Anthropic-compatible endpoint.
|
|
10
10
|
|
|
11
11
|
## Run on Qwen 3.6 Plus
|
|
12
12
|
|
|
13
|
-
Hit your Claude Max plan limits? Running on a tight budget? Qwen 3.6 Plus via Alibaba Cloud's DashScope gateway is a drop-in executor that speaks the Anthropic Messages API
|
|
13
|
+
Hit your Claude Max plan limits? Running on a tight budget? Qwen 3.6 Plus via Alibaba Cloud's DashScope gateway is a drop-in executor that speaks the Anthropic Messages API -- same client, same flow, pennies per run.
|
|
14
14
|
|
|
15
|
-
1. **Get an API key.** Sign up at [Alibaba Cloud](https://account.alibabacloud.com/login/login.htm?oauth_callback=https%3A%2F%2Fmodelstudio.console.alibabacloud.com%2Fap-southeast-1%3Ftab%3Ddashboard%23%2Fapi-key&clearRedirectCookie=1)
|
|
15
|
+
1. **Get an API key.** Sign up at [Alibaba Cloud](https://account.alibabacloud.com/login/login.htm?oauth_callback=https%3A%2F%2Fmodelstudio.console.alibabacloud.com%2Fap-southeast-1%3Ftab%3Ddashboard%23%2Fapi-key&clearRedirectCookie=1) -- the link takes you straight to the API key dashboard.
|
|
16
16
|
2. **Configure the provider.** Run `claude-overnight`, choose `Other…` on the executor step, and fill in:
|
|
17
17
|
|
|
18
18
|
| Field | Value |
|
|
@@ -39,7 +39,7 @@ claude-overnight
|
|
|
39
39
|
npm install -g claude-overnight
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
Requires Node.js ≥ 20 and Claude authentication (`claude auth login` or `ANTHROPIC_API_KEY`). No Anthropic plan or key? See **Run on Qwen 3.6 Plus** above
|
|
42
|
+
Requires Node.js ≥ 20 and Claude authentication (`claude auth login` or `ANTHROPIC_API_KEY`). No Anthropic plan or key? See **Run on Qwen 3.6 Plus** above -- a cheap, drop-in alternative.
|
|
43
43
|
|
|
44
44
|
## Quick start
|
|
45
45
|
|
|
@@ -56,13 +56,13 @@ claude-overnight
|
|
|
56
56
|
|
|
57
57
|
② Budget [10]: 200
|
|
58
58
|
|
|
59
|
-
④ Planner model (thinking, steering
|
|
60
|
-
● Opus
|
|
61
|
-
○ Sonnet
|
|
59
|
+
④ Planner model (thinking, steering -- use your strongest):
|
|
60
|
+
● Opus -- Opus 4.6 · Most capable
|
|
61
|
+
○ Sonnet -- Sonnet 4.6 · Best for everyday tasks
|
|
62
62
|
|
|
63
|
-
⑤ Executor model (what runs the tasks
|
|
64
|
-
● Sonnet
|
|
65
|
-
○ Opus
|
|
63
|
+
⑤ Executor model (what runs the tasks -- Qwen 3.6 Plus / OpenRouter / etc via Other…):
|
|
64
|
+
● Sonnet -- Sonnet 4.6 · Best for everyday tasks
|
|
65
|
+
○ Opus -- Opus 4.6 · Most capable
|
|
66
66
|
○ Other… · custom OpenAI/Anthropic-compatible endpoint
|
|
67
67
|
|
|
68
68
|
⑥ Usage cap:
|
|
@@ -89,58 +89,64 @@ claude-overnight
|
|
|
89
89
|
◆ Assessing... ✓ Done
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
-
You interact once (objective, budget, model, review themes), then the rest runs unattended
|
|
92
|
+
You interact once (objective, budget, model, review themes), then the rest runs unattended -- thinking, planning, executing, reflecting, steering. Rate-limited? It waits and retries. Crash? Resume where you left off. Capped at usage limit? Pick up next time with full context preserved.
|
|
93
93
|
|
|
94
94
|
## How it differs
|
|
95
95
|
|
|
96
96
|
- vs **Claude Code**: many agents, no driver, capped so your Claude Code session keeps its headroom
|
|
97
|
-
- vs **[Managed Agents](https://platform.claude.com/docs/en/managed-agents/overview)**: on your machine, against your Max plan, in your real git history
|
|
97
|
+
- vs **[Managed Agents](https://platform.claude.com/docs/en/managed-agents/overview)**: on your machine, against your Max plan, in your real git history -- not a cloud container billed separately
|
|
98
98
|
- vs **Cursor / Copilot / Cline**: asynchronous, off the keyboard
|
|
99
99
|
|
|
100
100
|
## Use cases
|
|
101
101
|
|
|
102
|
-
- **Overnight refactors**
|
|
103
|
-
- **Batch feature implementation**
|
|
104
|
-
- **Codebase-wide cleanups**
|
|
105
|
-
- **Test generation at scale**
|
|
106
|
-
- **Documentation sprints**
|
|
107
|
-
- **Framework migrations**
|
|
108
|
-
- **Quality audits**
|
|
109
|
-
- **Long research runs**
|
|
102
|
+
- **Overnight refactors** -- "Modernize the auth system" at budget 200.
|
|
103
|
+
- **Batch feature implementation** -- dozens of features from a task file, parallelized.
|
|
104
|
+
- **Codebase-wide cleanups** -- deduplicate, simplify, rename, normalize.
|
|
105
|
+
- **Test generation at scale** -- integration tests for every route or module.
|
|
106
|
+
- **Documentation sprints** -- API docs, READMEs, inline comments, changelogs.
|
|
107
|
+
- **Framework migrations** -- version upgrades, type annotations, config format swaps.
|
|
108
|
+
- **Quality audits** -- reflection waves surface architectural issues and code smells.
|
|
109
|
+
- **Long research runs** -- architect sessions explore a large codebase before any code lands.
|
|
110
110
|
|
|
111
111
|
Typical shape: one objective + a $20–$200 spend cap + walk away.
|
|
112
112
|
|
|
113
113
|
## How it works
|
|
114
114
|
|
|
115
|
-
### 1. Thinking phase
|
|
115
|
+
### 1. Thinking phase -- parallel architect sessions
|
|
116
116
|
|
|
117
117
|
For budgets > 15, the tool launches **architect agents** that explore your codebase before any code is written. Each one gets a different research angle (architecture, data models, APIs, testing, etc.) and writes a structured design document. The number scales with budget: 5 for budget=50, 10 for budget=2000.
|
|
118
118
|
|
|
119
119
|
### 2. Task orchestration
|
|
120
120
|
|
|
121
|
-
An orchestrator session reads all design documents and synthesizes concrete execution tasks
|
|
121
|
+
An orchestrator session reads all design documents and synthesizes concrete execution tasks -- grounded in real files and patterns the architects found. The task plan is also written to a file for resilience -- if orchestration is interrupted, partial results survive.
|
|
122
122
|
|
|
123
123
|
### 3. Parallel execution waves
|
|
124
124
|
|
|
125
|
-
Tasks run in parallel agent sessions (each in its own git worktree). After completing its task, each session automatically runs a **simplify pass**
|
|
125
|
+
Tasks run in parallel agent sessions (each in its own git worktree). After completing its task, each session automatically runs a **simplify pass** -- reviewing its own `git diff` for code reuse opportunities, quality issues, and inefficiencies, then fixing them before the framework commits. This is done via the SDK's **session resume** mechanism: the same agent session continues with a follow-up prompt, so the agent's full context from its task is still available -- no need to re-instruct or re-fill context.
|
|
126
126
|
|
|
127
|
-
|
|
127
|
+
### 4. Post-wave review
|
|
128
|
+
|
|
129
|
+
After each wave (flex mode, budget remaining), a dedicated **review agent** inspects the consolidated diff for issues the individual agents may have missed: overlooked reuse opportunities, copy-paste variations, leaky abstractions, efficiency regressions. Runs as a single-agent wave -- one session reviews what the swarm just produced.
|
|
130
|
+
|
|
131
|
+
### 5. Post-run final gate
|
|
132
|
+
|
|
133
|
+
When the run completes (steering declares done), a final **comprehensive review** runs against the full `git diff main`. It checks architecture coherence, consistency with existing patterns, build integrity, and passing tests. The last quality gate before the diff lands.
|
|
134
|
+
|
|
135
|
+
### 6. Steering
|
|
136
|
+
|
|
137
|
+
After each wave, steering assesses: "how good is this?" -- not "what's missing?" It can:
|
|
128
138
|
|
|
129
139
|
- **Execute** more tasks to build features, fix bugs, polish UX
|
|
130
140
|
- **Reflect** by spinning up 1-2 review sessions for deep quality/architecture audits
|
|
131
141
|
- **Declare done** when the vision is met at high quality
|
|
132
142
|
|
|
133
|
-
###
|
|
134
|
-
|
|
135
|
-
The tool starts with your broad objective but refines its definition of quality as it learns your codebase. Steering updates the goal after each wave. Late waves are informed by early discoveries.
|
|
136
|
-
|
|
137
|
-
### 5. Three-layer context memory
|
|
143
|
+
### Three-layer context memory
|
|
138
144
|
|
|
139
145
|
Long runs stay sharp because steering maintains three layers of memory:
|
|
140
146
|
|
|
141
|
-
- **Status**
|
|
142
|
-
- **Milestones**
|
|
143
|
-
- **Goal**
|
|
147
|
+
- **Status** -- a living project snapshot, updated every wave. Compressed, never truncated.
|
|
148
|
+
- **Milestones** -- strategic snapshots archived every ~5 waves. Long-term memory.
|
|
149
|
+
- **Goal** -- the evolving north star. What quality means for this codebase.
|
|
144
150
|
|
|
145
151
|
## Run history, resume, and knowledge carryforward
|
|
146
152
|
|
|
@@ -155,7 +161,7 @@ Every run gets its own folder in `.claude-overnight/runs/`. Nothing is ever over
|
|
|
155
161
|
run.json, sessions/
|
|
156
162
|
```
|
|
157
163
|
|
|
158
|
-
Any run that stops before the steering system declares the objective complete
|
|
164
|
+
Any run that stops before the steering system declares the objective complete -- capped at usage limit, Ctrl+C, crash, rate limit timeout, steering failure -- is automatically resumable:
|
|
159
165
|
|
|
160
166
|
```
|
|
161
167
|
⚠ Unfinished run
|
|
@@ -170,7 +176,7 @@ Any run that stops before the steering system declares the objective complete
|
|
|
170
176
|
|
|
171
177
|
On resume: unmerged branches auto-merge, the wave loop continues, all context is preserved. Designs and reflections stay on disk until the objective is truly complete.
|
|
172
178
|
|
|
173
|
-
If the thinking phase succeeds but orchestration crashes, the next run detects the orphaned design docs and reuses them
|
|
179
|
+
If the thinking phase succeeds but orchestration crashes, the next run detects the orphaned design docs and reuses them -- no re-running $9 worth of architect sessions:
|
|
174
180
|
|
|
175
181
|
```
|
|
176
182
|
✓ Reusing 5 design docs (from prior attempt)
|
|
@@ -180,11 +186,11 @@ If the thinking phase succeeds but orchestration crashes, the next run detects t
|
|
|
180
186
|
...
|
|
181
187
|
```
|
|
182
188
|
|
|
183
|
-
**Knowledge carries forward**
|
|
189
|
+
**Knowledge carries forward** -- new runs inherit knowledge from completed previous runs. Thinking sessions and steering see what past runs built. Run 2 knows run 1 already built the auth system.
|
|
184
190
|
|
|
185
|
-
Add `.claude-overnight/` to your `.gitignore` (with the trailing slash
|
|
191
|
+
Add `.claude-overnight/` to your `.gitignore` (with the trailing slash -- see below).
|
|
186
192
|
|
|
187
|
-
A separate, tiny `claude-overnight.log.md` is also written at the repo root on every run. It's human-readable, append-only, one block per run (objective, start/finish, cost, outcome, branch), and is designed to be **committed**
|
|
193
|
+
A separate, tiny `claude-overnight.log.md` is also written at the repo root on every run. It's human-readable, append-only, one block per run (objective, start/finish, cost, outcome, branch), and is designed to be **committed** -- so even after `.claude-overnight/` is cleaned up you can still recover which prompt produced which commits. Use `.claude-overnight/` (with trailing slash) in your gitignore so this file isn't matched by accident.
|
|
188
194
|
|
|
189
195
|
## Task file and inline modes
|
|
190
196
|
|
|
@@ -228,20 +234,20 @@ claude-overnight "fix auth bug in src/auth.ts" "add tests for user model"
|
|
|
228
234
|
|---|---|---|
|
|
229
235
|
| `--budget=N` | `10` | Total agent sessions |
|
|
230
236
|
| `--concurrency=N` | `5` | Parallel agents |
|
|
231
|
-
| `--model=NAME` | prompted | Worker model
|
|
237
|
+
| `--model=NAME` | prompted | Worker model -- interactive mode picks planner + executor separately; `Other…` adds Qwen / OpenRouter / any Anthropic-compatible endpoint. In non-interactive mode, a saved provider's model id is auto-resolved to the provider. |
|
|
232
238
|
| `--usage-cap=N` | unlimited | Stop at N% utilization |
|
|
233
239
|
| `--allow-extra-usage` | off | Allow extra/overage usage (billed separately) |
|
|
234
|
-
| `--extra-usage-budget=N` |
|
|
240
|
+
| `--extra-usage-budget=N` | -- | Max $ for extra usage (implies --allow-extra-usage) |
|
|
235
241
|
| `--timeout=SECONDS` | `900` | Inactivity timeout per agent (nudges at timeout, kills at 2×) |
|
|
236
|
-
| `--no-flex` |
|
|
237
|
-
| `--dry-run` |
|
|
242
|
+
| `--no-flex` | -- | Disable multi-wave steering |
|
|
243
|
+
| `--dry-run` | -- | Show planned tasks without running |
|
|
238
244
|
|
|
239
245
|
## Task file fields
|
|
240
246
|
|
|
241
247
|
| Field | Type | Default | Description |
|
|
242
248
|
|---|---|---|---|
|
|
243
249
|
| `tasks` | `(string \| {prompt, cwd?, model?})[]` | required | Tasks to run |
|
|
244
|
-
| `objective` | `string` |
|
|
250
|
+
| `objective` | `string` | -- | High-level goal for steering |
|
|
245
251
|
| `flexiblePlan` | `boolean` | `false` | Enable multi-wave planning |
|
|
246
252
|
| `model` | `string` | prompted | Worker model |
|
|
247
253
|
| `concurrency` | `number` | `5` | Parallel agents |
|
|
@@ -252,12 +258,12 @@ claude-overnight "fix auth bug in src/auth.ts" "add tests for user model"
|
|
|
252
258
|
|
|
253
259
|
## Custom providers (Qwen, OpenRouter, any Anthropic-compatible endpoint)
|
|
254
260
|
|
|
255
|
-
Planner and executor are picked separately
|
|
261
|
+
Planner and executor are picked separately -- pair Opus-on-Anthropic for the planner/thinker with a cheaper model on another provider for the bulk of execution.
|
|
256
262
|
|
|
257
263
|
From the interactive picker, choose `Other…` on the planner or executor step:
|
|
258
264
|
|
|
259
265
|
```
|
|
260
|
-
⑤ Executor model (what runs the tasks
|
|
266
|
+
⑤ Executor model (what runs the tasks -- Qwen 3.6 Plus / OpenRouter / etc via Other…):
|
|
261
267
|
○ Sonnet
|
|
262
268
|
○ Opus
|
|
263
269
|
● Other…
|
|
@@ -272,13 +278,13 @@ From the interactive picker, choose `Other…` on the planner or executor step:
|
|
|
272
278
|
|
|
273
279
|
Saved providers live user-level at `~/.claude/claude-overnight/providers.json` (mode 0600) and show up automatically in every repo. No per-project config.
|
|
274
280
|
|
|
275
|
-
**How routing works.** Each `query()` gets its own env override (`ANTHROPIC_BASE_URL` + `ANTHROPIC_AUTH_TOKEN`)
|
|
281
|
+
**How routing works.** Each `query()` gets its own env override (`ANTHROPIC_BASE_URL` + `ANTHROPIC_AUTH_TOKEN`) -- planner queries use the planner provider, executor queries use the executor provider. No global shell env, no proxy daemon, no `process.env` pollution between calls.
|
|
276
282
|
|
|
277
283
|
**Pre-flight.** Before the swarm starts, each custom provider is pinged with a 1-turn auth check. Bad keys fail fast with `✗ executor preflight failed: ...` instead of N scattered mid-run errors.
|
|
278
284
|
|
|
279
285
|
**Resume.** Provider ids are persisted in `run.json` and rehydrated on resume. If you deleted a provider between runs, resume refuses to start and tells you exactly which id is missing.
|
|
280
286
|
|
|
281
|
-
**Non-interactive / CI.** `claude-overnight --model=qwen3.6-plus` auto-resolves the model id to a saved provider
|
|
287
|
+
**Non-interactive / CI.** `claude-overnight --model=qwen3.6-plus` auto-resolves the model id to a saved provider -- no separate `--provider` flag.
|
|
282
288
|
|
|
283
289
|
## Parallel Playwright Testing
|
|
284
290
|
|
|
@@ -316,8 +322,8 @@ See `QUICKSHEET_PLAYWRIGHT.md` for full config examples.
|
|
|
316
322
|
|
|
317
323
|
By default, extra/overage usage is **blocked**. When your plan's rate limits are exhausted, the run stops cleanly and is resumable. You control this in the interactive prompt (step ⑤) or via CLI flags:
|
|
318
324
|
|
|
319
|
-
- `--allow-extra-usage`
|
|
320
|
-
- `--extra-usage-budget=20`
|
|
325
|
+
- `--allow-extra-usage` -- opt in to extra usage (billed separately)
|
|
326
|
+
- `--extra-usage-budget=20` -- allow up to $20 of extra usage, then stop
|
|
321
327
|
|
|
322
328
|
### Live controls during execution
|
|
323
329
|
|
|
@@ -329,11 +335,11 @@ Press these keys while agents are running:
|
|
|
329
335
|
| `t` | Change usage cap threshold (0-100%) |
|
|
330
336
|
| `q` | Graceful stop (press twice to force quit) |
|
|
331
337
|
|
|
332
|
-
Changes take effect between waves
|
|
338
|
+
Changes take effect between waves -- active agents finish their current task.
|
|
333
339
|
|
|
334
340
|
### Multi-window usage display
|
|
335
341
|
|
|
336
|
-
The usage bar cycles through all rate limit windows (5h, 7d, etc.) every 3 seconds, showing utilization per window. Usage info is shown during all phases
|
|
342
|
+
The usage bar cycles through all rate limit windows (5h, 7d, etc.) every 3 seconds, showing utilization per window. Usage info is shown during all phases -- thinking, orchestration, steering, and execution.
|
|
337
343
|
|
|
338
344
|
When using extra usage with a budget, a dedicated progress bar shows spend vs limit with color-coded fill (magenta → yellow → red).
|
|
339
345
|
|
|
@@ -341,14 +347,14 @@ When using extra usage with a budget, a dedicated progress bar shows spend vs li
|
|
|
341
347
|
|
|
342
348
|
Built for unattended runs lasting hours or days.
|
|
343
349
|
|
|
344
|
-
- **Smooth overage transition**: when extra usage is allowed, plan limit rejection is seamless
|
|
345
|
-
- **Interrupt + resume**: agents and planner queries that go silent are interrupted and resumed with full conversation context via SDK session resume
|
|
350
|
+
- **Smooth overage transition**: when extra usage is allowed, plan limit rejection is seamless -- no dispatch blocking, agents continue into overage
|
|
351
|
+
- **Interrupt + resume**: agents and planner queries that go silent are interrupted and resumed with full conversation context via SDK session resume -- not killed and restarted from scratch
|
|
346
352
|
- **Hard block**: pauses until the rate limit window resets, then resumes
|
|
347
353
|
- **Soft throttle**: slows dispatch at >75% utilization
|
|
348
354
|
- **Extra usage guard**: detects overage billing and stops unless explicitly allowed
|
|
349
355
|
- **Cooldown between phases**: waits for rate limit reset after thinking before starting orchestration
|
|
350
356
|
- **Retry with backoff**: transient errors (429, overloaded) retry automatically
|
|
351
|
-
- **Usage cap**: set a ceiling, active agents finish, no new ones start
|
|
357
|
+
- **Usage cap**: set a ceiling, active agents finish, no new ones start -- run is resumable
|
|
352
358
|
- **Planner retries**: steering and orchestration retry on rate limits (30s/60s/120s backoff) with full context
|
|
353
359
|
|
|
354
360
|
## Git worktrees and branch merging
|
package/dist/bin.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
// Tiny launcher: prints a splash the instant node is ready, then dynamically
|
|
3
3
|
// imports the real entrypoint. Loading `@anthropic-ai/claude-agent-sdk` and the
|
|
4
|
-
// rest of the module graph takes several seconds on a cold cache
|
|
4
|
+
// rest of the module graph takes several seconds on a cold cache -- without
|
|
5
5
|
// this, the terminal sits black that whole time. index.ts stops the splash
|
|
6
6
|
// via `globalThis.__coStopSplash` as soon as its header is about to print.
|
|
7
7
|
const argv = process.argv.slice(2);
|
package/dist/cli.d.ts
CHANGED
|
@@ -30,7 +30,7 @@ export declare function appendPasteToSegments(segs: InputSegment[], text: string
|
|
|
30
30
|
export declare function backspaceSegments(segs: InputSegment[]): void;
|
|
31
31
|
/**
|
|
32
32
|
* Read a line from the user with bracketed-paste awareness.
|
|
33
|
-
* Pasted multi-line text stays in the buffer as a single block
|
|
33
|
+
* Pasted multi-line text stays in the buffer as a single block -- only a typed
|
|
34
34
|
* Enter submits. Falls back to cooked readline when stdin isn't a TTY.
|
|
35
35
|
*/
|
|
36
36
|
export declare function ask(question: string): Promise<string>;
|
package/dist/cli.js
CHANGED
|
@@ -58,11 +58,11 @@ export async function fetchModels(timeoutMs = 10_000) {
|
|
|
58
58
|
// Silent: callers fall back to a text prompt with the current value as default.
|
|
59
59
|
}
|
|
60
60
|
else if (isAuthError(err)) {
|
|
61
|
-
console.error(chalk.red("\n Authentication failed
|
|
61
|
+
console.error(chalk.red("\n Authentication failed -- check your API key or run: claude auth\n"));
|
|
62
62
|
process.exit(1);
|
|
63
63
|
}
|
|
64
64
|
else {
|
|
65
|
-
console.warn(chalk.yellow(`\n Could not fetch models: ${String(err.message || err).slice(0, 80)}
|
|
65
|
+
console.warn(chalk.yellow(`\n Could not fetch models: ${String(err.message || err).slice(0, 80)} -- continuing with defaults`));
|
|
66
66
|
}
|
|
67
67
|
return [];
|
|
68
68
|
}
|
|
@@ -72,7 +72,7 @@ export async function fetchModels(timeoutMs = 10_000) {
|
|
|
72
72
|
// When the terminal is in bracketed paste mode, pasted content is wrapped with
|
|
73
73
|
// \x1B[200~ ... \x1B[201~ so we can distinguish typed Enter from pasted newlines.
|
|
74
74
|
// Multi-line or long pastes are stored as opaque segments and shown as a compact
|
|
75
|
-
// [Pasted +N lines] placeholder while editing
|
|
75
|
+
// [Pasted +N lines] placeholder while editing -- the full text is substituted on submit.
|
|
76
76
|
export const PASTE_START = "\x1B[200~";
|
|
77
77
|
export const PASTE_END = "\x1B[201~";
|
|
78
78
|
export const PASTE_PLACEHOLDER_MAX = 80;
|
|
@@ -150,7 +150,7 @@ function stripAnsi(s) {
|
|
|
150
150
|
// ── Interactive primitives ──
|
|
151
151
|
/**
|
|
152
152
|
* Read a line from the user with bracketed-paste awareness.
|
|
153
|
-
* Pasted multi-line text stays in the buffer as a single block
|
|
153
|
+
* Pasted multi-line text stays in the buffer as a single block -- only a typed
|
|
154
154
|
* Enter submits. Falls back to cooked readline when stdin isn't a TTY.
|
|
155
155
|
*/
|
|
156
156
|
export function ask(question) {
|
package/dist/index.js
CHANGED
|
@@ -169,7 +169,7 @@ async function main() {
|
|
|
169
169
|
}
|
|
170
170
|
if (argv.includes("-h") || argv.includes("--help")) {
|
|
171
171
|
console.log(`
|
|
172
|
-
${chalk.bold("🌙 claude-overnight")} ${chalk.dim("v" + VERSION + "
|
|
172
|
+
${chalk.bold("🌙 claude-overnight")} ${chalk.dim("v" + VERSION + " -- background lane for your Claude Max plan")}
|
|
173
173
|
${chalk.dim("─".repeat(60))}
|
|
174
174
|
|
|
175
175
|
${chalk.cyan("Usage")}
|
|
@@ -183,7 +183,8 @@ async function main() {
|
|
|
183
183
|
--dry-run Show planned tasks without running them
|
|
184
184
|
--budget=N Target number of agent runs ${chalk.dim("(default: 10)")}
|
|
185
185
|
--concurrency=N Max parallel agents ${chalk.dim("(default: 5)")}
|
|
186
|
-
--model=NAME Worker model override ${chalk.dim("(interactive mode picks planner + executor separately
|
|
186
|
+
--model=NAME Worker model override ${chalk.dim("(interactive mode picks planner + executor separately -- supports 'Other…' for Qwen / OpenRouter / etc.)")}
|
|
187
|
+
--fast-model=NAME Fast model for quick tasks ${chalk.dim("(optional -- checked by worker model in next wave)")}
|
|
187
188
|
--usage-cap=N Stop at N% utilization ${chalk.dim("(e.g. 90 to save 10% for other work)")}
|
|
188
189
|
--allow-extra-usage Allow extra/overage usage ${chalk.dim("(default: stop when plan limits hit)")}
|
|
189
190
|
--extra-usage-budget=N Max $ for extra usage ${chalk.dim("(implies --allow-extra-usage)")}
|
|
@@ -259,7 +260,7 @@ async function main() {
|
|
|
259
260
|
process.exit(1);
|
|
260
261
|
}
|
|
261
262
|
if (noTTY)
|
|
262
|
-
console.log(chalk.dim(" Non-interactive mode
|
|
263
|
+
console.log(chalk.dim(" Non-interactive mode -- using defaults\n"));
|
|
263
264
|
// ── Run history ──
|
|
264
265
|
const rootDir = join(cwd, ".claude-overnight");
|
|
265
266
|
const runsDir = join(rootDir, "runs");
|
|
@@ -406,7 +407,7 @@ async function main() {
|
|
|
406
407
|
// Covers two cases:
|
|
407
408
|
// 1. Planning-phase resumes (the prior run died before executeRun).
|
|
408
409
|
// 2. Stopped/capped runs whose state was saved with currentTasks: []
|
|
409
|
-
// (saveRunState always stores []
|
|
410
|
+
// (saveRunState always stores [] -- the plan is on disk in tasks.json).
|
|
410
411
|
if (resumeState.currentTasks.length === 0) {
|
|
411
412
|
const loaded = salvageFromFile(join(resumeRunDir, "tasks.json"), resumeState.budget, () => { }, "resume");
|
|
412
413
|
if (loaded) {
|
|
@@ -415,12 +416,12 @@ async function main() {
|
|
|
415
416
|
console.log(chalk.green(`\n ✓ ${label} · ${loaded.length} tasks loaded from tasks.json`));
|
|
416
417
|
}
|
|
417
418
|
else if (resumeState.phase === "planning") {
|
|
418
|
-
// No tasks.json
|
|
419
|
+
// No tasks.json -- the thinking wave got killed before orchestrate ran.
|
|
419
420
|
// If design docs survived, re-orchestrate from them (salvages the
|
|
420
421
|
// thinking spend instead of throwing it away).
|
|
421
422
|
const designs = readMdDir(join(resumeRunDir, "designs"));
|
|
422
423
|
if (!designs || !resumeState.objective) {
|
|
423
|
-
console.error(chalk.red(`\n Planning-phase run has no usable tasks.json or designs
|
|
424
|
+
console.error(chalk.red(`\n Planning-phase run has no usable tasks.json or designs -- start Fresh instead.\n`));
|
|
424
425
|
process.exit(1);
|
|
425
426
|
}
|
|
426
427
|
const remainingBudget = Math.max(resumeState.concurrency, resumeState.budget - resumeState.accCompleted);
|
|
@@ -456,8 +457,10 @@ async function main() {
|
|
|
456
457
|
// ── Config resolution ──
|
|
457
458
|
let workerModel;
|
|
458
459
|
let plannerModel;
|
|
460
|
+
let fastModel;
|
|
459
461
|
let workerProvider;
|
|
460
462
|
let plannerProvider;
|
|
463
|
+
let fastProvider;
|
|
461
464
|
let budget;
|
|
462
465
|
let concurrency;
|
|
463
466
|
let objective = fileCfg?.objective;
|
|
@@ -470,23 +473,22 @@ async function main() {
|
|
|
470
473
|
if (resuming) {
|
|
471
474
|
workerModel = resumeState.workerModel;
|
|
472
475
|
plannerModel = resumeState.plannerModel;
|
|
476
|
+
fastModel = resumeState.fastModel;
|
|
473
477
|
const saved = loadProviders();
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
+
const resolveProvider = (providerId, role) => {
|
|
479
|
+
if (!providerId)
|
|
480
|
+
return undefined;
|
|
481
|
+
const p = saved.find(s => s.id === providerId);
|
|
482
|
+
if (!p) {
|
|
483
|
+
console.error(chalk.red(`\n Resume aborted: ${role} provider "${providerId}" is no longer in ~/.claude/claude-overnight/providers.json`));
|
|
478
484
|
console.error(chalk.dim(` Re-add it via a fresh run's "Other…" flow, or start Fresh instead.\n`));
|
|
479
485
|
process.exit(1);
|
|
480
486
|
}
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
console.error(chalk.dim(` Re-add it via a fresh run's "Other…" flow, or start Fresh instead.\n`));
|
|
487
|
-
process.exit(1);
|
|
488
|
-
}
|
|
489
|
-
}
|
|
487
|
+
return p;
|
|
488
|
+
};
|
|
489
|
+
workerProvider = resolveProvider(resumeState.workerProviderId, "worker");
|
|
490
|
+
plannerProvider = resolveProvider(resumeState.plannerProviderId, "planner");
|
|
491
|
+
fastProvider = resolveProvider(resumeState.fastProviderId, "fast");
|
|
490
492
|
budget = resumeState.budget;
|
|
491
493
|
concurrency = resumeState.concurrency;
|
|
492
494
|
objective = resumeState.objective;
|
|
@@ -538,12 +540,22 @@ async function main() {
|
|
|
538
540
|
clearInterval(modelSpinner);
|
|
539
541
|
process.stdout.write(`\x1B[2K\r`);
|
|
540
542
|
}
|
|
541
|
-
const plannerPick = await pickModel(`${chalk.cyan("④")} Planner model ${chalk.dim("(thinking, steering
|
|
543
|
+
const plannerPick = await pickModel(`${chalk.cyan("④")} Planner model ${chalk.dim("(thinking, steering -- use your strongest)")}:`, models);
|
|
542
544
|
plannerModel = plannerPick.model;
|
|
543
545
|
plannerProvider = plannerPick.provider;
|
|
544
|
-
const workerPick = await pickModel(`${chalk.cyan("⑤")} Executor model ${chalk.dim("(what runs the tasks
|
|
546
|
+
const workerPick = await pickModel(`${chalk.cyan("⑤")} Executor model ${chalk.dim("(what runs the tasks -- Qwen 3.6 Plus / OpenRouter / etc via Other…)")}:`, models);
|
|
545
547
|
workerModel = workerPick.model;
|
|
546
548
|
workerProvider = workerPick.provider;
|
|
549
|
+
// ⑤b Optional fast model for quick tasks that will be verified
|
|
550
|
+
const fastChoice = await select(`${chalk.cyan("⑤b")} Fast model ${chalk.dim("(optional -- Haiku/Qwen for quick tasks, checked by worker)")}:`, [
|
|
551
|
+
{ name: "Skip", value: "skip", hint: "two-tier mode only (current setup)" },
|
|
552
|
+
{ name: "Pick a fast model", value: "pick", hint: "Haiku, Qwen, or any provider -- for well-scoped tasks" },
|
|
553
|
+
]);
|
|
554
|
+
if (fastChoice === "pick") {
|
|
555
|
+
const fastPick = await pickModel(`${chalk.cyan("⑤c")} Fast model:`, models);
|
|
556
|
+
fastModel = fastPick.model;
|
|
557
|
+
fastProvider = fastPick.provider;
|
|
558
|
+
}
|
|
547
559
|
usageCap = await select(`${chalk.cyan("⑥")} Usage cap:`, [
|
|
548
560
|
{ name: "Unlimited", value: undefined, hint: "full capacity, wait through rate limits" },
|
|
549
561
|
{ name: "90%", value: 0.9, hint: "leave 10% for other work" },
|
|
@@ -603,7 +615,9 @@ async function main() {
|
|
|
603
615
|
mergeStrategy = "yolo";
|
|
604
616
|
}
|
|
605
617
|
const parts = [];
|
|
606
|
-
if (
|
|
618
|
+
if (fastModel)
|
|
619
|
+
parts.push(`${detectModelTier(plannerModel)} → ${detectModelTier(workerModel)} + ${detectModelTier(fastModel)}`);
|
|
620
|
+
else if (workerModel !== plannerModel)
|
|
607
621
|
parts.push(`${detectModelTier(workerModel)} → ${detectModelTier(plannerModel)}`);
|
|
608
622
|
else
|
|
609
623
|
parts.push(detectModelTier(workerModel));
|
|
@@ -631,16 +645,35 @@ async function main() {
|
|
|
631
645
|
let models = [];
|
|
632
646
|
if (!cliFlags.model && !fileCfg?.model)
|
|
633
647
|
models = await fetchModels(5_000);
|
|
634
|
-
|
|
635
|
-
|
|
648
|
+
// Multi-provider default resolution: match current ANTHROPIC_BASE_URL against
|
|
649
|
+
// saved providers first, then fetched models, then other providers, then hardcoded
|
|
650
|
+
// Anthropic default. Adding a new provider to providers.json automatically affects
|
|
651
|
+
// the default without code changes.
|
|
652
|
+
const activeBaseURL = process.env.ANTHROPIC_BASE_URL;
|
|
653
|
+
const savedForCLI = loadProviders();
|
|
654
|
+
const activeProvider = activeBaseURL ? savedForCLI.find(p => p.baseURL === activeBaseURL) : undefined;
|
|
655
|
+
const defaultModel = activeProvider?.model
|
|
656
|
+
?? models[0]?.value
|
|
657
|
+
?? savedForCLI.find(p => p !== activeProvider)?.model
|
|
658
|
+
?? "claude-sonnet-4-6";
|
|
659
|
+
workerModel = cliFlags.model ?? fileCfg?.model ?? defaultModel;
|
|
660
|
+
plannerModel = activeProvider?.model ?? models[0]?.value ?? workerModel;
|
|
636
661
|
// Auto-resolve a saved custom provider if --model matches its id or model id.
|
|
637
662
|
// Lets `claude-overnight --model=qwen3-coder-plus` route correctly without a separate flag.
|
|
638
|
-
const
|
|
639
|
-
const matched = savedForCli.find(p => p.id === workerModel || p.model === workerModel);
|
|
663
|
+
const matched = savedForCLI.find(p => p.id === workerModel || p.model === workerModel);
|
|
640
664
|
if (matched) {
|
|
641
665
|
workerProvider = matched;
|
|
642
666
|
workerModel = matched.model;
|
|
643
667
|
}
|
|
668
|
+
// Fast model: --fast-model flag
|
|
669
|
+
if (cliFlags["fast-model"]) {
|
|
670
|
+
fastModel = cliFlags["fast-model"];
|
|
671
|
+
const matchedFast = savedForCLI.find(p => p.id === fastModel || p.model === fastModel);
|
|
672
|
+
if (matchedFast) {
|
|
673
|
+
fastProvider = matchedFast;
|
|
674
|
+
fastModel = matchedFast.model;
|
|
675
|
+
}
|
|
676
|
+
}
|
|
644
677
|
concurrency = cliFlags.concurrency ? parseInt(cliFlags.concurrency) : (fileCfg?.concurrency ?? 5);
|
|
645
678
|
budget = cliFlags.budget ? parseInt(cliFlags.budget) : undefined;
|
|
646
679
|
if (budget != null && (isNaN(budget) || budget < 1)) {
|
|
@@ -693,27 +726,35 @@ async function main() {
|
|
|
693
726
|
}
|
|
694
727
|
if (useWorktrees)
|
|
695
728
|
validateGitRepo(cwd);
|
|
696
|
-
// Custom-provider routing: build a model→env resolver so planner
|
|
697
|
-
// queries hit the right endpoint without touching process.env globally.
|
|
698
|
-
const envForModel = buildEnvResolver({ plannerModel, plannerProvider, workerModel, workerProvider });
|
|
729
|
+
// Custom-provider routing: build a model→env resolver so planner, worker,
|
|
730
|
+
// and fast queries hit the right endpoint without touching process.env globally.
|
|
731
|
+
const envForModel = buildEnvResolver({ plannerModel, plannerProvider, workerModel, workerProvider, fastModel, fastProvider });
|
|
699
732
|
setPlannerEnvResolver(envForModel);
|
|
700
|
-
// Fail fast if a custom provider is misconfigured — one bad key would
|
|
733
|
+
// Fail fast if a custom provider is misconfigured -- one bad key would
|
|
701
734
|
// otherwise surface as N agent failures scattered across the run.
|
|
702
|
-
if (plannerProvider || workerProvider) {
|
|
735
|
+
if (plannerProvider || workerProvider || fastProvider) {
|
|
736
|
+
const seen = new Set();
|
|
737
|
+
const all = [
|
|
738
|
+
["planner", plannerProvider],
|
|
739
|
+
["executor", workerProvider],
|
|
740
|
+
["fast", fastProvider],
|
|
741
|
+
];
|
|
703
742
|
const pending = [];
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
743
|
+
for (const [role, p] of all) {
|
|
744
|
+
if (p && !seen.has(p.id)) {
|
|
745
|
+
seen.add(p.id);
|
|
746
|
+
pending.push([role, p]);
|
|
747
|
+
}
|
|
748
|
+
}
|
|
749
|
+
process.stdout.write(` ${chalk.dim(`◆ Pinging ${pending.map(([r, p]) => `${r} (${p.displayName})`).join(", ")}...`)}\n`);
|
|
750
|
+
const results = await Promise.all(pending.map(async ([role, p]) => ({ role, provider: p, result: await preflightProvider(p, cwd) })));
|
|
751
|
+
for (const { role, provider, result } of results) {
|
|
752
|
+
if (!result.ok) {
|
|
753
|
+
console.error(chalk.red(` ✗ ${role} preflight failed: ${chalk.dim(result.error)}`));
|
|
713
754
|
console.error(chalk.red(`\n Fix the provider at ~/.claude/claude-overnight/providers.json and retry.\n`));
|
|
714
755
|
process.exit(1);
|
|
715
756
|
}
|
|
716
|
-
|
|
757
|
+
console.log(` ${chalk.green(`✓ ${role} ready`)} ${chalk.dim(`· ${provider.displayName} · ${provider.model}`)}`);
|
|
717
758
|
}
|
|
718
759
|
}
|
|
719
760
|
if (nonInteractive) {
|
|
@@ -736,7 +777,7 @@ async function main() {
|
|
|
736
777
|
// Persist an early planning-phase state so the run is visible to the resume
|
|
737
778
|
// picker even if orchestrate dies before executeRun gets a chance to run.
|
|
738
779
|
// Without this, a crashed plan phase leaves no run.json and the run vanishes
|
|
739
|
-
// from findIncompleteRuns — you pay for orchestration and can't see it.
|
|
780
|
+
// from findIncompleteRuns -- you pay for orchestration and can't see it.
|
|
740
781
|
if (needsPlan && objective) {
|
|
741
782
|
try {
|
|
742
783
|
saveRunState(runDir, {
|
|
@@ -906,7 +947,7 @@ async function main() {
|
|
|
906
947
|
process.stdout.write(`\x1B[2K\r ${chalk.green(`✓ ${tasks.length} tasks`)}\n\n`);
|
|
907
948
|
}
|
|
908
949
|
else {
|
|
909
|
-
console.log(chalk.yellow(`\n No design docs
|
|
950
|
+
console.log(chalk.yellow(`\n No design docs -- falling back to direct planning\n`));
|
|
910
951
|
const waveBudget = Math.min(50, Math.max(concurrency, Math.ceil(((budget ?? 10) - thinkingUsed) * 0.5)));
|
|
911
952
|
tasks = await planTasks(objective, cwd, plannerModel, workerModel, permissionMode, waveBudget, concurrency, makeProgressLog(), undefined, taskFile);
|
|
912
953
|
process.stdout.write(`\x1B[2K\r ${chalk.green(`✓ ${tasks.length} tasks`)}\n\n`);
|
|
@@ -977,7 +1018,7 @@ async function main() {
|
|
|
977
1018
|
catch (err) {
|
|
978
1019
|
planRestore();
|
|
979
1020
|
if (isAuthError(err))
|
|
980
|
-
console.error(chalk.red(`\n Authentication failed
|
|
1021
|
+
console.error(chalk.red(`\n Authentication failed -- check your API key or run: claude auth\n`));
|
|
981
1022
|
else
|
|
982
1023
|
console.error(chalk.red(`\n Planning failed: ${err.message}\n`));
|
|
983
1024
|
process.exit(1);
|
|
@@ -994,8 +1035,8 @@ async function main() {
|
|
|
994
1035
|
}
|
|
995
1036
|
// ── Execute ──
|
|
996
1037
|
await executeRun({
|
|
997
|
-
tasks, objective, budget: budget ?? tasks.length, workerModel, plannerModel,
|
|
998
|
-
workerProvider, plannerProvider, concurrency,
|
|
1038
|
+
tasks, objective, budget: budget ?? tasks.length, workerModel, plannerModel, fastModel,
|
|
1039
|
+
workerProvider, plannerProvider, fastProvider, concurrency,
|
|
999
1040
|
permissionMode, useWorktrees, mergeStrategy, usageCap, allowExtraUsage, extraUsageBudget,
|
|
1000
1041
|
flex, agentTimeoutMs, cwd, allowedTools, runDir, previousKnowledge,
|
|
1001
1042
|
resuming, resumeState: resumeState ?? undefined,
|
package/dist/merge.d.ts
CHANGED
|
@@ -22,7 +22,7 @@ export declare function mergeAllBranches(agents: {
|
|
|
22
22
|
* 3-way merge. Walks `git diff --name-status base..branch` and for each entry
|
|
23
23
|
* either checks out the branch's version (add/modify/rename) or removes the
|
|
24
24
|
* file (delete). Always succeeds unless the branch itself is broken. Trades
|
|
25
|
-
* merge-graph fidelity for "your changes actually land" — the right call for
|
|
25
|
+
* merge-graph fidelity for "your changes actually land" -- the right call for
|
|
26
26
|
* an autonomous swarm.
|
|
27
27
|
*/
|
|
28
28
|
export declare function forceMergeOverlay(branch: string, cwd: string): boolean;
|