claude-overnight 0.1.2 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -29
- package/dist/index.js +254 -85
- package/dist/planner.d.ts +19 -2
- package/dist/planner.js +284 -75
- package/dist/swarm.d.ts +7 -1
- package/dist/swarm.js +62 -21
- package/dist/types.d.ts +6 -0
- package/dist/ui.js +38 -15
- package/package.json +1 -1
package/dist/planner.js
CHANGED
|
@@ -1,17 +1,76 @@
|
|
|
1
1
|
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
2
2
|
const INACTIVITY_MS = 5 * 60 * 1000;
|
|
3
|
-
function
|
|
4
|
-
const
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
/**
 * Classify a model identifier into a coarse capability tier.
 *
 * Matching is a case-insensitive substring test, checked in the order
 * opus → sonnet → haiku, so an identifier containing several family names
 * resolves to the first one in that order.
 *
 * @param {string | undefined | null} model Model identifier; may be unset.
 * @returns {"opus" | "sonnet" | "haiku" | "unknown"} The detected tier, or
 *   "unknown" when no family name is present or no usable identifier was given.
 */
export function detectModelTier(model) {
    // An unset or non-string model cannot be classified. Report "unknown"
    // instead of throwing on `.toLowerCase()` so prompt building still works.
    if (typeof model !== "string")
        return "unknown";
    const name = model.toLowerCase();
    // Order matters: earlier entries win when several family names appear.
    for (const tier of ["opus", "sonnet", "haiku"]) {
        if (name.includes(tier))
            return tier;
    }
    return "unknown";
}
|
|
13
|
+
/**
 * Produce the "agent capability" paragraph that is embedded in planner prompts.
 *
 * The wording depends on the tier reported by `detectModelTier(model)`; any
 * tier other than opus, sonnet or haiku gets a generic one-line description.
 *
 * @param {string} model Worker model identifier, passed through to `detectModelTier`.
 * @returns {string} Capability description for the prompt.
 */
function modelCapabilityBlock(model) {
    const tier = detectModelTier(model);
    if (tier === "opus") {
        return `Each agent runs Claude Opus with 1M context — a powerhouse. It can own entire epics, do deep codebase research, make architectural decisions, implement complex multi-file systems end-to-end, use browser tools for analysis, and deliver expert-level work. These agents can work for 30+ minutes on the most complex tasks. Do NOT waste them on trivial edits — give them ownership and autonomy.`;
    }
    if (tier === "sonnet") {
        return `Each agent runs Claude Sonnet — capable of substantial implementation, refactoring, testing, and design work. Can work autonomously for 10-20 minutes on complex tasks. Give agents meaningful scope — not just single-line edits.`;
    }
    if (tier === "haiku") {
        return `Each agent runs Claude Haiku — fast and efficient, best for focused, well-specified tasks. Be explicit about files, functions, and expected changes. Keep each task scoped to a clear, concrete deliverable.`;
    }
    // Unrecognised tier: fall back to the generic description.
    return `Each agent has full codebase access and can work autonomously.`;
}
|
|
25
|
+
// ── Budget + model aware prompt strategy ──
|
|
26
|
+
function plannerPrompt(objective, workerModel, budget, concurrency, flexNote) {
|
|
27
|
+
const b = budget ?? 10;
|
|
28
|
+
const tier = detectModelTier(workerModel);
|
|
29
|
+
const capability = modelCapabilityBlock(workerModel);
|
|
30
|
+
const concLine = concurrency
|
|
31
|
+
? `\n- ${concurrency} agents run in parallel — tasks that run concurrently must touch DIFFERENT files to avoid merge conflicts`
|
|
32
|
+
: "";
|
|
33
|
+
const flexLine = flexNote ? `\n\n${flexNote}` : "";
|
|
34
|
+
// Haiku always gets specific guided tasks regardless of budget
|
|
35
|
+
if (tier === "haiku") {
|
|
36
|
+
return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
|
|
37
|
+
|
|
38
|
+
Objective: ${objective}
|
|
39
|
+
|
|
40
|
+
AGENT CAPABILITY: ${capability}
|
|
41
|
+
|
|
42
|
+
Requirements:
|
|
43
|
+
- Target exactly ~${b} tasks
|
|
44
|
+
- Each task MUST be independent — no task depends on another
|
|
45
|
+
- Each task should target specific files/areas to avoid merge conflicts
|
|
46
|
+
- Be specific: mention exact file paths, function names, what to change
|
|
47
|
+
- Keep tasks focused: one concrete change per task — Haiku agents work best with clear, scoped instructions${concLine}${flexLine}
|
|
48
|
+
|
|
49
|
+
Respond with ONLY a JSON object (no markdown fences):
|
|
50
|
+
{
|
|
51
|
+
"tasks": [
|
|
52
|
+
{ "prompt": "In src/foo.ts, refactor the bar() function to..." },
|
|
53
|
+
{ "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
|
|
54
|
+
]
|
|
55
|
+
}`;
|
|
56
|
+
}
|
|
57
|
+
// Opus gets ambitious missions even at moderate budgets
|
|
58
|
+
const smallThreshold = tier === "opus" ? 5 : 15;
|
|
59
|
+
const mediumThreshold = tier === "opus" ? 30 : 50;
|
|
60
|
+
// Small budget: specific tasks
|
|
61
|
+
if (b <= smallThreshold) {
|
|
62
|
+
return `You are a task coordinator for a parallel agent system. Analyze this codebase and break the following objective into independent tasks.
|
|
7
63
|
|
|
8
64
|
Objective: ${objective}
|
|
9
65
|
|
|
66
|
+
AGENT CAPABILITY: ${capability}
|
|
67
|
+
|
|
10
68
|
Requirements:
|
|
11
69
|
- Each task MUST be independent — no task depends on another
|
|
12
70
|
- Each task should target specific files/areas to avoid merge conflicts
|
|
13
71
|
- Be specific: mention exact file paths, function names, what to change
|
|
14
|
-
- Keep tasks focused: one logical change per task
|
|
72
|
+
- Keep tasks focused: one logical change per task
|
|
73
|
+
- Target exactly ~${b} tasks${concLine}${flexLine}
|
|
15
74
|
|
|
16
75
|
Respond with ONLY a JSON object (no markdown fences):
|
|
17
76
|
{
|
|
@@ -19,10 +78,77 @@ Respond with ONLY a JSON object (no markdown fences):
|
|
|
19
78
|
{ "prompt": "In src/foo.ts, refactor the bar() function to..." },
|
|
20
79
|
{ "prompt": "Add unit tests for the baz module in test/baz.test.ts..." }
|
|
21
80
|
]
|
|
81
|
+
}`;
|
|
82
|
+
}
|
|
83
|
+
// Medium budget: substantial missions with autonomy
|
|
84
|
+
if (b <= mediumThreshold) {
|
|
85
|
+
return `You are a task coordinator for a parallel agent system with ${b} agent sessions available.
|
|
86
|
+
|
|
87
|
+
Objective: ${objective}
|
|
88
|
+
|
|
89
|
+
AGENT CAPABILITY: ${capability}
|
|
90
|
+
|
|
91
|
+
Do NOT over-specify. Give each agent a MISSION, not step-by-step instructions. Let agents make their own decisions about implementation details.
|
|
92
|
+
|
|
93
|
+
Requirements:
|
|
94
|
+
- Target exactly ~${b} tasks
|
|
95
|
+
- Each task should be a substantial piece of work (5-30 minutes of agent time)
|
|
96
|
+
- Each task MUST be independent — no task depends on another
|
|
97
|
+
- Tasks that run concurrently must touch DIFFERENT files/areas to avoid merge conflicts
|
|
98
|
+
- Give agents scope and autonomy: "Design and implement X" not "In file Y, add function Z"
|
|
99
|
+
- Include research/exploration tasks, design tasks, implementation tasks, testing tasks, and polish tasks
|
|
100
|
+
- Think in terms of workstreams: architecture, features, tests, docs, UX, performance, etc.${concLine}${flexLine}
|
|
101
|
+
|
|
102
|
+
Respond with ONLY a JSON object (no markdown fences):
|
|
103
|
+
{
|
|
104
|
+
"tasks": [
|
|
105
|
+
{ "prompt": "Design and implement the complete user favorites system: database schema, API routes, client hooks, and error handling. Research existing patterns in the codebase first." },
|
|
106
|
+
{ "prompt": "Audit all existing API routes for consistency, error handling, and input validation. Fix any issues found." }
|
|
107
|
+
]
|
|
108
|
+
}`;
|
|
109
|
+
}
|
|
110
|
+
// Large budget: ambitious multi-workstream decomposition
|
|
111
|
+
return `You are a task coordinator for a parallel agent system with ${b} agent sessions available. This is a LARGE budget — equivalent to months of professional engineering work.
|
|
112
|
+
|
|
113
|
+
Objective: ${objective}
|
|
114
|
+
|
|
115
|
+
AGENT CAPABILITY: ${capability}
|
|
116
|
+
|
|
117
|
+
With ${b} sessions, you should think BIG:
|
|
118
|
+
- Full feature implementations spanning multiple files
|
|
119
|
+
- Deep refactoring of entire subsystems
|
|
120
|
+
- Comprehensive test suites for each module
|
|
121
|
+
- UX audits and polishing passes
|
|
122
|
+
- Performance optimization investigations
|
|
123
|
+
- Security audits and hardening
|
|
124
|
+
- Documentation and code quality passes
|
|
125
|
+
- Multiple iterations of the same area (implement, then separately review/improve)
|
|
126
|
+
- Edge case handling, error recovery, accessibility
|
|
127
|
+
- Integration testing across features
|
|
128
|
+
|
|
129
|
+
Requirements:
|
|
130
|
+
- Target exactly ~${b} tasks
|
|
131
|
+
- Each task should be substantial: 10-30 minutes of autonomous agent work
|
|
132
|
+
- Each task MUST be independent — no task depends on another
|
|
133
|
+
- Tasks that run concurrently must target DIFFERENT files/areas to avoid merge conflicts
|
|
134
|
+
- Give agents missions with full autonomy: "Own the entire X subsystem" not "edit line 42 of Y.ts"
|
|
135
|
+
- Cover ALL aspects: architecture, implementation, testing, UX, performance, security, polish
|
|
136
|
+
- It's OK to have multiple tasks for the same area if they target different concerns (e.g. one implements, another writes tests, another does a UX polish pass)
|
|
137
|
+
- Organize by workstreams: core features, supporting infrastructure, quality, polish
|
|
138
|
+
- Think about what a team of ${b} senior engineers could accomplish in parallel${concLine}${flexLine}
|
|
139
|
+
|
|
140
|
+
Respond with ONLY a JSON object (no markdown fences):
|
|
141
|
+
{
|
|
142
|
+
"tasks": [
|
|
143
|
+
{ "prompt": "Own the complete implementation of [feature X]: research the codebase for patterns, design the architecture, implement the database layer, API routes, and client hooks. Make it production-ready." },
|
|
144
|
+
{ "prompt": "Comprehensive test suite for [module Y]: unit tests, integration tests, edge cases, error scenarios. Aim for high coverage and meaningful assertions." },
|
|
145
|
+
{ "prompt": "UX audit and polish pass on [area Z]: review all user-facing flows, improve error messages, loading states, empty states, and micro-interactions." }
|
|
146
|
+
]
|
|
22
147
|
}`;
|
|
23
148
|
}
|
|
24
149
|
async function runPlannerQuery(prompt, opts, onLog) {
|
|
25
150
|
let resultText = "";
|
|
151
|
+
const startedAt = Date.now();
|
|
26
152
|
const pq = query({
|
|
27
153
|
prompt,
|
|
28
154
|
options: {
|
|
@@ -36,6 +162,17 @@ async function runPlannerQuery(prompt, opts, onLog) {
|
|
|
36
162
|
includePartialMessages: true,
|
|
37
163
|
},
|
|
38
164
|
});
|
|
165
|
+
// Progress ticker — show elapsed time so it doesn't look frozen
|
|
166
|
+
let lastLogText = "";
|
|
167
|
+
let toolCount = 0;
|
|
168
|
+
const ticker = setInterval(() => {
|
|
169
|
+
const elapsed = Math.round((Date.now() - startedAt) / 1000);
|
|
170
|
+
const m = Math.floor(elapsed / 60);
|
|
171
|
+
const s = elapsed % 60;
|
|
172
|
+
const timeStr = m > 0 ? `${m}m ${s}s` : `${s}s`;
|
|
173
|
+
const extra = lastLogText ? ` — ${lastLogText}` : "";
|
|
174
|
+
onLog(`${timeStr} elapsed, ${toolCount} tool calls${extra}`);
|
|
175
|
+
}, 3000);
|
|
39
176
|
let lastActivity = Date.now();
|
|
40
177
|
let timer;
|
|
41
178
|
const watchdog = new Promise((_, reject) => {
|
|
@@ -55,8 +192,21 @@ async function runPlannerQuery(prompt, opts, onLog) {
|
|
|
55
192
|
lastActivity = Date.now();
|
|
56
193
|
if (msg.type === "stream_event") {
|
|
57
194
|
const ev = msg.event;
|
|
58
|
-
if (ev?.type === "content_block_start" && ev.content_block?.type === "tool_use")
|
|
195
|
+
if (ev?.type === "content_block_start" && ev.content_block?.type === "tool_use") {
|
|
196
|
+
toolCount++;
|
|
197
|
+
lastLogText = ev.content_block.name;
|
|
59
198
|
onLog(ev.content_block.name);
|
|
199
|
+
}
|
|
200
|
+
// Stream text snippets so the user sees the planner is thinking
|
|
201
|
+
if (ev?.type === "content_block_delta") {
|
|
202
|
+
const delta = ev.delta;
|
|
203
|
+
if (delta?.type === "text_delta" && delta.text) {
|
|
204
|
+
const snippet = delta.text.trim();
|
|
205
|
+
if (snippet.length > 3) {
|
|
206
|
+
lastLogText = snippet.slice(0, 60);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
60
210
|
}
|
|
61
211
|
if (msg.type === "result") {
|
|
62
212
|
if (msg.subtype === "success")
|
|
@@ -71,10 +221,11 @@ async function runPlannerQuery(prompt, opts, onLog) {
|
|
|
71
221
|
}
|
|
72
222
|
finally {
|
|
73
223
|
clearTimeout(timer);
|
|
224
|
+
clearInterval(ticker);
|
|
74
225
|
}
|
|
75
226
|
return resultText;
|
|
76
227
|
}
|
|
77
|
-
function postProcess(raw, onLog) {
|
|
228
|
+
function postProcess(raw, budget, onLog) {
|
|
78
229
|
let tasks = raw;
|
|
79
230
|
// Filter garbage (< 3 words)
|
|
80
231
|
const before = tasks.length;
|
|
@@ -92,7 +243,7 @@ function postProcess(raw, onLog) {
|
|
|
92
243
|
continue;
|
|
93
244
|
const setB = new Set(tasks[j].prompt.toLowerCase().split(/\s+/));
|
|
94
245
|
const shared = [...setA].filter((w) => setB.has(w)).length;
|
|
95
|
-
const overlap = shared / Math.
|
|
246
|
+
const overlap = shared / Math.max(setA.size, setB.size);
|
|
96
247
|
if (overlap > 0.8) {
|
|
97
248
|
const drop = setA.size >= setB.size ? j : i;
|
|
98
249
|
dominated.add(drop);
|
|
@@ -105,48 +256,44 @@ function postProcess(raw, onLog) {
|
|
|
105
256
|
tasks = tasks.filter((_, i) => !dominated.has(i));
|
|
106
257
|
onLog(`Deduplicated to ${tasks.length} tasks`);
|
|
107
258
|
}
|
|
108
|
-
// Warn on
|
|
109
|
-
|
|
110
|
-
const
|
|
111
|
-
|
|
112
|
-
|
|
259
|
+
// Warn on file overlap (only for small budgets where tasks are file-specific)
|
|
260
|
+
if ((budget ?? 10) <= 15) {
|
|
261
|
+
const fileRe = /(?:^|\s)((?:[\w.-]+\/)+[\w.-]+\.\w+)/g;
|
|
262
|
+
const pathToTasks = new Map();
|
|
263
|
+
for (const t of tasks) {
|
|
264
|
+
for (const m of t.prompt.matchAll(fileRe)) {
|
|
265
|
+
const ids = pathToTasks.get(m[1]);
|
|
266
|
+
if (ids)
|
|
267
|
+
ids.push(t.id);
|
|
268
|
+
else
|
|
269
|
+
pathToTasks.set(m[1], [t.id]);
|
|
270
|
+
}
|
|
113
271
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
const pathToTasks = new Map();
|
|
118
|
-
for (const t of tasks) {
|
|
119
|
-
for (const m of t.prompt.matchAll(fileRe)) {
|
|
120
|
-
const ids = pathToTasks.get(m[1]);
|
|
121
|
-
if (ids)
|
|
122
|
-
ids.push(t.id);
|
|
123
|
-
else
|
|
124
|
-
pathToTasks.set(m[1], [t.id]);
|
|
272
|
+
for (const [path, ids] of pathToTasks) {
|
|
273
|
+
if (ids.length > 1)
|
|
274
|
+
onLog(`Overlap risk: ${path} in tasks ${ids.join(", ")}`);
|
|
125
275
|
}
|
|
126
276
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
if (tasks.length > 30) {
|
|
133
|
-
onLog(`Truncating ${tasks.length} → 30`);
|
|
134
|
-
tasks = tasks.slice(0, 30);
|
|
277
|
+
// Cap at budget (with generous headroom) — no arbitrary 30 limit
|
|
278
|
+
const cap = budget ? Math.ceil(budget * 1.2) : 30;
|
|
279
|
+
if (tasks.length > cap) {
|
|
280
|
+
onLog(`Truncating ${tasks.length} → ${cap}`);
|
|
281
|
+
tasks = tasks.slice(0, cap);
|
|
135
282
|
}
|
|
136
283
|
tasks.sort((a, b) => Number(/\btest/i.test(a.prompt)) - Number(/\btest/i.test(b.prompt)));
|
|
137
284
|
// Re-index
|
|
138
285
|
tasks = tasks.map((t, i) => ({ ...t, id: String(i) }));
|
|
139
286
|
return tasks;
|
|
140
287
|
}
|
|
141
|
-
export async function planTasks(objective, cwd,
|
|
288
|
+
export async function planTasks(objective, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog, flexNote) {
|
|
142
289
|
onLog("Analyzing codebase...");
|
|
143
|
-
const resultText = await runPlannerQuery(plannerPrompt(objective, budget, concurrency), { cwd, model, permissionMode }, onLog);
|
|
290
|
+
const resultText = await runPlannerQuery(plannerPrompt(objective, workerModel, budget, concurrency, flexNote), { cwd, model: plannerModel, permissionMode }, onLog);
|
|
144
291
|
const parsed = await extractTaskJson(resultText, async () => {
|
|
145
292
|
onLog("Retrying for valid JSON...");
|
|
146
293
|
let retryText = "";
|
|
147
294
|
for await (const msg of query({
|
|
148
295
|
prompt: `Your previous response did not contain valid JSON. Output ONLY a JSON object:\n{"tasks":[{"prompt":"..."}]}`,
|
|
149
|
-
options: { cwd, model, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
|
|
296
|
+
options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
|
|
150
297
|
})) {
|
|
151
298
|
if (msg.type === "result" && msg.subtype === "success")
|
|
152
299
|
retryText = msg.result || "";
|
|
@@ -157,16 +304,22 @@ export async function planTasks(objective, cwd, model, permissionMode, budget, c
|
|
|
157
304
|
id: String(i),
|
|
158
305
|
prompt: typeof t === "string" ? t : t.prompt,
|
|
159
306
|
}));
|
|
160
|
-
tasks = postProcess(tasks, onLog);
|
|
307
|
+
tasks = postProcess(tasks, budget, onLog);
|
|
161
308
|
if (tasks.length === 0)
|
|
162
309
|
throw new Error("Planner generated 0 tasks");
|
|
163
310
|
onLog(`${tasks.length} tasks`);
|
|
164
311
|
return tasks;
|
|
165
312
|
}
|
|
166
|
-
export async function refinePlan(objective, previousTasks, feedback, cwd,
|
|
313
|
+
export async function refinePlan(objective, previousTasks, feedback, cwd, plannerModel, workerModel, permissionMode, budget, concurrency, onLog) {
|
|
167
314
|
onLog("Refining plan...");
|
|
168
315
|
const prev = previousTasks.map((t, i) => `${i + 1}. ${t.prompt}`).join("\n");
|
|
169
|
-
const
|
|
316
|
+
const capability = modelCapabilityBlock(workerModel);
|
|
317
|
+
const b = budget ?? 10;
|
|
318
|
+
const scaleNote = b > 50
|
|
319
|
+
? `This is a LARGE budget (${b} sessions). Think big — missions, not micro-tasks.`
|
|
320
|
+
: b > 15
|
|
321
|
+
? `Each of the ${b} sessions is a capable AI agent. Give substantial missions, not trivial edits.`
|
|
322
|
+
: `Target ~${b} tasks.`;
|
|
170
323
|
const prompt = `You are a task coordinator. You previously planned these tasks for the objective:
|
|
171
324
|
|
|
172
325
|
Objective: ${objective}
|
|
@@ -176,17 +329,19 @@ ${prev}
|
|
|
176
329
|
|
|
177
330
|
The user wants changes: ${feedback}
|
|
178
331
|
|
|
179
|
-
|
|
332
|
+
AGENT CAPABILITY: ${capability}
|
|
333
|
+
|
|
334
|
+
${scaleNote} ${concurrency} agents run in parallel. Update the plan accordingly. Keep tasks independent and targeting different files/areas.
|
|
180
335
|
|
|
181
336
|
Respond with ONLY a JSON object (no markdown):
|
|
182
337
|
{"tasks":[{"prompt":"..."}]}`;
|
|
183
|
-
const resultText = await runPlannerQuery(prompt, { cwd, model, permissionMode }, onLog);
|
|
338
|
+
const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
|
|
184
339
|
const parsed = await extractTaskJson(resultText, async () => {
|
|
185
340
|
onLog("Retrying...");
|
|
186
341
|
let retryText = "";
|
|
187
342
|
for await (const msg of query({
|
|
188
343
|
prompt: `Output ONLY a JSON object:\n{"tasks":[{"prompt":"..."}]}`,
|
|
189
|
-
options: { cwd, model, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
|
|
344
|
+
options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
|
|
190
345
|
})) {
|
|
191
346
|
if (msg.type === "result" && msg.subtype === "success")
|
|
192
347
|
retryText = msg.result || "";
|
|
@@ -197,7 +352,7 @@ Respond with ONLY a JSON object (no markdown):
|
|
|
197
352
|
id: String(i),
|
|
198
353
|
prompt: typeof t === "string" ? t : t.prompt,
|
|
199
354
|
}));
|
|
200
|
-
tasks = postProcess(tasks, onLog);
|
|
355
|
+
tasks = postProcess(tasks, budget, onLog);
|
|
201
356
|
if (tasks.length === 0)
|
|
202
357
|
throw new Error("Refinement produced 0 tasks");
|
|
203
358
|
onLog(`${tasks.length} tasks`);
|
|
@@ -219,50 +374,104 @@ function extractOutermostBraces(text) {
|
|
|
219
374
|
}
|
|
220
375
|
return null;
|
|
221
376
|
}
|
|
222
|
-
/** Try multiple strategies to parse
|
|
223
|
-
|
|
224
|
-
|
|
377
|
+
/**
 * Try several strategies, in order, to recover a JSON object from LLM output:
 *
 *   1. parse the text as-is;
 *   2. parse the span returned by `extractOutermostBraces(text)`;
 *   3. strip Markdown code fences, trim, and parse the result;
 *   4. parse the span returned by `extractOutermostBraces` on the stripped text.
 *
 * Steps 3 and 4 run only when stripping actually changed the text. Every
 * strategy applies the same acceptance rule: the parsed value must be a
 * non-null object (arrays included).
 *
 * @param {string} text Raw model output.
 * @returns {object | null} The first successfully parsed object, or null.
 */
function attemptJsonParse(text) {
    // Shared parse-and-validate step so all strategies use one acceptance rule.
    const parseObject = (candidate) => {
        if (!candidate)
            return null;
        try {
            const value = JSON.parse(candidate);
            return typeof value === "object" && value !== null ? value : null;
        }
        catch {
            // Malformed JSON is expected here; the caller moves on to the next strategy.
            return null;
        }
    };
    // `??` keeps the brace extraction lazy: it runs only if the direct parse failed.
    const direct = parseObject(text) ?? parseObject(extractOutermostBraces(text));
    if (direct)
        return direct;
    const stripped = text.replace(/```json?\s*/g, "").replace(/```/g, "").trim();
    if (stripped === text)
        return null; // nothing was stripped, so retrying would repeat the attempts above
    return parseObject(stripped) ?? parseObject(extractOutermostBraces(stripped));
}
|
|
412
|
+
/**
 * Extract a task plan (`{ tasks: [...] }`) from planner output, retrying once.
 *
 * The first attempt parses `raw`. If that does not yield an object whose
 * `tasks` property is an array, `retry` is invoked and its output is parsed
 * the same way.
 *
 * @param {string} raw Planner output from the initial query.
 * @param {() => Promise<string>} retry Callback that re-queries the planner
 *   and resolves with its raw text output.
 * @returns {Promise<object>} The parsed plan object containing a `tasks` array.
 * @throws {Error} When neither the initial output nor the retry output
 *   contains a valid task plan.
 */
async function extractTaskJson(raw, retry) {
    // Callers iterate over `tasks` with `.map`, so a truthy non-array value
    // (a string, an object) must count as invalid and trigger the retry.
    const hasTaskList = (candidate) => Array.isArray(candidate?.tasks);
    const first = attemptJsonParse(raw);
    if (hasTaskList(first))
        return first;
    const second = attemptJsonParse(await retry());
    if (hasTaskList(second))
        return second;
    throw new Error("Planner did not return valid task JSON after retry");
}
|
|
423
|
+
// ── Wave steering ──
|
|
424
|
+
/**
 * Ask the planner model whether the objective is complete after the waves run
 * so far and, if not, to plan the next wave of tasks.
 *
 * @param {string} objective Overall objective given by the user.
 * @param {Array} history Completed waves. Each entry is read for `wave`
 *   (rendered as `wave + 1`) and `tasks`; each task is read for `status`,
 *   `prompt`, `filesChanged` and `error`.
 * @param {number} remainingBudget Agent sessions still available; also passed
 *   to `postProcess` as the budget for the returned task list.
 * @param {string} cwd Working directory for the planner query.
 * @param {string} plannerModel Model used for the steering query.
 * @param {string} workerModel Model the worker agents run; selects the
 *   capability paragraph included in the prompt.
 * @param {string} permissionMode Permission mode forwarded to the query.
 * @param {number} [concurrency] Number of agents running in parallel. When
 *   falsy, the parallelism sentence is left out of the prompt.
 * @param {(text: string) => void} onLog Progress callback.
 * @returns {Promise<{done: boolean, tasks: Array, reasoning: string}>}
 *   `done` is true when the planner says so, when its response cannot be
 *   parsed even after one retry, or when no usable tasks remain.
 */
export async function steerWave(objective, history, remainingBudget, cwd, plannerModel, workerModel, permissionMode, concurrency, onLog) {
    const capability = modelCapabilityBlock(workerModel);
    const historyText = history.map(w => {
        const lines = w.tasks.map(t => {
            const files = t.filesChanged ? ` (${t.filesChanged} files)` : "";
            const err = t.error ? ` — ${t.error}` : "";
            return ` - [${t.status}] ${t.prompt.slice(0, 120)}${files}${err}`;
        }).join("\n");
        return `Wave ${w.wave + 1}:\n${lines}`;
    }).join("\n\n");
    // Mention parallelism only when it is known, as plannerPrompt does;
    // otherwise the prompt would read "undefined agents run in parallel".
    const parallelNote = concurrency
        ? ` ${concurrency} agents run in parallel — tasks must touch DIFFERENT files.`
        : "";
    const prompt = `You are steering an autonomous multi-wave agent system. Read the codebase to understand current state, then decide what's next.

Objective: ${objective}

Work completed so far:
${historyText}

Remaining budget: ${remainingBudget} agent sessions.${parallelNote}
${capability}

Read the codebase. Then decide:
- Is the objective fully met? → {"done": true, "reasoning": "..."}
- More work needed? Plan the next wave → {"done": false, "reasoning": "what needs doing and why", "tasks": [{"prompt": "..."}]}

Think like a tech lead between sprints: what shipped, what's missing, what needs polish, what should be scrapped and redone, what's over-engineered. Less is more — don't add work for the sake of filling budget.

Respond with ONLY a JSON object (no markdown fences).`;
    onLog("Reading codebase...");
    const resultText = await runPlannerQuery(prompt, { cwd, model: plannerModel, permissionMode }, onLog);
    const parsed = await (async () => {
        const first = attemptJsonParse(resultText);
        if (first)
            return first;
        onLog("Retrying...");
        let retryText = "";
        for await (const msg of query({
            prompt: `Output ONLY a JSON object: {"done":true/false,"reasoning":"...","tasks":[{"prompt":"..."}]}`,
            options: { cwd, model: plannerModel, permissionMode, ...(permissionMode === "bypassPermissions" && { allowDangerouslySkipPermissions: true }), persistSession: false },
        })) {
            if (msg.type === "result" && msg.subtype === "success")
                retryText = msg.result || "";
        }
        // An unparseable steering response ends the run rather than looping on bad output.
        return attemptJsonParse(retryText) ?? { done: true, reasoning: "Could not parse steering response" };
    })();
    if (parsed.done) {
        return { done: true, tasks: [], reasoning: parsed.reasoning || "Objective complete" };
    }
    // Keep only entries that carry a string prompt: postProcess dereferences
    // `prompt` on every task, so malformed entries would otherwise throw there.
    const rawTasks = Array.isArray(parsed.tasks) ? parsed.tasks : [];
    let tasks = rawTasks
        .map((t) => (typeof t === "string" ? t : t?.prompt))
        .filter((p) => typeof p === "string")
        .map((p, i) => ({ id: String(i), prompt: p }));
    tasks = postProcess(tasks, remainingBudget, onLog);
    return { done: tasks.length === 0, tasks, reasoning: parsed.reasoning || "" };
}
|
package/dist/swarm.d.ts
CHANGED
|
@@ -10,6 +10,8 @@ export interface SwarmConfig {
|
|
|
10
10
|
agentTimeoutMs?: number;
|
|
11
11
|
maxRetries?: number;
|
|
12
12
|
mergeStrategy?: MergeStrategy;
|
|
13
|
+
/** Stop dispatching new tasks when rate-limit utilization reaches this fraction (0-1). */
|
|
14
|
+
usageCap?: number;
|
|
13
15
|
}
|
|
14
16
|
export interface MergeResult {
|
|
15
17
|
branch: string;
|
|
@@ -35,10 +37,11 @@ export declare class Swarm {
|
|
|
35
37
|
totalOutputTokens: number;
|
|
36
38
|
phase: SwarmPhase;
|
|
37
39
|
aborted: boolean;
|
|
40
|
+
cappedOut: boolean;
|
|
38
41
|
mergeResults: MergeResult[];
|
|
39
42
|
rateLimitUtilization: number;
|
|
40
43
|
rateLimitStatus: string;
|
|
41
|
-
|
|
44
|
+
rateLimitResetsAt?: number;
|
|
42
45
|
private queue;
|
|
43
46
|
private config;
|
|
44
47
|
private nextId;
|
|
@@ -47,11 +50,14 @@ export declare class Swarm {
|
|
|
47
50
|
private cleanedUp;
|
|
48
51
|
logFile?: string;
|
|
49
52
|
readonly model: string | undefined;
|
|
53
|
+
readonly usageCap: number | undefined;
|
|
50
54
|
constructor(config: SwarmConfig);
|
|
51
55
|
get active(): number;
|
|
52
56
|
get pending(): number;
|
|
53
57
|
run(): Promise<void>;
|
|
54
58
|
abort(): void;
|
|
59
|
+
/** Monotonic counter so non-TTY consumers can detect log trimming. */
|
|
60
|
+
logSequence: number;
|
|
55
61
|
log(agentId: number, text: string): void;
|
|
56
62
|
private worker;
|
|
57
63
|
private throttle;
|