@robzilla1738/agentswarm 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +36 -5
  2. package/dist/agent.js +64 -32
  3. package/dist/cli.js +18 -4
  4. package/dist/config.js +35 -5
  5. package/dist/crawltools.js +247 -0
  6. package/dist/deepseek.js +125 -10
  7. package/dist/executor.js +771 -122
  8. package/dist/hub.js +40 -3
  9. package/dist/journal.js +61 -11
  10. package/dist/memory.js +83 -0
  11. package/dist/prompts.js +109 -16
  12. package/dist/report.js +252 -0
  13. package/dist/run.js +7 -2
  14. package/dist/searchcore.js +191 -0
  15. package/dist/state.js +57 -3
  16. package/dist/tools.js +202 -12
  17. package/dist/webtools.js +191 -60
  18. package/package.json +3 -2
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
  22. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
  23. package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
  24. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
  25. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
  26. package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
  27. package/ui/out/fonts/PlanetKosmos.ttf +0 -0
  28. package/ui/out/index.html +1 -1
  29. package/ui/out/index.txt +3 -3
  30. package/ui/out/run/index.html +1 -1
  31. package/ui/out/run/index.txt +3 -3
  32. package/ui/out/settings/index.html +1 -1
  33. package/ui/out/settings/index.txt +3 -3
  34. package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
  35. package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
  36. package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +0 -1
  37. package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +0 -1
  38. package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +0 -1
  39. package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +0 -1
  40. package/ui/out/_next/static/css/82edaa7a5942f894.css +0 -3
  41. /package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
  42. /package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0
package/dist/executor.js CHANGED
@@ -40,12 +40,14 @@ const agent_1 = require("./agent");
40
40
  const config_1 = require("./config");
41
41
  const control_1 = require("./control");
42
42
  const deepseek_1 = require("./deepseek");
43
+ const journal_1 = require("./journal");
43
44
  const tools_1 = require("./tools");
44
45
  const prompts_1 = require("./prompts");
46
+ const memory_1 = require("./memory");
47
+ const report_1 = require("./report");
45
48
  const sandbox_1 = require("./sandbox");
46
49
  const types_1 = require("./types");
47
50
  const util_1 = require("./util");
48
- const VERIFY_MAX_ATTEMPTS = 2;
49
51
  class Executor {
50
52
  cfg;
51
53
  meta;
@@ -59,6 +61,7 @@ class Executor {
59
61
  inflight = new Map();
60
62
  settledSinceUpdate = [];
61
63
  notes = [];
64
+ phase = null;
62
65
  conductorMessages = [];
63
66
  spentTokens = 0;
64
67
  cost = 0;
@@ -67,19 +70,43 @@ class Executor {
67
70
  finishReason = "";
68
71
  fatal = null;
69
72
  lastConductorAction = "none";
73
+ conductorFailures = 0;
74
+ /** True when the last conductor turn ended in a call error, not a decision. */
75
+ lastConductorErrored = false;
70
76
  resumed = false;
71
77
  sandbox;
72
- constructor(cfg, meta, journal) {
78
+ mode;
79
+ teamId;
80
+ opts;
81
+ /** Team-mode result: the consolidated report handed back to the parent task. */
82
+ teamReport = "";
83
+ constructor(cfg, meta, journal, opts = {}) {
73
84
  this.cfg = cfg;
74
85
  this.meta = meta;
75
- this.runDirPath = (0, config_1.runDir)(meta.id);
86
+ this.runDirPath = opts.runDirPath ?? (0, config_1.runDir)(meta.id);
76
87
  this.journal = journal;
77
88
  this.control = new control_1.ControlReader(this.runDirPath);
89
+ this.mode = opts.mode ?? "root";
90
+ this.teamId = opts.teamId;
91
+ this.opts = opts;
92
+ if (opts.sharedNotes)
93
+ this.notes = opts.sharedNotes;
78
94
  (0, util_1.ensureDir)(path.join(this.runDirPath, "artifacts"));
79
- // "A directory on disk" runs always execute on the host — touching the
80
- // operator's real files is the entire point of that mode.
81
- const kind = meta.sandbox ? meta.options.sandboxRuntime ?? "host" : "host";
82
- this.sandbox = (0, sandbox_1.createSandbox)(kind, { runId: meta.id, hostDir: meta.cwd, cfg });
95
+ if (opts.sandbox) {
96
+ this.sandbox = opts.sandbox;
97
+ }
98
+ else {
99
+ // "A directory on disk" runs always execute on the host — touching the
100
+ // operator's real files is the entire point of that mode.
101
+ const kind = meta.sandbox ? meta.options.sandboxRuntime ?? "host" : "host";
102
+ this.sandbox = (0, sandbox_1.createSandbox)(kind, { runId: meta.id, hostDir: meta.cwd, cfg });
103
+ }
104
+ if (opts.parentSignal) {
105
+ if (opts.parentSignal.aborted)
106
+ this.ac.abort();
107
+ else
108
+ opts.parentSignal.addEventListener("abort", () => this.ac.abort(), { once: true });
109
+ }
83
110
  }
84
111
  cancel() {
85
112
  this.finishing = true;
@@ -107,18 +134,47 @@ class Executor {
107
134
  const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
108
135
  this.taskCounter = Math.max(this.taskCounter, n);
109
136
  }
110
- this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, text: n.text }));
137
+ this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text }));
138
+ const lastPhase = state.phases[state.phases.length - 1];
139
+ if (lastPhase)
140
+ this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
111
141
  this.spentTokens = state.totalUsage.promptTokens + state.totalUsage.completionTokens;
112
142
  this.cost = state.cost;
143
+ try {
144
+ // The living plan survives restarts from disk, not from the journal.
145
+ this.planDoc = fs.readFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), "utf8");
146
+ }
147
+ catch {
148
+ /* no plan yet */
149
+ }
113
150
  this.resumed = true;
114
151
  }
115
152
  setStatus(status, reason) {
153
+ // A team is one task of the parent run, not a run of its own.
154
+ if (this.mode === "team")
155
+ return;
116
156
  this.journal.append("run.status", { status, reason });
117
157
  }
158
+ budgetWarned = new Set();
118
159
  onUsage = (model, usage) => {
119
160
  this.spentTokens += usage.promptTokens + usage.completionTokens;
120
161
  this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
121
162
  this.journal.append("usage", { model, usage, cost: this.cost });
163
+ // Team spend also counts against the parent's (authoritative) budget.
164
+ this.opts.onUsageForward?.(model, usage);
165
+ const cap = this.meta.options.maxTokens;
166
+ if (cap > 0) {
167
+ const pct = (this.spentTokens / cap) * 100;
168
+ for (const threshold of [50, 80, 95]) {
169
+ if (pct >= threshold && !this.budgetWarned.has(threshold)) {
170
+ this.budgetWarned.add(threshold);
171
+ this.journal.append("log", {
172
+ level: threshold >= 95 ? "warn" : "info",
173
+ msg: `budget: ${threshold}% of the run's token cap used (est. $${this.cost.toFixed(2)})`,
174
+ });
175
+ }
176
+ }
177
+ }
122
178
  };
123
179
  budgetExceeded() {
124
180
  return this.spentTokens >= this.meta.options.maxTokens;
@@ -126,39 +182,69 @@ class Executor {
126
182
  blackboardDigest(max = 1800) {
127
183
  if (!this.notes.length)
128
184
  return "";
129
- const lines = this.notes
130
- .slice(-40)
131
- .map((n) => `• ${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`);
132
- let out = lines.join("\n");
133
- if (out.length > max)
134
- out = out.slice(out.length - max);
135
- return out;
185
+ const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
186
+ // Decisions anchor mission-wide coherence and are never trimmed out of the
187
+ // digest; everything else shows only its recent tail.
188
+ const decisions = this.notes.filter((n) => n.kind === "decision").map(fmt);
189
+ const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
190
+ let tail = rest.join("\n");
191
+ const budget = Math.max(400, max - decisions.join("\n").length);
192
+ if (tail.length > budget)
193
+ tail = tail.slice(tail.length - budget);
194
+ return [decisions.join("\n"), tail].filter(Boolean).join("\n");
195
+ }
196
+ searchNotes(query) {
197
+ const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
198
+ if (!terms.length)
199
+ return "empty query";
200
+ const scored = this.notes
201
+ .map((n) => {
202
+ const hay = `${n.key ?? ""} ${n.kind ?? ""} ${n.text}`.toLowerCase();
203
+ const score = terms.reduce((s, t) => s + (hay.includes(t) ? 1 : 0), 0);
204
+ return { n, score };
205
+ })
206
+ .filter((x) => x.score > 0)
207
+ .sort((a, b) => b.score - a.score)
208
+ .slice(0, 12);
209
+ if (!scored.length)
210
+ return "no notes matched";
211
+ return scored
212
+ .map(({ n }) => `• ${n.kind ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.clip)(n.text, 400)}${n.taskId ? ` (${n.taskId})` : ""}`)
213
+ .join("\n");
136
214
  }
137
215
  // ---------------------------------------------------------------- main
138
216
  async run() {
139
217
  this.setStatus("planning");
140
- // Preflight: validate auth before doing any work so the operator gets an
141
- // instant, clear error instead of a phantom "done" run.
142
- const auth = await (0, deepseek_1.validateAuth)(this.cfg);
143
- if (auth.status === "invalid") {
144
- this.fatal = `Provider authentication failed — ${auth.message || "invalid API key"}. Set a valid key in Settings (or: swarm config set apiKey <...>).`;
145
- this.finishReason = this.fatal;
146
- this.journal.append("log", { level: "error", msg: this.fatal });
147
- await this.fail(this.fatal);
148
- return;
149
- }
150
- // Boot the sandbox before any work — a dead Docker daemon or a bad cloud
151
- // key must fail the run instantly with a clear reason, not mid-mission.
152
- try {
153
- await this.sandbox.start((msg) => this.journal.append("log", { level: "info", msg }));
154
- this.journal.append("log", { level: "info", msg: `sandbox: ${this.sandbox.label}` });
155
- }
156
- catch (e) {
157
- this.fatal = `Sandbox failed to start — ${(0, util_1.errMsg)(e)}`;
158
- this.finishReason = this.fatal;
159
- this.journal.append("log", { level: "error", msg: this.fatal });
160
- await this.fail(this.fatal);
161
- return;
218
+ if (this.mode === "root") {
219
+ // Surface AIMD limiter adjustments (429 pressure) in the journal/UI.
220
+ (0, deepseek_1.gateFor)(this.cfg).onState = (s) => {
221
+ this.journal.append("limiter.state", { ceiling: s.ceiling, active: s.active, queued: s.queued });
222
+ };
223
+ // Preflight: validate auth before doing any work so the operator gets an
224
+ // instant, clear error instead of a phantom "done" run. (Teams inherit a
225
+ // parent that already passed.)
226
+ const auth = await (0, deepseek_1.validateAuth)(this.cfg);
227
+ if (auth.status === "invalid") {
228
+ this.fatal = `Provider authentication failed — ${auth.message || "invalid API key"}. Set a valid key in Settings (or: swarm config set apiKey <...>).`;
229
+ this.finishReason = this.fatal;
230
+ this.journal.append("log", { level: "error", msg: this.fatal });
231
+ await this.fail(this.fatal);
232
+ return;
233
+ }
234
+ // Boot the sandbox before any work — a dead Docker daemon or a bad cloud
235
+ // key must fail the run instantly with a clear reason, not mid-mission.
236
+ // (Teams share the parent's already-running sandbox.)
237
+ try {
238
+ await this.sandbox.start((msg) => this.journal.append("log", { level: "info", msg }));
239
+ this.journal.append("log", { level: "info", msg: `sandbox: ${this.sandbox.label}` });
240
+ }
241
+ catch (e) {
242
+ this.fatal = `Sandbox failed to start — ${(0, util_1.errMsg)(e)}`;
243
+ this.finishReason = this.fatal;
244
+ this.journal.append("log", { level: "error", msg: this.fatal });
245
+ await this.fail(this.fatal);
246
+ return;
247
+ }
162
248
  }
163
249
  // Operator control must land while agents are mid-task, not only when the
164
250
  // scheduler wakes up — a Stop click aborts in-flight work within ~1s.
@@ -170,8 +256,11 @@ class Executor {
170
256
  /* control polling must never kill the run */
171
257
  }
172
258
  }, 750);
259
+ // Real-directory runs remember: prior missions in the same workspace feed
260
+ // the conductor so it builds on settled decisions instead of starting cold.
261
+ const memory = this.mode === "root" && !this.meta.sandbox ? (0, memory_1.memoryBlock)(this.meta.cwd) : "";
173
262
  this.conductorMessages = [
174
- { role: "system", content: (0, prompts_1.conductorSystem)(this.meta) },
263
+ { role: "system", content: (0, prompts_1.conductorSystem)(this.meta) + (memory ? `\n\n${memory}` : "") },
175
264
  {
176
265
  role: "user",
177
266
  content: this.resumed
@@ -191,53 +280,11 @@ class Executor {
191
280
  try {
192
281
  await this.conductorTurn();
193
282
  this.setStatus("running");
194
- while (!this.finishing) {
195
- this.drainControl();
196
- if (this.finishing)
197
- break;
198
- if (this.budgetExceeded()) {
199
- this.finishing = true;
200
- this.finishReason = "token budget reached";
201
- break;
202
- }
203
- this.startReadyTasks();
204
- if (this.inflight.size === 0) {
205
- const runnable = this.runnableTasks();
206
- if (runnable.length > 0)
207
- continue; // loop starts them
208
- // Nothing running, nothing runnable. Include any reports that
209
- // settled while the conductor was mid-turn — they must not be lost.
210
- this.blockStuckTasks();
211
- const reports = this.drainSettled();
212
- if (!this.hasOpenWork()) {
213
- // Everything is terminal. Ask the conductor for a final decision.
214
- this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
215
- await this.conductorTurn();
216
- if (this.lastConductorAction !== "spawn") {
217
- this.finishing = true;
218
- this.finishReason = this.finishReason || "all tasks settled";
219
- }
220
- }
221
- else {
222
- // Stuck: pending tasks exist but can't run (failed/blocked deps).
223
- this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
224
- await this.conductorTurn();
225
- if (this.lastConductorAction === "wait") {
226
- this.finishing = true;
227
- this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
228
- }
229
- }
230
- continue;
231
- }
232
- // Tasks are running — wait for at least one to settle.
233
- await Promise.race([...this.inflight.values()]);
234
- this.drainControl();
235
- const reports = this.drainSettled();
236
- if (reports.length && !this.finishing) {
237
- this.appendConductorUpdate(undefined, reports);
238
- await this.conductorTurn();
239
- }
240
- }
283
+ await this.mainLoop();
284
+ // Strict verification: one completeness review before synthesis; if it
285
+ // finds real gaps the conductor gets one chance to fill them.
286
+ if (await this.completenessPass())
287
+ await this.mainLoop();
241
288
  }
242
289
  catch (e) {
243
290
  if (!this.ac.signal.aborted) {
@@ -251,12 +298,231 @@ class Executor {
251
298
  await Promise.allSettled([...this.inflight.values()]);
252
299
  }
253
300
  this.drainSettled();
301
+ if (this.mode === "team") {
302
+ await this.consolidateTeam();
303
+ return; // the parent owns the sandbox, final flush, and run status
304
+ }
254
305
  await this.synthesize();
255
- await this.sandbox.destroy().catch(() => {
256
- /* container/sandbox teardown is best-effort */
257
- });
306
+ // Teardown is best-effort AND bounded — a wedged container must not hang
307
+ // the engine after the report is already written.
308
+ await Promise.race([
309
+ this.sandbox.destroy().catch(() => { }),
310
+ new Promise((r) => setTimeout(r, 15_000).unref()),
311
+ ]);
258
312
  await this.journal.flush();
259
313
  }
314
+ // ---------------------------------------------------------------- teams
315
+ /** All artifacts reported by this (team) executor's tasks. */
316
+ teamArtifacts() {
317
+ return [...new Set(this.taskList().flatMap((t) => t.artifacts))];
318
+ }
319
+ /** Whether any task here actually completed. */
320
+ anyTaskDone() {
321
+ return this.taskList().some((t) => t.status === "done");
322
+ }
323
+ /** Team-mode finale: one consolidated report instead of run synthesis. */
324
+ async consolidateTeam() {
325
+ const tasks = this.taskList();
326
+ const reports = tasks.length ? tasks.map(prompts_1.reportBlock).join("\n\n") : "(no tasks were completed)";
327
+ try {
328
+ const res = await (0, deepseek_1.chat)(this.cfg, {
329
+ model: this.meta.options.conductorModel,
330
+ priority: "high",
331
+ messages: [
332
+ {
333
+ role: "user",
334
+ content: `You led a sub-team inside a larger agent swarm. Consolidate your team's work into ONE report for the parent conductor: what was accomplished (with evidence and exact paths), what failed or remains open, and the key facts the rest of the mission needs.\n\nTEAM OBJECTIVE\n${this.meta.mission}\n\nOUTCOME: ${this.finishReason || "completed"}\nLead's closing notes: ${this.finishNotes || "(none)"}\n\nTASK REPORTS\n${(0, util_1.truncateMiddle)(reports, 60_000, "chars")}\n\nReply with the consolidated report only.`,
335
+ },
336
+ ],
337
+ thinking: false,
338
+ maxTokens: 4096,
339
+ signal: new AbortController().signal, // consolidation runs even when cancelled
340
+ });
341
+ this.onUsage(this.meta.options.conductorModel, res.usage);
342
+ this.teamReport = (res.content || "").trim();
343
+ }
344
+ catch (e) {
345
+ this.journal.append("log", { level: "warn", msg: `team consolidation failed: ${(0, util_1.errMsg)(e)}` });
346
+ }
347
+ if (!this.teamReport) {
348
+ this.teamReport = tasks
349
+ .map((t) => `${t.id} [${t.status}] ${t.title}: ${(0, util_1.oneLine)(t.report ?? t.error ?? "(no output)", 200)}`)
350
+ .join("\n");
351
+ }
352
+ }
353
+ /** Run a team:true task as a sub-swarm sharing this run's everything. */
354
+ async runTeam(task) {
355
+ const remaining = Math.max(0, this.meta.options.maxTokens - this.spentTokens);
356
+ const childMeta = {
357
+ ...this.meta,
358
+ mission: `${task.objective}${task.context ? `\n\nContext from the parent conductor:\n${task.context}` : ""}`,
359
+ options: {
360
+ ...this.meta.options,
361
+ maxWorkers: task.teamMaxWorkers || Math.max(2, Math.min(16, Math.floor(this.meta.options.maxWorkers / 2))),
362
+ maxTokens: Math.min(remaining, task.teamBudgetTokens || Math.max(50_000, Math.floor(remaining / 4))),
363
+ maxTasks: Math.min(this.meta.options.maxTasks, 24),
364
+ },
365
+ };
366
+ this.journal.append("team.created", {
367
+ taskId: task.id,
368
+ maxWorkers: childMeta.options.maxWorkers,
369
+ budgetTokens: childMeta.options.maxTokens,
370
+ });
371
+ const child = new Executor(this.cfg, childMeta, new journal_1.TeamJournal(this.journal, task.id), {
372
+ mode: "team",
373
+ teamId: task.id,
374
+ sandbox: this.sandbox,
375
+ runDirPath: this.runDirPath,
376
+ onUsageForward: (model, usage) => {
377
+ // Absorb tokens/cost only — the child already journaled the usage event.
378
+ this.spentTokens += usage.promptTokens + usage.completionTokens;
379
+ this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
380
+ },
381
+ parentSignal: this.ac.signal,
382
+ sharedNotes: this.notes,
383
+ });
384
+ await child.run();
385
+ if (this.ac.signal.aborted) {
386
+ this.finalizeTask(task, "failed", "run cancelled");
387
+ return;
388
+ }
389
+ const report = child.teamReport || "(team produced no consolidated report)";
390
+ for (const a of child.teamArtifacts())
391
+ if (!task.artifacts.includes(a))
392
+ task.artifacts.push(a);
393
+ task.report = report;
394
+ task.reportStatus = "done";
395
+ this.journal.append("team.report", { taskId: task.id, report, artifacts: task.artifacts });
396
+ this.journal.append("task.report", { taskId: task.id, status: "done", report, artifacts: task.artifacts });
397
+ this.finalizeTask(task, child.anyTaskDone() ? "done" : "failed", report);
398
+ }
399
+ async mainLoop() {
400
+ while (!this.finishing) {
401
+ this.drainControl();
402
+ if (this.finishing)
403
+ break;
404
+ if (this.budgetExceeded()) {
405
+ this.finishing = true;
406
+ this.finishReason = "token budget reached";
407
+ break;
408
+ }
409
+ if (this.journal.degraded) {
410
+ // The journal is the source of truth; if it can't be written, the
411
+ // run must stop loudly rather than burn tokens on unrecorded work.
412
+ this.finishing = true;
413
+ this.finishReason = "journal writes are failing — run state is no longer durable";
414
+ this.ac.abort();
415
+ break;
416
+ }
417
+ this.startReadyTasks();
418
+ if (this.inflight.size === 0) {
419
+ const runnable = this.runnableTasks();
420
+ if (runnable.length > 0)
421
+ continue; // loop starts them
422
+ // Nothing running, nothing runnable. Include any reports that
423
+ // settled while the conductor was mid-turn — they must not be lost.
424
+ this.blockStuckTasks();
425
+ const reports = this.drainSettled();
426
+ if (!this.hasOpenWork()) {
427
+ // Everything is terminal. Ask the conductor for a final decision.
428
+ this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
429
+ await this.conductorTurn();
430
+ // An errored turn is not a decision — keep looping so the breaker
431
+ // can retry (and eventually trip) instead of misreading the error
432
+ // as "the conductor chose to stop".
433
+ if (this.lastConductorAction !== "spawn" && !this.lastConductorErrored) {
434
+ this.finishing = true;
435
+ this.finishReason = this.finishReason || "all tasks settled";
436
+ }
437
+ }
438
+ else {
439
+ // Stuck: pending tasks exist but can't run (failed/blocked deps).
440
+ this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
441
+ await this.conductorTurn();
442
+ if (this.lastConductorAction === "wait" && !this.lastConductorErrored) {
443
+ this.finishing = true;
444
+ this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
445
+ }
446
+ }
447
+ continue;
448
+ }
449
+ // Tasks are running — wait for at least one to settle, then debounce:
450
+ // at 100 agents, settles arrive constantly, and waking the conductor
451
+ // for every one of them serializes the whole swarm on its turns.
452
+ await Promise.race([...this.inflight.values()]);
453
+ const debounceMs = Number(process.env.SWARM_SETTLE_DEBOUNCE_MS ?? "2000");
454
+ const settleCap = Math.max(3, Math.ceil(this.activeWorkerCount() / 8));
455
+ while (debounceMs > 0 && this.inflight.size > 0 && this.settledSinceUpdate.length < settleCap) {
456
+ const before = this.settledSinceUpdate.length;
457
+ await Promise.race([...this.inflight.values(), (0, util_1.sleep)(debounceMs)]);
458
+ if (this.settledSinceUpdate.length === before)
459
+ break; // quiet period — flush to the conductor
460
+ this.drainControl();
461
+ if (this.finishing)
462
+ break;
463
+ this.startReadyTasks(); // settles free dep chains; don't idle workers during the debounce
464
+ }
465
+ this.drainControl();
466
+ const reports = this.drainSettled();
467
+ if (reports.length && !this.finishing) {
468
+ this.appendConductorUpdate(undefined, reports);
469
+ await this.conductorTurn();
470
+ }
471
+ }
472
+ }
473
+ /**
474
+ * Strict-mode gap review before synthesis. Returns true when the conductor
475
+ * accepted gap-filling work (the main loop must run again).
476
+ */
477
+ gapPassDone = false;
478
+ async completenessPass() {
479
+ if (this.mode === "team")
480
+ return false; // the root run owns gap review
481
+ if (this.cfg.verification !== "strict" || this.gapPassDone)
482
+ return false;
483
+ if (this.fatal || this.ac.signal.aborted || this.budgetExceeded())
484
+ return false;
485
+ if (this.finishReason.includes("cancel") || this.finishReason.includes("conductor unavailable"))
486
+ return false;
487
+ if (!this.taskList().some((t) => t.status === "done"))
488
+ return false;
489
+ this.gapPassDone = true;
490
+ let verdict = "";
491
+ try {
492
+ const res = await (0, deepseek_1.chat)(this.cfg, {
493
+ model: this.meta.options.conductorModel,
494
+ messages: [
495
+ {
496
+ role: "user",
497
+ content: (0, prompts_1.completenessPrompt)(this.meta.mission, (0, prompts_1.taskTable)(this.taskList()), (0, util_1.truncateMiddle)(this.taskList().map(prompts_1.reportBlock).join("\n\n"), 80_000, "chars")),
498
+ },
499
+ ],
500
+ thinking: false,
501
+ maxTokens: 2048,
502
+ signal: this.ac.signal,
503
+ });
504
+ this.onUsage(this.meta.options.conductorModel, res.usage);
505
+ verdict = (res.content || "").trim();
506
+ }
507
+ catch (e) {
508
+ this.journal.append("log", { level: "warn", msg: `completeness review failed: ${(0, util_1.errMsg)(e)}` });
509
+ return false;
510
+ }
511
+ if (!verdict || /^COMPLETE\b/i.test(verdict)) {
512
+ this.journal.append("log", { level: "info", msg: "completeness review: no gaps found" });
513
+ return false;
514
+ }
515
+ this.journal.append("log", { level: "info", msg: `completeness review found gaps:\n${(0, util_1.clip)(verdict, 1500)}` });
516
+ this.finishing = false;
517
+ this.appendConductorUpdate(`COMPLETENESS REVIEW found gaps before final synthesis:\n${(0, util_1.clip)(verdict, 2000)}\n` +
518
+ "Spawn focused tasks to close the REAL gaps (or finish if you judge them immaterial). This is the final round.");
519
+ await this.conductorTurn();
520
+ if (this.lastConductorAction === "spawn")
521
+ return true;
522
+ this.finishing = true;
523
+ this.finishReason = this.finishReason || "all tasks settled";
524
+ return false;
525
+ }
260
526
  // ---------------------------------------------------------------- conductor
261
527
  nextId() {
262
528
  return this.taskCounter + 1;
@@ -267,7 +533,7 @@ class Executor {
267
533
  // Re-bound the history every turn — the nudge loop and tool-result pushes
268
534
  // below grow it outside appendConductorUpdate's trim.
269
535
  this.trimConductorHistory();
270
- const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
536
+ const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.SET_PHASE_TOOL, tools_1.UPDATE_PLAN_TOOL, tools_1.CONDUCTOR_READ_REPORT_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
271
537
  for (let attempt = 0; attempt < 3; attempt++) {
272
538
  let res;
273
539
  try {
@@ -278,6 +544,9 @@ class Executor {
278
544
  // "auto" rather than "required" for cross-provider safety; the prompt
279
545
  // mandates a tool call and the no-tool nudge loop below enforces it.
280
546
  toolChoice: "auto",
547
+ // The conductor is the swarm's brain: it must never queue behind a
548
+ // hundred worker streams.
549
+ priority: "high",
281
550
  thinking: this.meta.options.thinking,
282
551
  reasoningEffort: this.meta.options.reasoningEffort,
283
552
  // Generous: with thinking enabled, reasoning + a large spawn_tasks
@@ -297,11 +566,26 @@ class Executor {
297
566
  this.fatal = `Provider authentication failed — ${msg}. Set a valid key in Settings.`;
298
567
  this.finishing = true;
299
568
  this.finishReason = this.fatal;
569
+ return;
570
+ }
571
+ // Circuit breaker: a transient failure degrades to "wait" so the loop
572
+ // keeps draining tasks, but repeated consecutive failures must end the
573
+ // run with a clear reason rather than spin forever.
574
+ this.conductorFailures++;
575
+ if (this.conductorFailures >= 5) {
576
+ this.finishing = true;
577
+ this.finishReason = `conductor unavailable: ${this.conductorFailures} consecutive call failures (last: ${msg})`;
578
+ return;
300
579
  }
301
- // Treat a transient conductor failure as a wait so the loop keeps draining tasks.
580
+ const scale = Number(process.env.SWARM_BACKOFF_SCALE || "1") || 1;
581
+ const backoff = [2_000, 5_000, 15_000, 30_000][Math.min(this.conductorFailures - 1, 3)] * scale;
582
+ await new Promise((r) => setTimeout(r, backoff));
302
583
  this.lastConductorAction = "wait";
584
+ this.lastConductorErrored = true;
303
585
  return;
304
586
  }
587
+ this.conductorFailures = 0;
588
+ this.lastConductorErrored = false;
305
589
  this.onUsage(this.meta.options.conductorModel, res.usage);
306
590
  if (res.content.trim())
307
591
  this.journal.append("conductor.say", { text: (0, util_1.clip)(res.content, 4000) });
@@ -334,6 +618,41 @@ class Executor {
334
618
  toolResult = "Acknowledged. Synthesizing the final deliverable.";
335
619
  acted = "finish";
336
620
  }
621
+ else if (call.function.name === "update_plan") {
622
+ const md = String(args.markdown ?? "");
623
+ if (md.trim()) {
624
+ this.planDoc = md;
625
+ try {
626
+ fs.writeFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), md, "utf8");
627
+ }
628
+ catch (e) {
629
+ this.journal.append("log", { level: "warn", msg: `plan write failed: ${(0, util_1.errMsg)(e)}` });
630
+ }
631
+ this.journal.append("plan.updated", { teamScoped: this.mode === "team" || undefined, excerpt: (0, util_1.clip)(md, 1200) });
632
+ toolResult = `Plan saved to artifacts/${this.planFileName()}.`;
633
+ }
634
+ else {
635
+ toolResult = "Plan was empty — not saved.";
636
+ }
637
+ // Bookkeeping, not a scheduling decision — falls through to the nudge.
638
+ }
639
+ else if (call.function.name === "read_report") {
640
+ toolResult = (0, util_1.truncateMiddle)(this.readReportText(String(args.task_id ?? "")), 8000, "chars");
641
+ // Information lookup, not a scheduling decision — falls through to
642
+ // the nudge loop if the conductor stopped here.
643
+ }
644
+ else if (call.function.name === "set_phase") {
645
+ const name = (0, util_1.clip)(String(args.name ?? ""), 80);
646
+ this.phase = {
647
+ name,
648
+ goal: args.goal ? String(args.goal) : undefined,
649
+ exitCriteria: args.exit_criteria ? String(args.exit_criteria) : undefined,
650
+ };
651
+ this.journal.append("phase.set", { name, goal: this.phase.goal, exit_criteria: this.phase.exitCriteria });
652
+ toolResult = `Phase set: ${name}. Now also call spawn_tasks, wait, or finish.`;
653
+ // Not a scheduling decision by itself — fall through to the nudge
654
+ // loop if the conductor stopped here.
655
+ }
337
656
  else if (call.function.name === "wait") {
338
657
  toolResult = "Waiting for running tasks to report.";
339
658
  if (acted === "none")
@@ -344,6 +663,12 @@ class Executor {
344
663
  }
345
664
  this.conductorMessages.push({ role: "tool", tool_call_id: call.id, content: toolResult });
346
665
  }
666
+ if (acted === "none") {
667
+ // set_phase (or an unknown tool) alone is not a scheduling decision —
668
+ // ask again rather than letting the run misread it as "wait"/"finish".
669
+ this.conductorMessages.push({ role: "user", content: "Now call spawn_tasks, wait, or finish." });
670
+ continue;
671
+ }
347
672
  this.lastConductorAction = acted;
348
673
  this.journal.append("conductor.action", { kind: acted });
349
674
  return;
@@ -380,6 +705,7 @@ class Executor {
380
705
  warnings.push(`${id}: dropped dep "${d}" (${idx >= i ? "same-batch later task — would deadlock" : "unknown task"})`);
381
706
  return false;
382
707
  });
708
+ const rawSpec = spec;
383
709
  const task = {
384
710
  id,
385
711
  title: (0, util_1.clip)(String(spec.title ?? "task"), 120),
@@ -388,6 +714,10 @@ class Executor {
388
714
  deps,
389
715
  verify: Boolean(spec.verify) && this.cfg.verification !== "off",
390
716
  context: spec.context ? String(spec.context) : undefined,
717
+ modelTier: ["cheap", "strong"].includes(String(spec.model)) ? spec.model : undefined,
718
+ team: Boolean(spec.team) && this.mode === "root",
719
+ teamMaxWorkers: Number(rawSpec.team_max_workers ?? rawSpec.teamMaxWorkers) || undefined,
720
+ teamBudgetTokens: Number(rawSpec.team_budget_tokens ?? rawSpec.teamBudgetTokens) || undefined,
391
721
  status: "pending",
392
722
  attempt: 1,
393
723
  wave,
@@ -415,14 +745,46 @@ class Executor {
415
745
  w = Math.max(w, t.wave);
416
746
  return w + 1;
417
747
  }
748
+ /** The conductor's living plan document (mission-plan.md). */
749
+ planDoc = "";
750
+ planFileName() {
751
+ return this.mode === "team" ? `mission-plan-${this.teamId}.md` : "mission-plan.md";
752
+ }
753
+ planPin() {
754
+ if (!this.planDoc)
755
+ return undefined;
756
+ return `MISSION PLAN (artifacts/${this.planFileName()}, maintained via update_plan):\n${(0, util_1.clip)(this.planDoc, 1500)}`;
757
+ }
758
+ phaseLine() {
759
+ if (!this.phase)
760
+ return undefined;
761
+ return `CURRENT PHASE: ${this.phase.name}${this.phase.goal ? ` — ${this.phase.goal}` : ""}${this.phase.exitCriteria ? ` (exit: ${this.phase.exitCriteria})` : ""}`;
762
+ }
763
+ /** Full text for the reports that matter, one-liners past the cap. */
764
+ digestReports(reports) {
765
+ const CAP = 12;
766
+ if (reports.length <= CAP)
767
+ return reports.map(prompts_1.reportBlock);
768
+ const important = reports.filter((t) => t.status !== "done");
769
+ const done = reports.filter((t) => t.status === "done");
770
+ const fullDone = done.slice(-Math.max(0, CAP - important.length));
771
+ const briefDone = done.slice(0, done.length - fullDone.length);
772
+ return [
773
+ ...important.map(prompts_1.reportBlock),
774
+ ...fullDone.map(prompts_1.reportBlock),
775
+ ...briefDone.map((t) => `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → DONE — ${(0, util_1.oneLine)(t.report ?? "", 140)} (full text: read_report)`),
776
+ ];
777
+ }
418
778
  appendConductorUpdate(extra, reports) {
419
779
  const ops = this.consumeOperatorNotes();
420
780
  this.conductorMessages.push({
421
781
  role: "user",
422
782
  content: (0, prompts_1.conductorUpdate)({
423
- reports: reports?.map(prompts_1.reportBlock),
783
+ reports: reports ? this.digestReports(reports) : undefined,
424
784
  operatorNotes: ops,
425
785
  blackboard: this.blackboardDigest(),
786
+ phase: this.phaseLine(),
787
+ plan: this.planPin(),
426
788
  nextId: this.nextId(),
427
789
  taskTable: (0, prompts_1.taskTable)(this.taskList()),
428
790
  budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -432,16 +794,63 @@ class Executor {
432
794
  // Keep the conductor's own history from growing without bound.
433
795
  this.trimConductorHistory();
434
796
  }
797
+ /**
798
+ * One-screen summary of everything durable about the run so far. Replaces
799
+ * trimmed history so the conductor never loses the plot on long missions —
800
+ * rebuilt fresh each trim from current state, so it also survives resume.
801
+ */
802
+ missionLedger() {
803
+ const lines = ["[Earlier orchestration history was trimmed. MISSION LEDGER — durable state so far:]"];
804
+ if (this.phase)
805
+ lines.push(this.phaseLine());
806
+ const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
807
+ if (settled.length) {
808
+ lines.push("Settled tasks:");
809
+ const failures = settled.filter((t) => t.status !== "done");
810
+ const done = settled.filter((t) => t.status === "done");
811
+ // Failures stay itemized forever; done tasks collapse by wave once the
812
+ // run gets big (a 500-task ledger must still fit on one screen).
813
+ if (done.length > 30) {
814
+ const waves = [...new Set(done.map((t) => t.wave))].sort((a, b) => a - b);
815
+ for (const w of waves) {
816
+ const ws = done.filter((t) => t.wave === w);
817
+ lines.push(`- wave ${w}: ${ws.length} done (${ws.map((t) => t.id).join(",")})`);
818
+ }
819
+ }
820
+ else {
821
+ for (const t of done)
822
+ lines.push(`- ${t.id} [done] ${(0, util_1.clip)(t.title, 60)}${t.report ? ` — ${(0, util_1.oneLine)(t.report, 120)}` : ""}`);
823
+ }
824
+ for (const t of failures) {
825
+ lines.push(`- ${t.id} [${t.status}] ${(0, util_1.clip)(t.title, 60)}${t.error ? ` — ${(0, util_1.oneLine)(t.error, 80)}` : ""}`);
826
+ }
827
+ }
828
+ const decisions = this.notes.filter((n) => n.kind === "decision");
829
+ if (decisions.length) {
830
+ lines.push("Decisions:");
831
+ for (const d of decisions.slice(-20))
832
+ lines.push(`- ${(0, util_1.oneLine)(d.text, 140)}`);
833
+ }
834
+ return (0, util_1.clip)(lines.join("\n"), 8000);
835
+ }
435
836
  trimConductorHistory() {
436
837
  const MAX = 60;
437
- const TRIM_NOTICE = "[Earlier orchestration history was trimmed. Current swarm state is below.]";
838
+ const LEDGER_MARK = "MISSION LEDGER";
839
+ const setLedger = () => {
840
+ const msg = { role: "user", content: this.missionLedger() };
841
+ if (this.conductorMessages[1]?.content?.includes(LEDGER_MARK))
842
+ this.conductorMessages[1] = msg;
843
+ else
844
+ this.conductorMessages.splice(1, 0, msg);
845
+ };
438
846
  if (this.conductorMessages.length > MAX) {
439
847
  const system = this.conductorMessages[0];
440
848
  const tail = this.conductorMessages.slice(-(MAX - 2));
441
849
  // Don't begin the tail on an orphaned tool result.
442
850
  while (tail.length && tail[0].role === "tool")
443
851
  tail.shift();
444
- this.conductorMessages = [system, { role: "user", content: TRIM_NOTICE }, ...tail];
852
+ this.conductorMessages = [system, ...tail];
853
+ setLedger();
445
854
  }
446
855
  // Count alone doesn't bound size: every update embeds the full task table,
447
856
  // so a deep run can blow the model window long before 60 messages. The
@@ -449,9 +858,7 @@ class Executor {
449
858
  const budget = Math.floor(this.cfg.contextTokenLimit * 0.75);
450
859
  if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
451
860
  return;
452
- if (this.conductorMessages[1]?.content !== TRIM_NOTICE) {
453
- this.conductorMessages.splice(1, 0, { role: "user", content: TRIM_NOTICE });
454
- }
861
+ setLedger();
455
862
  while ((0, agent_1.estimateMessages)(this.conductorMessages) > budget && this.conductorMessages.length > 10) {
456
863
  this.conductorMessages.splice(2, 1);
457
864
  // Never leave tool results whose assistant turn was dropped.
@@ -486,8 +893,17 @@ class Executor {
486
893
  }
487
894
  }
488
895
  }
896
+ /** Tasks occupying a worker slot: running, not those awaiting verification. */
897
+ activeWorkerCount() {
898
+ let n = 0;
899
+ for (const id of this.inflight.keys()) {
900
+ if (this.tasks.get(id)?.status === "running")
901
+ n++;
902
+ }
903
+ return n;
904
+ }
489
905
  startReadyTasks() {
490
- while (this.inflight.size < this.meta.options.maxWorkers && !this.finishing) {
906
+ while (this.activeWorkerCount() < this.meta.options.maxWorkers && !this.finishing) {
491
907
  const next = this.runnableTasks()[0];
492
908
  if (!next)
493
909
  break;
@@ -516,12 +932,14 @@ class Executor {
516
932
  depReportsFor(task) {
517
933
  if (!task.deps.length)
518
934
  return "";
935
+ // Excerpts, not full reports: a fan-in task with many deps must not blow
936
+ // its context window on day one. Workers fetch full text with read_report.
519
937
  return task.deps
520
938
  .map((d) => {
521
939
  const dep = this.tasks.get(d);
522
940
  if (!dep)
523
941
  return `(${d}: missing)`;
524
- return (0, prompts_1.reportBlock)(dep);
942
+ return (0, prompts_1.depReportBlock)(dep);
525
943
  })
526
944
  .join("\n\n");
527
945
  }
@@ -535,13 +953,31 @@ class Executor {
535
953
  agentId,
536
954
  taskId: task?.id,
537
955
  signal: this.ac.signal,
538
- addNote: (text, key) => {
539
- this.notes.push({ taskId: task?.id, key, text });
956
+ addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
957
+ addNote: (text, key, kind) => {
958
+ this.notes.push({ taskId: task?.id, key, kind, text });
540
959
  // Only the recent tail ever feeds digests; without a cap a multi-day
541
- // run accumulates every note in memory.
542
- if (this.notes.length > 2000)
543
- this.notes.splice(0, this.notes.length - 2000);
544
- this.journal.append("note.added", { taskId: task?.id, agentId, key, text: (0, util_1.clip)(text, 1200) });
960
+ // run accumulates every note in memory. Decisions are kept regardless.
961
+ if (this.notes.length > 4000) {
962
+ const decisions = this.notes.filter((n) => n.kind === "decision");
963
+ const rest = this.notes.filter((n) => n.kind !== "decision");
964
+ rest.splice(0, rest.length - Math.max(0, 4000 - decisions.length));
965
+ this.notes = [...decisions, ...rest];
966
+ }
967
+ this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
968
+ },
969
+ searchNotes: (q) => this.searchNotes(q),
970
+ readReport: (taskId) => this.readReportText(taskId),
971
+ checkClaim: (rel) => {
972
+ const norm = rel.replace(/^\.\//, "");
973
+ const claim = this.notes.find((n) => n.kind === "claim" &&
974
+ n.key === norm &&
975
+ n.taskId &&
976
+ n.taskId !== task?.id &&
977
+ ["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? ""));
978
+ return claim
979
+ ? `⚠ ${claim.taskId} holds a claim on ${norm} ("${(0, util_1.oneLine)(claim.text, 80)}") — coordinate via the blackboard before further edits.`
980
+ : null;
545
981
  },
546
982
  addArtifact: (rel) => {
547
983
  if (task && !task.artifacts.includes(rel))
@@ -553,7 +989,33 @@ class Executor {
553
989
  },
554
990
  };
555
991
  }
992
+ readReportText(taskId) {
993
+ const t = this.tasks.get(taskId.trim().toUpperCase());
994
+ if (!t)
995
+ return `no such task: ${taskId}`;
996
+ if (!t.report)
997
+ return `${t.id} has not reported yet (status: ${t.status})`;
998
+ return `${t.id} "${t.title}" → ${t.status}\n${t.report}${t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : ""}`;
999
+ }
1000
+ recordCheckpoint(task, agentId, summary) {
1001
+ task.lastCheckpoint = (0, util_1.clip)(summary, 4000);
1002
+ this.journal.append("task.checkpoint", {
1003
+ taskId: task.id,
1004
+ agentId,
1005
+ attempt: task.attempt,
1006
+ summary: task.lastCheckpoint,
1007
+ });
1008
+ }
556
1009
  async runTaskPipeline(task) {
1010
+ if (task.team) {
1011
+ try {
1012
+ await this.runTeam(task);
1013
+ }
1014
+ catch (e) {
1015
+ this.finalizeTask(task, "failed", `team error: ${(0, util_1.errMsg)(e)}`);
1016
+ }
1017
+ return;
1018
+ }
557
1019
  for (;;) {
558
1020
  try {
559
1021
  const outcome = await this.runWorker(task);
@@ -566,7 +1028,7 @@ class Executor {
566
1028
  this.finalizeTask(task, "failed", task.feedback || task.error || "not retried: run is winding down");
567
1029
  return;
568
1030
  }
569
- if (task.attempt < VERIFY_MAX_ATTEMPTS) {
1031
+ if (task.attempt < this.cfg.verifyMaxAttempts) {
570
1032
  task.attempt++;
571
1033
  task.status = "running";
572
1034
  this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt });
@@ -582,7 +1044,7 @@ class Executor {
582
1044
  this.finalizeTask(task, "failed", "run cancelled");
583
1045
  return;
584
1046
  }
585
- if (task.attempt < VERIFY_MAX_ATTEMPTS && !this.finishing && !this.budgetExceeded()) {
1047
+ if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
586
1048
  task.attempt++;
587
1049
  task.error = (0, util_1.errMsg)(e);
588
1050
  task.status = "running";
@@ -594,9 +1056,17 @@ class Executor {
594
1056
  }
595
1057
  }
596
1058
  }
1059
+ resolveModel(tier) {
1060
+ if (tier === "cheap")
1061
+ return this.cfg.cheapModel || this.meta.options.model;
1062
+ if (tier === "strong")
1063
+ return this.cfg.strongModel || this.meta.options.model;
1064
+ return this.meta.options.model;
1065
+ }
597
1066
  /** Returns "retry" to request another attempt, or "done" when finalized. */
598
1067
  async runWorker(task) {
599
1068
  const agentId = (0, util_1.rid)("w");
1069
+ const model = this.resolveModel(task.modelTier);
600
1070
  task.agentIds.push(agentId);
601
1071
  const dirListing = this.topListing();
602
1072
  const system = (0, prompts_1.workerSystem)({
@@ -614,23 +1084,26 @@ class Executor {
614
1084
  agentId,
615
1085
  taskId: task.id,
616
1086
  role: task.role,
617
- model: this.meta.options.model,
1087
+ model,
618
1088
  purpose: task.title,
619
1089
  });
620
1090
  const outcome = await (0, agent_1.runAgent)({
621
1091
  cfg: this.cfg,
622
1092
  agentId,
623
- model: this.meta.options.model,
1093
+ model,
624
1094
  thinking: this.meta.options.thinking,
625
1095
  reasoningEffort: this.meta.options.reasoningEffort,
626
1096
  system,
627
1097
  kickoff: prompts_1.WORKER_KICKOFF,
628
- tools: (0, tools_1.workerToolset)(),
1098
+ tools: (0, tools_1.workerToolset)(this.cfg),
629
1099
  terminal: [tools_1.REPORT_TOOL],
630
1100
  maxSteps: this.meta.options.maxStepsPerTask,
631
1101
  signal: this.ac.signal,
632
1102
  ctx: this.makeToolCtx(agentId, task),
633
- hooks: this.agentHooks(agentId, task.id),
1103
+ hooks: {
1104
+ ...this.agentHooks(agentId, task.id),
1105
+ onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
1106
+ },
634
1107
  stop: this.agentStop,
635
1108
  });
636
1109
  this.flushDeltas(agentId);
@@ -650,11 +1123,18 @@ class Executor {
650
1123
  task.artifacts.push(art);
651
1124
  task.report = report;
652
1125
  task.reportStatus = reportStatus;
1126
+ const strList = (v, max) => Array.isArray(v) ? v.map((x) => (0, util_1.clip)(String(x), 300)).slice(0, max) : undefined;
1127
+ task.keyFacts = strList(a.key_facts, 8);
1128
+ task.openQuestions = strList(a.open_questions, 6);
1129
+ task.filesTouched = strList(a.files_touched, 40);
653
1130
  this.journal.append("task.report", {
654
1131
  taskId: task.id,
655
1132
  status: reportStatus,
656
1133
  report,
657
1134
  artifacts: task.artifacts,
1135
+ keyFacts: task.keyFacts,
1136
+ openQuestions: task.openQuestions,
1137
+ filesTouched: task.filesTouched,
658
1138
  });
659
1139
  if (reportStatus === "blocked") {
660
1140
  this.finalizeTask(task, "blocked", report);
@@ -663,6 +1143,14 @@ class Executor {
663
1143
  if (task.verify && this.cfg.verification !== "off") {
664
1144
  task.status = "verifying";
665
1145
  this.journal.append("task.status", { taskId: task.id, status: "verifying", attempt: task.attempt });
1146
+ // Mechanical checks first: free, instant, and they catch the most common
1147
+ // fabrications (claimed artifacts that don't exist) without an LLM call.
1148
+ const mech = this.preVerify(task);
1149
+ if (mech) {
1150
+ task.feedback = mech;
1151
+ this.journal.append("verify.result", { taskId: task.id, pass: false, feedback: mech, mechanical: true });
1152
+ return "retry";
1153
+ }
666
1154
  const pass = await this.runVerifier(task);
667
1155
  if (!pass)
668
1156
  return "retry";
@@ -670,20 +1158,52 @@ class Executor {
670
1158
  this.finalizeTask(task, "done", report);
671
1159
  return "done";
672
1160
  }
1161
+ /** Zero-token sanity checks before the LLM verifier. Returns failure feedback or null. */
1162
+ preVerify(task) {
1163
+ const report = task.report ?? "";
1164
+ if (report.trim().length < 40) {
1165
+ return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
1166
+ }
1167
+ const missing = [];
1168
+ // Remote sandboxes own their filesystem — only check host-visible paths.
1169
+ if (this.sandbox.localFs) {
1170
+ const okAt = (p) => {
1171
+ try {
1172
+ return fs.statSync(p).size > 0;
1173
+ }
1174
+ catch {
1175
+ return false;
1176
+ }
1177
+ };
1178
+ for (const rel of task.artifacts) {
1179
+ const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
1180
+ const inWorkdir = path.resolve(this.meta.cwd, rel);
1181
+ if (!okAt(inArtifacts) && !okAt(inWorkdir))
1182
+ missing.push(rel);
1183
+ }
1184
+ }
1185
+ if (missing.length) {
1186
+ return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
1187
+ }
1188
+ return null;
1189
+ }
673
1190
  async runVerifier(task) {
674
1191
  const agentId = (0, util_1.rid)("v");
1192
+ // Verification gets the strong tier when configured — a weak verifier
1193
+ // rubber-stamps exactly the tasks that most need scrutiny.
1194
+ const model = this.cfg.strongModel || this.meta.options.model;
675
1195
  task.agentIds.push(agentId);
676
1196
  this.journal.append("agent.spawned", {
677
1197
  agentId,
678
1198
  taskId: task.id,
679
1199
  role: "verifier",
680
- model: this.meta.options.model,
1200
+ model,
681
1201
  purpose: `verify ${task.id}`,
682
1202
  });
683
1203
  const outcome = await (0, agent_1.runAgent)({
684
1204
  cfg: this.cfg,
685
1205
  agentId,
686
- model: this.meta.options.model,
1206
+ model,
687
1207
  thinking: this.meta.options.thinking,
688
1208
  reasoningEffort: this.meta.options.reasoningEffort,
689
1209
  system: (0, prompts_1.verifierSystem)(this.meta, task),
@@ -692,7 +1212,10 @@ class Executor {
692
1212
  terminal: [tools_1.VERDICT_TOOL],
693
1213
  maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
694
1214
  signal: this.ac.signal,
695
- ctx: this.makeToolCtx(agentId, task),
1215
+ // Blind verification: the verifier judges deliverables against the
1216
+ // objective with its own tools — it must not inherit the swarm's shared
1217
+ // beliefs (blackboard) or the worker's narrative beyond the claims.
1218
+ ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
696
1219
  hooks: this.agentHooks(agentId, task.id),
697
1220
  stop: this.agentStop,
698
1221
  });
@@ -716,6 +1239,58 @@ class Executor {
716
1239
  task.error = reason;
717
1240
  this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
718
1241
  this.settledSinceUpdate.push(task.id);
1242
+ this.maybeSnapshot();
1243
+ }
1244
+ // ---------------------------------------------------------------- progress snapshots
1245
+ snapshotCounter = 0;
1246
+ settledSinceSnapshot = 0;
1247
+ snapshotInflight = false;
1248
+ /**
1249
+ * Periodic partial deliverable: every N settled tasks, write a cheap-tier
1250
+ * progress report to artifacts/. Fire-and-forget — a multi-day run always
1251
+ * has something readable, and a snapshot failure never blocks scheduling.
1252
+ */
1253
+ maybeSnapshot() {
1254
+ if (this.mode !== "root" || this.finishing || this.snapshotInflight)
1255
+ return;
1256
+ const every = Number(process.env.SWARM_SNAPSHOT_EVERY ?? "25");
1257
+ if (!every || every < 1)
1258
+ return;
1259
+ if (++this.settledSinceSnapshot < every)
1260
+ return;
1261
+ this.settledSinceSnapshot = 0;
1262
+ this.snapshotInflight = true;
1263
+ const n = ++this.snapshotCounter;
1264
+ const model = this.cfg.cheapModel || this.meta.options.conductorModel;
1265
+ const tasks = this.taskList();
1266
+ const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
1267
+ (0, deepseek_1.chat)(this.cfg, {
1268
+ model,
1269
+ messages: [
1270
+ {
1271
+ role: "user",
1272
+ content: `Write a concise interim progress report (markdown) for an in-flight agent-swarm mission. Cover: what has been accomplished so far (with concrete results/paths from the reports), what failed, what is currently running, and what remains. This is a partial deliverable for the operator — informative, no filler.\n\nMISSION\n${this.meta.mission}\n\nTASKS\n${(0, prompts_1.taskTable)(tasks)}\n\nSETTLED REPORTS\n${(0, util_1.truncateMiddle)(settled.map(prompts_1.reportBlock).join("\n\n"), 50_000, "chars")}`,
1273
+ },
1274
+ ],
1275
+ thinking: false,
1276
+ maxTokens: 4096,
1277
+ signal: this.ac.signal,
1278
+ })
1279
+ .then((res) => {
1280
+ this.onUsage(model, res.usage);
1281
+ if (!res.content.trim())
1282
+ return;
1283
+ const rel = `progress-report-${n}.md`;
1284
+ fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
1285
+ this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
1286
+ })
1287
+ .catch((e) => {
1288
+ if (!this.ac.signal.aborted)
1289
+ this.journal.append("log", { level: "warn", msg: `progress snapshot failed: ${(0, util_1.errMsg)(e)}` });
1290
+ })
1291
+ .finally(() => {
1292
+ this.snapshotInflight = false;
1293
+ });
719
1294
  }
720
1295
  topListing() {
721
1296
  // Remote sandboxes own their filesystem; a host listing would be a lie.
@@ -750,18 +1325,31 @@ class Executor {
750
1325
  */
751
1326
  deltaBuf = new Map();
752
1327
  deltaTimer = null;
1328
+ thinkDropLogged = false;
753
1329
  queueDelta(agentId, taskId, channel, text) {
1330
+ // Deltas are UI sugar, never state — thin them under load so a 100-agent
1331
+ // swarm doesn't write gigabytes of streaming chatter into the journal.
1332
+ const load = this.activeWorkerCount();
1333
+ if (channel === "think" && load > 48) {
1334
+ if (!this.thinkDropLogged) {
1335
+ this.thinkDropLogged = true;
1336
+ this.journal.append("log", { level: "info", msg: `thinking streams muted above 48 active agents (currently ${load})` });
1337
+ }
1338
+ return;
1339
+ }
1340
+ const flushChars = load > 24 ? 2000 : 480;
1341
+ const flushMs = load > 24 ? 1000 : 200;
754
1342
  const key = `${agentId}:${channel}`;
755
1343
  const buf = this.deltaBuf.get(key);
756
1344
  if (buf)
757
1345
  buf.text += text;
758
1346
  else
759
1347
  this.deltaBuf.set(key, { agentId, taskId, channel, text });
760
- if (this.deltaBuf.get(key).text.length >= 480) {
1348
+ if (this.deltaBuf.get(key).text.length >= flushChars) {
761
1349
  this.flushDeltas(agentId);
762
1350
  }
763
1351
  else if (!this.deltaTimer) {
764
- this.deltaTimer = setTimeout(() => this.flushDeltas(), 200);
1352
+ this.deltaTimer = setTimeout(() => this.flushDeltas(), flushMs);
765
1353
  }
766
1354
  }
767
1355
  flushDeltas(onlyAgent) {
@@ -802,6 +1390,10 @@ class Executor {
802
1390
  // ---------------------------------------------------------------- operator control
803
1391
  operatorQueue = [];
804
1392
  drainControl() {
1393
+ // Only the root executor consumes operator control; teams are cancelled
1394
+ // via the parent's abort signal and would otherwise steal queued notes.
1395
+ if (this.mode === "team")
1396
+ return;
805
1397
  for (const msg of this.control.poll()) {
806
1398
  if (msg.kind === "cancel") {
807
1399
  this.journal.append("operator.note", { text: "⛔ Cancel requested by operator." });
@@ -830,8 +1422,25 @@ class Executor {
830
1422
  const reportPath = path.join(this.runDirPath, "artifacts", "final-report.md");
831
1423
  (0, util_1.ensureDir)(path.dirname(reportPath));
832
1424
  fs.writeFileSync(reportPath, reportMarkdown, "utf8");
1425
+ // Always ship a readable, shareable HTML rendering alongside the raw
1426
+ // markdown; a rendering bug must never block run finalization.
1427
+ let htmlPath;
1428
+ try {
1429
+ htmlPath = path.join(this.runDirPath, "artifacts", "final-report.html");
1430
+ fs.writeFileSync(htmlPath, (0, report_1.renderFinalHtml)({
1431
+ markdown: reportMarkdown,
1432
+ mission: this.meta.mission,
1433
+ runId: this.meta.id,
1434
+ status,
1435
+ finishedAt: Date.now(),
1436
+ }), "utf8");
1437
+ }
1438
+ catch (e) {
1439
+ htmlPath = undefined;
1440
+ this.journal.append("log", { level: "warn", msg: `final-report.html render failed: ${(0, util_1.errMsg)(e)}` });
1441
+ }
833
1442
  this.setStatus(status, reason);
834
- this.journal.append("run.final", { summary, reportPath, reason, status });
1443
+ this.journal.append("run.final", { summary, reportPath, htmlPath, reason, status });
835
1444
  await this.journal.flush();
836
1445
  }
837
1446
  /** Terminate the run as failed without any further model calls. */
@@ -865,7 +1474,7 @@ class Executor {
865
1474
  const agentId = (0, util_1.rid)("synth");
866
1475
  let summary = "";
867
1476
  let reportMarkdown = "";
868
- try {
1477
+ const synthOnce = async (extraNote) => {
869
1478
  const outcome = await (0, agent_1.runAgent)({
870
1479
  cfg: this.cfg,
871
1480
  agentId,
@@ -874,17 +1483,17 @@ class Executor {
874
1483
  reasoningEffort: this.meta.options.reasoningEffort,
875
1484
  system: (0, prompts_1.synthSystem)({
876
1485
  meta: this.meta,
877
- finishNotes: this.finishNotes,
878
- reports: (0, util_1.truncateMiddle)(reports, 120_000, "chars"),
879
- blackboard: this.blackboardDigest(4000),
1486
+ finishNotes: [this.finishNotes, extraNote].filter(Boolean).join("\n\n"),
1487
+ reports: (0, util_1.truncateMiddle)(reports, 300_000, "chars"),
1488
+ blackboard: this.blackboardDigest(6000),
880
1489
  artifactList,
881
1490
  reason: this.finishReason || "completed",
882
1491
  }),
883
1492
  kickoff: prompts_1.SYNTH_KICKOFF,
884
1493
  tools: (0, tools_1.synthToolset)(),
885
1494
  terminal: [tools_1.SUBMIT_FINAL_TOOL],
886
- maxSteps: 12,
887
- maxTokensOut: 16384,
1495
+ maxSteps: 24,
1496
+ maxTokensOut: 32000,
888
1497
  signal: new AbortController().signal, // synthesis should finish even if run was cancelled
889
1498
  ctx: this.makeToolCtx(agentId, null),
890
1499
  hooks: this.agentHooks(agentId, ""),
@@ -892,6 +1501,36 @@ class Executor {
892
1501
  const a = (outcome.terminal?.args ?? {});
893
1502
  reportMarkdown = String(a.report_markdown ?? outcome.finalText ?? "");
894
1503
  summary = String(a.summary ?? "");
1504
+ };
1505
+ try {
1506
+ await synthOnce();
1507
+ // Strict mode: check the final report's claims against the task reports
1508
+ // (the ground truth) and re-synthesize once if it misrepresents them.
1509
+ if (this.cfg.verification === "strict" && reportMarkdown.trim() && tasks.length) {
1510
+ try {
1511
+ const res = await (0, deepseek_1.chat)(this.cfg, {
1512
+ model: this.meta.options.conductorModel,
1513
+ messages: [
1514
+ {
1515
+ role: "user",
1516
+ content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
1517
+ },
1518
+ ],
1519
+ thinking: false,
1520
+ maxTokens: 2048,
1521
+ signal: new AbortController().signal,
1522
+ });
1523
+ this.onUsage(this.meta.options.conductorModel, res.usage);
1524
+ const check = (res.content || "").trim();
1525
+ if (check && !/^OK\b/i.test(check)) {
1526
+ this.journal.append("log", { level: "warn", msg: `synthesis check found discrepancies:\n${(0, util_1.clip)(check, 1500)}` });
1527
+ await synthOnce(`A faithfulness review of your previous draft found these discrepancies — fix them, claiming only what the task reports support:\n${(0, util_1.clip)(check, 2000)}`);
1528
+ }
1529
+ }
1530
+ catch (e) {
1531
+ this.journal.append("log", { level: "warn", msg: `synthesis check failed: ${(0, util_1.errMsg)(e)}` });
1532
+ }
1533
+ }
895
1534
  }
896
1535
  catch (e) {
897
1536
  this.journal.append("log", { level: "error", msg: `synthesis failed: ${(0, util_1.errMsg)(e)}` });
@@ -920,6 +1559,16 @@ class Executor {
920
1559
  reason = `All ${tasks.length} task(s) failed or were blocked.`;
921
1560
  }
922
1561
  await this.writeFinal(status, reason, reportMarkdown, summary || (0, util_1.clip)(reportMarkdown, 600));
1562
+ // Cross-run memory: real-directory runs leave a trace for the next swarm.
1563
+ if (!this.meta.sandbox && status !== "cancelled") {
1564
+ (0, memory_1.appendMemory)(this.meta.cwd, {
1565
+ mission: this.meta.mission,
1566
+ finishedAt: Date.now(),
1567
+ status,
1568
+ summary: (0, util_1.clip)(summary || reportMarkdown, 600),
1569
+ keyDecisions: this.notes.filter((n) => n.kind === "decision").slice(-10).map((n) => n.text),
1570
+ });
1571
+ }
923
1572
  }
924
1573
  fallbackReport(tasks) {
925
1574
  const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];