@robzilla1738/agentswarm 0.3.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +51 -11
  2. package/dist/agent.js +18 -2
  3. package/dist/cli.js +39 -8
  4. package/dist/config.js +62 -6
  5. package/dist/crawltools.js +247 -0
  6. package/dist/deepseek.js +125 -10
  7. package/dist/executor.js +993 -144
  8. package/dist/hub.js +85 -6
  9. package/dist/journal.js +61 -11
  10. package/dist/memory.js +84 -0
  11. package/dist/pdftext.js +211 -0
  12. package/dist/prompts.js +124 -23
  13. package/dist/report.js +289 -0
  14. package/dist/run.js +15 -2
  15. package/dist/sandbox.js +11 -0
  16. package/dist/searchcore.js +244 -0
  17. package/dist/state.js +85 -3
  18. package/dist/tools.js +392 -25
  19. package/dist/util.js +85 -0
  20. package/dist/webtools.js +327 -66
  21. package/package.json +3 -2
  22. package/ui/out/404/index.html +1 -1
  23. package/ui/out/404.html +1 -1
  24. package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
  25. package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
  26. package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
  27. package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
  28. package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
  29. package/ui/out/_next/static/css/d95c2ba395730031.css +3 -0
  30. package/ui/out/fonts/PlanetKosmos.ttf +0 -0
  31. package/ui/out/index.html +1 -1
  32. package/ui/out/index.txt +3 -3
  33. package/ui/out/run/index.html +1 -1
  34. package/ui/out/run/index.txt +3 -3
  35. package/ui/out/settings/index.html +1 -1
  36. package/ui/out/settings/index.txt +3 -3
  37. package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
  38. package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
  39. package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
  40. package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
  41. package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
  42. package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
  43. package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
  44. /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
  45. /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/executor.js CHANGED
@@ -40,12 +40,14 @@ const agent_1 = require("./agent");
40
40
  const config_1 = require("./config");
41
41
  const control_1 = require("./control");
42
42
  const deepseek_1 = require("./deepseek");
43
+ const journal_1 = require("./journal");
43
44
  const tools_1 = require("./tools");
44
45
  const prompts_1 = require("./prompts");
46
+ const memory_1 = require("./memory");
47
+ const report_1 = require("./report");
45
48
  const sandbox_1 = require("./sandbox");
46
49
  const types_1 = require("./types");
47
50
  const util_1 = require("./util");
48
- const VERIFY_MAX_ATTEMPTS = 2;
49
51
  class Executor {
50
52
  cfg;
51
53
  meta;
@@ -59,6 +61,7 @@ class Executor {
59
61
  inflight = new Map();
60
62
  settledSinceUpdate = [];
61
63
  notes = [];
64
+ phase = null;
62
65
  conductorMessages = [];
63
66
  spentTokens = 0;
64
67
  cost = 0;
@@ -67,19 +70,43 @@ class Executor {
67
70
  finishReason = "";
68
71
  fatal = null;
69
72
  lastConductorAction = "none";
73
+ conductorFailures = 0;
74
+ /** True when the last conductor turn ended in a call error, not a decision. */
75
+ lastConductorErrored = false;
70
76
  resumed = false;
71
77
  sandbox;
72
- constructor(cfg, meta, journal) {
78
+ mode;
79
+ teamId;
80
+ opts;
81
+ /** Team-mode result: the consolidated report handed back to the parent task. */
82
+ teamReport = "";
83
+ constructor(cfg, meta, journal, opts = {}) {
73
84
  this.cfg = cfg;
74
85
  this.meta = meta;
75
- this.runDirPath = (0, config_1.runDir)(meta.id);
86
+ this.runDirPath = opts.runDirPath ?? (0, config_1.runDir)(meta.id);
76
87
  this.journal = journal;
77
88
  this.control = new control_1.ControlReader(this.runDirPath);
89
+ this.mode = opts.mode ?? "root";
90
+ this.teamId = opts.teamId;
91
+ this.opts = opts;
92
+ if (opts.sharedNotes)
93
+ this.notes = opts.sharedNotes;
78
94
  (0, util_1.ensureDir)(path.join(this.runDirPath, "artifacts"));
79
- // "A directory on disk" runs always execute on the host — touching the
80
- // operator's real files is the entire point of that mode.
81
- const kind = meta.sandbox ? meta.options.sandboxRuntime ?? "host" : "host";
82
- this.sandbox = (0, sandbox_1.createSandbox)(kind, { runId: meta.id, hostDir: meta.cwd, cfg });
95
+ if (opts.sandbox) {
96
+ this.sandbox = opts.sandbox;
97
+ }
98
+ else {
99
+ // "A directory on disk" runs always execute on the host — touching the
100
+ // operator's real files is the entire point of that mode.
101
+ const kind = meta.sandbox ? meta.options.sandboxRuntime ?? "host" : "host";
102
+ this.sandbox = (0, sandbox_1.createSandbox)(kind, { runId: meta.id, hostDir: meta.cwd, cfg });
103
+ }
104
+ if (opts.parentSignal) {
105
+ if (opts.parentSignal.aborted)
106
+ this.ac.abort();
107
+ else
108
+ opts.parentSignal.addEventListener("abort", () => this.ac.abort(), { once: true });
109
+ }
83
110
  }
84
111
  cancel() {
85
112
  this.finishing = true;
@@ -107,18 +134,52 @@ class Executor {
107
134
  const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
108
135
  this.taskCounter = Math.max(this.taskCounter, n);
109
136
  }
110
- this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, text: n.text }));
137
+ // Drop claims held by settled tasks — they were released on task end and
138
+ // must not resurrect across a restart.
139
+ const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
140
+ this.notes = state.notes
141
+ .map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
142
+ .filter((n) => !(n.kind === "claim" && n.taskId && settled.has(n.taskId)));
143
+ const lastPhase = state.phases[state.phases.length - 1];
144
+ if (lastPhase)
145
+ this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
111
146
  this.spentTokens = state.totalUsage.promptTokens + state.totalUsage.completionTokens;
112
147
  this.cost = state.cost;
148
+ try {
149
+ // The living plan survives restarts from disk, not from the journal.
150
+ this.planDoc = fs.readFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), "utf8");
151
+ }
152
+ catch {
153
+ /* no plan yet */
154
+ }
113
155
  this.resumed = true;
114
156
  }
115
157
  setStatus(status, reason) {
158
+ // A team is one task of the parent run, not a run of its own.
159
+ if (this.mode === "team")
160
+ return;
116
161
  this.journal.append("run.status", { status, reason });
117
162
  }
163
+ budgetWarned = new Set();
118
164
  onUsage = (model, usage) => {
119
165
  this.spentTokens += usage.promptTokens + usage.completionTokens;
120
166
  this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
121
167
  this.journal.append("usage", { model, usage, cost: this.cost });
168
+ // Team spend also counts against the parent's (authoritative) budget.
169
+ this.opts.onUsageForward?.(model, usage);
170
+ const cap = this.meta.options.maxTokens;
171
+ if (cap > 0) {
172
+ const pct = (this.spentTokens / cap) * 100;
173
+ for (const threshold of [50, 80, 95]) {
174
+ if (pct >= threshold && !this.budgetWarned.has(threshold)) {
175
+ this.budgetWarned.add(threshold);
176
+ this.journal.append("log", {
177
+ level: threshold >= 95 ? "warn" : "info",
178
+ msg: `budget: ${threshold}% of the run's token cap used (est. $${this.cost.toFixed(2)})`,
179
+ });
180
+ }
181
+ }
182
+ }
122
183
  };
123
184
  budgetExceeded() {
124
185
  return this.spentTokens >= this.meta.options.maxTokens;
@@ -126,39 +187,69 @@ class Executor {
126
187
  blackboardDigest(max = 1800) {
127
188
  if (!this.notes.length)
128
189
  return "";
129
- const lines = this.notes
130
- .slice(-40)
131
- .map((n) => `• ${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`);
132
- let out = lines.join("\n");
133
- if (out.length > max)
134
- out = out.slice(out.length - max);
135
- return out;
190
+ const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
191
+ // Decisions and conflicts anchor mission-wide coherence and are never
192
+ // trimmed out of the digest; everything else shows only its recent tail.
193
+ const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
194
+ const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
195
+ let tail = rest.join("\n");
196
+ const budget = Math.max(400, max - pinned.join("\n").length);
197
+ if (tail.length > budget)
198
+ tail = tail.slice(tail.length - budget);
199
+ return [pinned.join("\n"), tail].filter(Boolean).join("\n");
200
+ }
201
+ searchNotes(query) {
202
+ const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
203
+ if (!terms.length)
204
+ return "empty query";
205
+ const scored = this.notes
206
+ .map((n) => {
207
+ const hay = `${n.key ?? ""} ${n.kind ?? ""} ${n.text}`.toLowerCase();
208
+ const score = terms.reduce((s, t) => s + (hay.includes(t) ? 1 : 0), 0);
209
+ return { n, score };
210
+ })
211
+ .filter((x) => x.score > 0)
212
+ .sort((a, b) => b.score - a.score)
213
+ .slice(0, 12);
214
+ if (!scored.length)
215
+ return "no notes matched";
216
+ return scored
217
+ .map(({ n }) => `• ${n.kind ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.clip)(n.text, 400)}${n.taskId ? ` (${n.taskId})` : ""}`)
218
+ .join("\n");
136
219
  }
137
220
  // ---------------------------------------------------------------- main
138
221
  async run() {
139
222
  this.setStatus("planning");
140
- // Preflight: validate auth before doing any work so the operator gets an
141
- // instant, clear error instead of a phantom "done" run.
142
- const auth = await (0, deepseek_1.validateAuth)(this.cfg);
143
- if (auth.status === "invalid") {
144
- this.fatal = `Provider authentication failed — ${auth.message || "invalid API key"}. Set a valid key in Settings (or: swarm config set apiKey <...>).`;
145
- this.finishReason = this.fatal;
146
- this.journal.append("log", { level: "error", msg: this.fatal });
147
- await this.fail(this.fatal);
148
- return;
149
- }
150
- // Boot the sandbox before any work — a dead Docker daemon or a bad cloud
151
- // key must fail the run instantly with a clear reason, not mid-mission.
152
- try {
153
- await this.sandbox.start((msg) => this.journal.append("log", { level: "info", msg }));
154
- this.journal.append("log", { level: "info", msg: `sandbox: ${this.sandbox.label}` });
155
- }
156
- catch (e) {
157
- this.fatal = `Sandbox failed to start — ${(0, util_1.errMsg)(e)}`;
158
- this.finishReason = this.fatal;
159
- this.journal.append("log", { level: "error", msg: this.fatal });
160
- await this.fail(this.fatal);
161
- return;
223
+ if (this.mode === "root") {
224
+ // Surface AIMD limiter adjustments (429 pressure) in the journal/UI.
225
+ (0, deepseek_1.gateFor)(this.cfg).onState = (s) => {
226
+ this.journal.append("limiter.state", { ceiling: s.ceiling, active: s.active, queued: s.queued });
227
+ };
228
+ // Preflight: validate auth before doing any work so the operator gets an
229
+ // instant, clear error instead of a phantom "done" run. (Teams inherit a
230
+ // parent that already passed.)
231
+ const auth = await (0, deepseek_1.validateAuth)(this.cfg);
232
+ if (auth.status === "invalid") {
233
+ this.fatal = `Provider authentication failed — ${auth.message || "invalid API key"}. Set a valid key in Settings (or: swarm config set apiKey <...>).`;
234
+ this.finishReason = this.fatal;
235
+ this.journal.append("log", { level: "error", msg: this.fatal });
236
+ await this.fail(this.fatal);
237
+ return;
238
+ }
239
+ // Boot the sandbox before any work — a dead Docker daemon or a bad cloud
240
+ // key must fail the run instantly with a clear reason, not mid-mission.
241
+ // (Teams share the parent's already-running sandbox.)
242
+ try {
243
+ await this.sandbox.start((msg) => this.journal.append("log", { level: "info", msg }));
244
+ this.journal.append("log", { level: "info", msg: `sandbox: ${this.sandbox.label}` });
245
+ }
246
+ catch (e) {
247
+ this.fatal = `Sandbox failed to start — ${(0, util_1.errMsg)(e)}`;
248
+ this.finishReason = this.fatal;
249
+ this.journal.append("log", { level: "error", msg: this.fatal });
250
+ await this.fail(this.fatal);
251
+ return;
252
+ }
162
253
  }
163
254
  // Operator control must land while agents are mid-task, not only when the
164
255
  // scheduler wakes up — a Stop click aborts in-flight work within ~1s.
@@ -170,13 +261,18 @@ class Executor {
170
261
  /* control polling must never kill the run */
171
262
  }
172
263
  }, 750);
264
+ // Real-directory runs remember: prior missions in the same workspace feed
265
+ // the conductor so it builds on settled decisions instead of starting cold.
266
+ const memory = this.mode === "root" && !this.meta.sandbox ? (0, memory_1.memoryBlock)(this.meta.cwd) : "";
173
267
  this.conductorMessages = [
174
- { role: "system", content: (0, prompts_1.conductorSystem)(this.meta) },
268
+ { role: "system", content: (0, prompts_1.conductorSystem)(this.meta) + (memory ? `\n\n${memory}` : "") },
175
269
  {
176
270
  role: "user",
177
271
  content: this.resumed
178
272
  ? (0, prompts_1.conductorUpdate)({
179
273
  blackboard: this.blackboardDigest(),
274
+ phase: this.phaseLine(),
275
+ plan: this.planPin(),
180
276
  nextId: this.nextId(),
181
277
  taskTable: (0, prompts_1.taskTable)(this.taskList()),
182
278
  budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -188,56 +284,23 @@ class Executor {
188
284
  : (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
189
285
  },
190
286
  ];
287
+ if (this.resumed) {
288
+ // The conductor's reasoning history died with the old process. Re-seed
289
+ // the durable facts into the same slot trimConductorHistory() maintains,
290
+ // so a resumed conductor knows what settled and what was decided.
291
+ this.conductorMessages.splice(1, 0, {
292
+ role: "user",
293
+ content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
294
+ });
295
+ }
191
296
  try {
192
297
  await this.conductorTurn();
193
298
  this.setStatus("running");
194
- while (!this.finishing) {
195
- this.drainControl();
196
- if (this.finishing)
197
- break;
198
- if (this.budgetExceeded()) {
199
- this.finishing = true;
200
- this.finishReason = "token budget reached";
201
- break;
202
- }
203
- this.startReadyTasks();
204
- if (this.inflight.size === 0) {
205
- const runnable = this.runnableTasks();
206
- if (runnable.length > 0)
207
- continue; // loop starts them
208
- // Nothing running, nothing runnable. Include any reports that
209
- // settled while the conductor was mid-turn — they must not be lost.
210
- this.blockStuckTasks();
211
- const reports = this.drainSettled();
212
- if (!this.hasOpenWork()) {
213
- // Everything is terminal. Ask the conductor for a final decision.
214
- this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
215
- await this.conductorTurn();
216
- if (this.lastConductorAction !== "spawn") {
217
- this.finishing = true;
218
- this.finishReason = this.finishReason || "all tasks settled";
219
- }
220
- }
221
- else {
222
- // Stuck: pending tasks exist but can't run (failed/blocked deps).
223
- this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
224
- await this.conductorTurn();
225
- if (this.lastConductorAction === "wait") {
226
- this.finishing = true;
227
- this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
228
- }
229
- }
230
- continue;
231
- }
232
- // Tasks are running — wait for at least one to settle.
233
- await Promise.race([...this.inflight.values()]);
234
- this.drainControl();
235
- const reports = this.drainSettled();
236
- if (reports.length && !this.finishing) {
237
- this.appendConductorUpdate(undefined, reports);
238
- await this.conductorTurn();
239
- }
240
- }
299
+ await this.mainLoop();
300
+ // Strict verification: one completeness review before synthesis; if it
301
+ // finds real gaps the conductor gets one chance to fill them.
302
+ if (await this.completenessPass())
303
+ await this.mainLoop();
241
304
  }
242
305
  catch (e) {
243
306
  if (!this.ac.signal.aborted) {
@@ -251,12 +314,231 @@ class Executor {
251
314
  await Promise.allSettled([...this.inflight.values()]);
252
315
  }
253
316
  this.drainSettled();
317
+ if (this.mode === "team") {
318
+ await this.consolidateTeam();
319
+ return; // the parent owns the sandbox, final flush, and run status
320
+ }
254
321
  await this.synthesize();
255
- await this.sandbox.destroy().catch(() => {
256
- /* container/sandbox teardown is best-effort */
257
- });
322
+ // Teardown is best-effort AND bounded — a wedged container must not hang
323
+ // the engine after the report is already written.
324
+ await Promise.race([
325
+ this.sandbox.destroy().catch(() => { }),
326
+ new Promise((r) => setTimeout(r, 15_000).unref()),
327
+ ]);
258
328
  await this.journal.flush();
259
329
  }
330
+ // ---------------------------------------------------------------- teams
331
+ /** All artifacts reported by this (team) executor's tasks. */
332
+ teamArtifacts() {
333
+ return [...new Set(this.taskList().flatMap((t) => t.artifacts))];
334
+ }
335
+ /** Whether any task here actually completed. */
336
+ anyTaskDone() {
337
+ return this.taskList().some((t) => t.status === "done");
338
+ }
339
+ /** Team-mode finale: one consolidated report instead of run synthesis. */
340
+ async consolidateTeam() {
341
+ const tasks = this.taskList();
342
+ const reports = tasks.length ? tasks.map(prompts_1.reportBlock).join("\n\n") : "(no tasks were completed)";
343
+ try {
344
+ const res = await (0, deepseek_1.chat)(this.cfg, {
345
+ model: this.meta.options.conductorModel,
346
+ priority: "high",
347
+ messages: [
348
+ {
349
+ role: "user",
350
+ content: `You led a sub-team inside a larger agent swarm. Consolidate your team's work into ONE report for the parent conductor: what was accomplished (with evidence and exact paths), what failed or remains open, and the key facts the rest of the mission needs.\n\nTEAM OBJECTIVE\n${this.meta.mission}\n\nOUTCOME: ${this.finishReason || "completed"}\nLead's closing notes: ${this.finishNotes || "(none)"}\n\nTASK REPORTS\n${(0, util_1.truncateMiddle)(reports, 60_000, "chars")}\n\nReply with the consolidated report only.`,
351
+ },
352
+ ],
353
+ thinking: false,
354
+ maxTokens: 4096,
355
+ signal: new AbortController().signal, // consolidation runs even when cancelled
356
+ });
357
+ this.onUsage(this.meta.options.conductorModel, res.usage);
358
+ this.teamReport = (res.content || "").trim();
359
+ }
360
+ catch (e) {
361
+ this.journal.append("log", { level: "warn", msg: `team consolidation failed: ${(0, util_1.errMsg)(e)}` });
362
+ }
363
+ if (!this.teamReport) {
364
+ this.teamReport = tasks
365
+ .map((t) => `${t.id} [${t.status}] ${t.title}: ${(0, util_1.oneLine)(t.report ?? t.error ?? "(no output)", 200)}`)
366
+ .join("\n");
367
+ }
368
+ }
369
+ /** Run a team:true task as a sub-swarm sharing this run's everything. */
370
+ async runTeam(task) {
371
+ const remaining = Math.max(0, this.meta.options.maxTokens - this.spentTokens);
372
+ const childMeta = {
373
+ ...this.meta,
374
+ mission: `${task.objective}${task.context ? `\n\nContext from the parent conductor:\n${task.context}` : ""}`,
375
+ options: {
376
+ ...this.meta.options,
377
+ maxWorkers: task.teamMaxWorkers || Math.max(2, Math.min(16, Math.floor(this.meta.options.maxWorkers / 2))),
378
+ maxTokens: Math.min(remaining, task.teamBudgetTokens || Math.max(50_000, Math.floor(remaining / 4))),
379
+ maxTasks: Math.min(this.meta.options.maxTasks, 24),
380
+ },
381
+ };
382
+ this.journal.append("team.created", {
383
+ taskId: task.id,
384
+ maxWorkers: childMeta.options.maxWorkers,
385
+ budgetTokens: childMeta.options.maxTokens,
386
+ });
387
+ const child = new Executor(this.cfg, childMeta, new journal_1.TeamJournal(this.journal, task.id), {
388
+ mode: "team",
389
+ teamId: task.id,
390
+ sandbox: this.sandbox,
391
+ runDirPath: this.runDirPath,
392
+ onUsageForward: (model, usage) => {
393
+ // Absorb tokens/cost only — the child already journaled the usage event.
394
+ this.spentTokens += usage.promptTokens + usage.completionTokens;
395
+ this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
396
+ },
397
+ parentSignal: this.ac.signal,
398
+ sharedNotes: this.notes,
399
+ });
400
+ await child.run();
401
+ if (this.ac.signal.aborted) {
402
+ this.finalizeTask(task, "failed", "run cancelled");
403
+ return;
404
+ }
405
+ const report = child.teamReport || "(team produced no consolidated report)";
406
+ for (const a of child.teamArtifacts())
407
+ if (!task.artifacts.includes(a))
408
+ task.artifacts.push(a);
409
+ task.report = report;
410
+ task.reportStatus = "done";
411
+ this.journal.append("team.report", { taskId: task.id, report, artifacts: task.artifacts });
412
+ this.journal.append("task.report", { taskId: task.id, status: "done", report, artifacts: task.artifacts });
413
+ this.finalizeTask(task, child.anyTaskDone() ? "done" : "failed", report);
414
+ }
415
+ async mainLoop() {
416
+ while (!this.finishing) {
417
+ this.drainControl();
418
+ if (this.finishing)
419
+ break;
420
+ if (this.budgetExceeded()) {
421
+ this.finishing = true;
422
+ this.finishReason = "token budget reached";
423
+ break;
424
+ }
425
+ if (this.journal.degraded) {
426
+ // The journal is the source of truth; if it can't be written, the
427
+ // run must stop loudly rather than burn tokens on unrecorded work.
428
+ this.finishing = true;
429
+ this.finishReason = "journal writes are failing — run state is no longer durable";
430
+ this.ac.abort();
431
+ break;
432
+ }
433
+ this.startReadyTasks();
434
+ if (this.inflight.size === 0) {
435
+ const runnable = this.runnableTasks();
436
+ if (runnable.length > 0)
437
+ continue; // loop starts them
438
+ // Nothing running, nothing runnable. Include any reports that
439
+ // settled while the conductor was mid-turn — they must not be lost.
440
+ this.blockStuckTasks();
441
+ const reports = this.drainSettled();
442
+ if (!this.hasOpenWork()) {
443
+ // Everything is terminal. Ask the conductor for a final decision.
444
+ this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
445
+ await this.conductorTurn();
446
+ // An errored turn is not a decision — keep looping so the breaker
447
+ // can retry (and eventually trip) instead of misreading the error
448
+ // as "the conductor chose to stop".
449
+ if (this.lastConductorAction !== "spawn" && !this.lastConductorErrored) {
450
+ this.finishing = true;
451
+ this.finishReason = this.finishReason || "all tasks settled";
452
+ }
453
+ }
454
+ else {
455
+ // Stuck: pending tasks exist but can't run (failed/blocked deps).
456
+ this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
457
+ await this.conductorTurn();
458
+ if (this.lastConductorAction === "wait" && !this.lastConductorErrored) {
459
+ this.finishing = true;
460
+ this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
461
+ }
462
+ }
463
+ continue;
464
+ }
465
+ // Tasks are running — wait for at least one to settle, then debounce:
466
+ // at 100 agents, settles arrive constantly, and waking the conductor
467
+ // for every one of them serializes the whole swarm on its turns.
468
+ await Promise.race([...this.inflight.values()]);
469
+ const debounceMs = Number(process.env.SWARM_SETTLE_DEBOUNCE_MS ?? "2000");
470
+ const settleCap = Math.max(3, Math.ceil(this.activeWorkerCount() / 8));
471
+ while (debounceMs > 0 && this.inflight.size > 0 && this.settledSinceUpdate.length < settleCap) {
472
+ const before = this.settledSinceUpdate.length;
473
+ await Promise.race([...this.inflight.values(), (0, util_1.sleep)(debounceMs)]);
474
+ if (this.settledSinceUpdate.length === before)
475
+ break; // quiet period — flush to the conductor
476
+ this.drainControl();
477
+ if (this.finishing)
478
+ break;
479
+ this.startReadyTasks(); // settles free dep chains; don't idle workers during the debounce
480
+ }
481
+ this.drainControl();
482
+ const reports = this.drainSettled();
483
+ if (reports.length && !this.finishing) {
484
+ this.appendConductorUpdate(undefined, reports);
485
+ await this.conductorTurn();
486
+ }
487
+ }
488
+ }
489
+ /**
490
+ * Strict-mode gap review before synthesis. Returns true when the conductor
491
+ * accepted gap-filling work (the main loop must run again).
492
+ */
493
+ gapPassDone = false;
494
+ async completenessPass() {
495
+ if (this.mode === "team")
496
+ return false; // the root run owns gap review
497
+ if (this.cfg.verification !== "strict" || this.gapPassDone)
498
+ return false;
499
+ if (this.fatal || this.ac.signal.aborted || this.budgetExceeded())
500
+ return false;
501
+ if (this.finishReason.includes("cancel") || this.finishReason.includes("conductor unavailable"))
502
+ return false;
503
+ if (!this.taskList().some((t) => t.status === "done"))
504
+ return false;
505
+ this.gapPassDone = true;
506
+ let verdict = "";
507
+ try {
508
+ const res = await (0, deepseek_1.chat)(this.cfg, {
509
+ model: this.meta.options.conductorModel,
510
+ messages: [
511
+ {
512
+ role: "user",
513
+ content: (0, prompts_1.completenessPrompt)(this.meta.mission, (0, prompts_1.taskTable)(this.taskList()), (0, util_1.truncateMiddle)(this.taskList().map(prompts_1.reportBlock).join("\n\n"), 80_000, "chars")),
514
+ },
515
+ ],
516
+ thinking: false,
517
+ maxTokens: 2048,
518
+ signal: this.ac.signal,
519
+ });
520
+ this.onUsage(this.meta.options.conductorModel, res.usage);
521
+ verdict = (res.content || "").trim();
522
+ }
523
+ catch (e) {
524
+ this.journal.append("log", { level: "warn", msg: `completeness review failed: ${(0, util_1.errMsg)(e)}` });
525
+ return false;
526
+ }
527
+ if (!verdict || /^COMPLETE\b/i.test(verdict)) {
528
+ this.journal.append("log", { level: "info", msg: "completeness review: no gaps found" });
529
+ return false;
530
+ }
531
+ this.journal.append("log", { level: "info", msg: `completeness review found gaps:\n${(0, util_1.clip)(verdict, 1500)}` });
532
+ this.finishing = false;
533
+ this.appendConductorUpdate(`COMPLETENESS REVIEW found gaps before final synthesis:\n${(0, util_1.clip)(verdict, 2000)}\n` +
534
+ "Spawn focused tasks to close the REAL gaps (or finish if you judge them immaterial). This is the final round.");
535
+ await this.conductorTurn();
536
+ if (this.lastConductorAction === "spawn")
537
+ return true;
538
+ this.finishing = true;
539
+ this.finishReason = this.finishReason || "all tasks settled";
540
+ return false;
541
+ }
260
542
  // ---------------------------------------------------------------- conductor
261
543
  nextId() {
262
544
  return this.taskCounter + 1;
@@ -267,7 +549,7 @@ class Executor {
267
549
  // Re-bound the history every turn — the nudge loop and tool-result pushes
268
550
  // below grow it outside appendConductorUpdate's trim.
269
551
  this.trimConductorHistory();
270
- const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
552
+ const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.SET_PHASE_TOOL, tools_1.UPDATE_PLAN_TOOL, tools_1.CONDUCTOR_READ_REPORT_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
271
553
  for (let attempt = 0; attempt < 3; attempt++) {
272
554
  let res;
273
555
  try {
@@ -278,6 +560,9 @@ class Executor {
278
560
  // "auto" rather than "required" for cross-provider safety; the prompt
279
561
  // mandates a tool call and the no-tool nudge loop below enforces it.
280
562
  toolChoice: "auto",
563
+ // The conductor is the swarm's brain: it must never queue behind a
564
+ // hundred worker streams.
565
+ priority: "high",
281
566
  thinking: this.meta.options.thinking,
282
567
  reasoningEffort: this.meta.options.reasoningEffort,
283
568
  // Generous: with thinking enabled, reasoning + a large spawn_tasks
@@ -297,11 +582,26 @@ class Executor {
297
582
  this.fatal = `Provider authentication failed — ${msg}. Set a valid key in Settings.`;
298
583
  this.finishing = true;
299
584
  this.finishReason = this.fatal;
585
+ return;
586
+ }
587
+ // Circuit breaker: a transient failure degrades to "wait" so the loop
588
+ // keeps draining tasks, but repeated consecutive failures must end the
589
+ // run with a clear reason rather than spin forever.
590
+ this.conductorFailures++;
591
+ if (this.conductorFailures >= 5) {
592
+ this.finishing = true;
593
+ this.finishReason = `conductor unavailable: ${this.conductorFailures} consecutive call failures (last: ${msg})`;
594
+ return;
300
595
  }
301
- // Treat a transient conductor failure as a wait so the loop keeps draining tasks.
596
+ const scale = Number(process.env.SWARM_BACKOFF_SCALE || "1") || 1;
597
+ const backoff = [2_000, 5_000, 15_000, 30_000][Math.min(this.conductorFailures - 1, 3)] * scale;
598
+ await new Promise((r) => setTimeout(r, backoff));
302
599
  this.lastConductorAction = "wait";
600
+ this.lastConductorErrored = true;
303
601
  return;
304
602
  }
603
+ this.conductorFailures = 0;
604
+ this.lastConductorErrored = false;
305
605
  this.onUsage(this.meta.options.conductorModel, res.usage);
306
606
  if (res.content.trim())
307
607
  this.journal.append("conductor.say", { text: (0, util_1.clip)(res.content, 4000) });
@@ -334,6 +634,41 @@ class Executor {
334
634
  toolResult = "Acknowledged. Synthesizing the final deliverable.";
335
635
  acted = "finish";
336
636
  }
637
+ else if (call.function.name === "update_plan") {
638
+ const md = String(args.markdown ?? "");
639
+ if (md.trim()) {
640
+ this.planDoc = md;
641
+ try {
642
+ fs.writeFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), md, "utf8");
643
+ }
644
+ catch (e) {
645
+ this.journal.append("log", { level: "warn", msg: `plan write failed: ${(0, util_1.errMsg)(e)}` });
646
+ }
647
+ this.journal.append("plan.updated", { teamScoped: this.mode === "team" || undefined, excerpt: (0, util_1.clip)(md, 1200) });
648
+ toolResult = `Plan saved to artifacts/${this.planFileName()}.`;
649
+ }
650
+ else {
651
+ toolResult = "Plan was empty — not saved.";
652
+ }
653
+ // Bookkeeping, not a scheduling decision — falls through to the nudge.
654
+ }
655
+ else if (call.function.name === "read_report") {
656
+ toolResult = (0, util_1.truncateMiddle)(this.readReportText(String(args.task_id ?? "")), 8000, "chars");
657
+ // Information lookup, not a scheduling decision — falls through to
658
+ // the nudge loop if the conductor stopped here.
659
+ }
660
+ else if (call.function.name === "set_phase") {
661
+ const name = (0, util_1.clip)(String(args.name ?? ""), 80);
662
+ this.phase = {
663
+ name,
664
+ goal: args.goal ? String(args.goal) : undefined,
665
+ exitCriteria: args.exit_criteria ? String(args.exit_criteria) : undefined,
666
+ };
667
+ this.journal.append("phase.set", { name, goal: this.phase.goal, exit_criteria: this.phase.exitCriteria });
668
+ toolResult = `Phase set: ${name}. Now also call spawn_tasks, wait, or finish.`;
669
+ // Not a scheduling decision by itself — fall through to the nudge
670
+ // loop if the conductor stopped here.
671
+ }
337
672
  else if (call.function.name === "wait") {
338
673
  toolResult = "Waiting for running tasks to report.";
339
674
  if (acted === "none")
@@ -344,6 +679,12 @@ class Executor {
344
679
  }
345
680
  this.conductorMessages.push({ role: "tool", tool_call_id: call.id, content: toolResult });
346
681
  }
682
+ if (acted === "none") {
683
+ // A bookkeeping call (set_phase, update_plan, read_report) or an unknown tool is not a scheduling decision —
684
+ // ask again rather than letting the run misread it as "wait"/"finish".
685
+ this.conductorMessages.push({ role: "user", content: "Now call spawn_tasks, wait, or finish." });
686
+ continue;
687
+ }
347
688
  this.lastConductorAction = acted;
348
689
  this.journal.append("conductor.action", { kind: acted });
349
690
  return;
@@ -380,6 +721,7 @@ class Executor {
380
721
  warnings.push(`${id}: dropped dep "${d}" (${idx >= i ? "same-batch later task — would deadlock" : "unknown task"})`);
381
722
  return false;
382
723
  });
724
+ const rawSpec = spec;
383
725
  const task = {
384
726
  id,
385
727
  title: (0, util_1.clip)(String(spec.title ?? "task"), 120),
@@ -388,6 +730,10 @@ class Executor {
388
730
  deps,
389
731
  verify: Boolean(spec.verify) && this.cfg.verification !== "off",
390
732
  context: spec.context ? String(spec.context) : undefined,
733
+ modelTier: ["cheap", "strong"].includes(String(spec.model)) ? spec.model : undefined,
734
+ team: Boolean(spec.team) && this.mode === "root",
735
+ teamMaxWorkers: Number(rawSpec.team_max_workers ?? rawSpec.teamMaxWorkers) || undefined,
736
+ teamBudgetTokens: Number(rawSpec.team_budget_tokens ?? rawSpec.teamBudgetTokens) || undefined,
391
737
  status: "pending",
392
738
  attempt: 1,
393
739
  wave,
@@ -415,14 +761,46 @@ class Executor {
415
761
  w = Math.max(w, t.wave);
416
762
  return w + 1;
417
763
  }
764
+ /** The conductor's living plan document (mission-plan.md). */
765
+ planDoc = "";
766
+ planFileName() {
767
+ return this.mode === "team" ? `mission-plan-${this.teamId}.md` : "mission-plan.md";
768
+ }
769
+ planPin() {
770
+ if (!this.planDoc)
771
+ return undefined;
772
+ return `MISSION PLAN (artifacts/${this.planFileName()}, maintained via update_plan):\n${(0, util_1.clip)(this.planDoc, 1500)}`;
773
+ }
774
+ phaseLine() {
775
+ if (!this.phase)
776
+ return undefined;
777
+ return `CURRENT PHASE: ${this.phase.name}${this.phase.goal ? ` — ${this.phase.goal}` : ""}${this.phase.exitCriteria ? ` (exit: ${this.phase.exitCriteria})` : ""}`;
778
+ }
779
+ /** Full text for the reports that matter, one-liners past the cap. */
780
+ digestReports(reports) {
781
+ const CAP = 12;
782
+ if (reports.length <= CAP)
783
+ return reports.map(prompts_1.reportBlock);
784
+ const important = reports.filter((t) => t.status !== "done");
785
+ const done = reports.filter((t) => t.status === "done");
786
+ const fullDone = done.slice(-Math.max(0, CAP - important.length));
787
+ const briefDone = done.slice(0, done.length - fullDone.length);
788
+ return [
789
+ ...important.map(prompts_1.reportBlock),
790
+ ...fullDone.map(prompts_1.reportBlock),
791
+ ...briefDone.map((t) => `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → DONE — ${(0, util_1.oneLine)(t.report ?? "", 140)} (full text: read_report)`),
792
+ ];
793
+ }
418
794
  appendConductorUpdate(extra, reports) {
419
795
  const ops = this.consumeOperatorNotes();
420
796
  this.conductorMessages.push({
421
797
  role: "user",
422
798
  content: (0, prompts_1.conductorUpdate)({
423
- reports: reports?.map(prompts_1.reportBlock),
799
+ reports: reports ? this.digestReports(reports) : undefined,
424
800
  operatorNotes: ops,
425
801
  blackboard: this.blackboardDigest(),
802
+ phase: this.phaseLine(),
803
+ plan: this.planPin(),
426
804
  nextId: this.nextId(),
427
805
  taskTable: (0, prompts_1.taskTable)(this.taskList()),
428
806
  budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -432,26 +810,84 @@ class Executor {
432
810
  // Keep the conductor's own history from growing without bound.
433
811
  this.trimConductorHistory();
434
812
  }
813
+ /**
814
+ * One-screen summary of everything durable about the run so far. Replaces
815
+ * trimmed history so the conductor never loses the plot on long missions —
816
+ * rebuilt fresh each trim from current state, so it also survives resume.
817
+ */
818
+ missionLedger(intro = "Earlier orchestration history was trimmed.") {
819
+ const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
820
+ if (this.phase)
821
+ lines.push(this.phaseLine());
822
+ const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
823
+ if (settled.length) {
824
+ lines.push("Settled tasks:");
825
+ const failures = settled.filter((t) => t.status !== "done");
826
+ const done = settled.filter((t) => t.status === "done");
827
+ // Failures stay itemized forever; done tasks collapse by wave once the
828
+ // run gets big (a 500-task ledger must still fit on one screen).
829
+ if (done.length > 30) {
830
+ const waves = [...new Set(done.map((t) => t.wave))].sort((a, b) => a - b);
831
+ for (const w of waves) {
832
+ const ws = done.filter((t) => t.wave === w);
833
+ lines.push(`- wave ${w}: ${ws.length} done (${ws.map((t) => t.id).join(",")})`);
834
+ }
835
+ }
836
+ else {
837
+ for (const t of done)
838
+ lines.push(`- ${t.id} [done] ${(0, util_1.clip)(t.title, 60)}${t.report ? ` — ${(0, util_1.oneLine)(t.report, 120)}` : ""}`);
839
+ }
840
+ for (const t of failures) {
841
+ lines.push(`- ${t.id} [${t.status}] ${(0, util_1.clip)(t.title, 60)}${t.error ? ` — ${(0, util_1.oneLine)(t.error, 80)}` : ""}`);
842
+ }
843
+ }
844
+ const decisions = this.notes.filter((n) => n.kind === "decision");
845
+ if (decisions.length) {
846
+ lines.push("Decisions:");
847
+ for (const d of decisions.slice(-20))
848
+ lines.push(`- ${(0, util_1.oneLine)(d.text, 140)}`);
849
+ }
850
+ return (0, util_1.clip)(lines.join("\n"), 8000);
851
+ }
435
852
  trimConductorHistory() {
436
853
  const MAX = 60;
437
- const TRIM_NOTICE = "[Earlier orchestration history was trimmed. Current swarm state is below.]";
854
+ const LEDGER_MARK = "MISSION LEDGER";
855
+ const setLedger = () => {
856
+ const msg = { role: "user", content: this.missionLedger() };
857
+ if (this.conductorMessages[1]?.content?.includes(LEDGER_MARK))
858
+ this.conductorMessages[1] = msg;
859
+ else
860
+ this.conductorMessages.splice(1, 0, msg);
861
+ };
862
+ // Old conductor turns carry the bulk in thinking traces and verbose prose;
863
+ // the durable decisions live in the ledger and the plan pin. Compact them
864
+ // in place before resorting to dropping whole messages. (sanitizeMessages
865
+ // backfills reasoning_content with "" for DeepSeek tool-call turns.)
866
+ for (let i = 1; i < this.conductorMessages.length - 6; i++) {
867
+ const m = this.conductorMessages[i];
868
+ if (m.role !== "assistant")
869
+ continue;
870
+ if (m.reasoning_content)
871
+ m.reasoning_content = "";
872
+ if (m.content && m.content.length > 400)
873
+ m.content = (0, util_1.clip)(m.content, 400);
874
+ }
438
875
  if (this.conductorMessages.length > MAX) {
439
876
  const system = this.conductorMessages[0];
440
877
  const tail = this.conductorMessages.slice(-(MAX - 2));
441
878
  // Don't begin the tail on an orphaned tool result.
442
879
  while (tail.length && tail[0].role === "tool")
443
880
  tail.shift();
444
- this.conductorMessages = [system, { role: "user", content: TRIM_NOTICE }, ...tail];
881
+ this.conductorMessages = [system, ...tail];
882
+ setLedger();
445
883
  }
446
884
  // Count alone doesn't bound size: every update embeds the full task table,
447
885
  // so a deep run can blow the model window long before 60 messages. The
448
886
  // mission itself lives in the system message and always survives.
449
- const budget = Math.floor(this.cfg.contextTokenLimit * 0.75);
887
+ const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
450
888
  if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
451
889
  return;
452
- if (this.conductorMessages[1]?.content !== TRIM_NOTICE) {
453
- this.conductorMessages.splice(1, 0, { role: "user", content: TRIM_NOTICE });
454
- }
890
+ setLedger();
455
891
  while ((0, agent_1.estimateMessages)(this.conductorMessages) > budget && this.conductorMessages.length > 10) {
456
892
  this.conductorMessages.splice(2, 1);
457
893
  // Never leave tool results whose assistant turn was dropped.
@@ -469,25 +905,62 @@ class Executor {
469
905
  hasOpenWork() {
470
906
  return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
471
907
  }
908
+ /** Walk a failed/blocked dep chain down to the task that actually failed. */
909
+ rootFailure(id) {
910
+ let cur = this.tasks.get(id);
911
+ const seen = new Set();
912
+ while (cur && !seen.has(cur.id)) {
913
+ seen.add(cur.id);
914
+ const next = cur.deps
915
+ .map((d) => this.tasks.get(d))
916
+ .find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
917
+ if (!next)
918
+ return cur;
919
+ cur = next;
920
+ }
921
+ return cur;
922
+ }
472
923
  blockStuckTasks() {
473
- for (const t of this.taskList()) {
474
- if (t.status !== "pending")
475
- continue;
476
- const bad = t.deps.find((d) => {
477
- const s = this.tasks.get(d)?.status;
478
- return s === "failed" || s === "blocked";
479
- });
480
- if (bad) {
924
+ // Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
925
+ // pass, not one level per conductor turn.
926
+ for (let changed = true; changed;) {
927
+ changed = false;
928
+ for (const t of this.taskList()) {
929
+ if (t.status !== "pending")
930
+ continue;
931
+ const bad = t.deps.find((d) => {
932
+ const s = this.tasks.get(d)?.status;
933
+ return s === "failed" || s === "blocked";
934
+ });
935
+ if (!bad)
936
+ continue;
937
+ // Carry the root cause so the conductor re-plans around the actual
938
+ // failure, not a chain of "dependency did not complete".
939
+ const root = this.rootFailure(bad);
940
+ const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
481
941
  t.status = "blocked";
482
- t.error = `dependency ${bad} did not complete`;
942
+ t.error =
943
+ root && root.id !== bad
944
+ ? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
945
+ : `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
483
946
  t.endedAt = Date.now();
484
947
  this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
485
948
  this.settledSinceUpdate.push(t.id);
949
+ changed = true;
486
950
  }
487
951
  }
488
952
  }
953
+ /** Tasks occupying a worker slot: running, not those awaiting verification. */
954
+ activeWorkerCount() {
955
+ let n = 0;
956
+ for (const id of this.inflight.keys()) {
957
+ if (this.tasks.get(id)?.status === "running")
958
+ n++;
959
+ }
960
+ return n;
961
+ }
489
962
  startReadyTasks() {
490
- while (this.inflight.size < this.meta.options.maxWorkers && !this.finishing) {
963
+ while (this.activeWorkerCount() < this.meta.options.maxWorkers && !this.finishing) {
491
964
  const next = this.runnableTasks()[0];
492
965
  if (!next)
493
966
  break;
@@ -516,12 +989,14 @@ class Executor {
516
989
  depReportsFor(task) {
517
990
  if (!task.deps.length)
518
991
  return "";
992
+ // Excerpts, not full reports: a fan-in task with many deps must not blow
993
+ // its context window on day one. Workers fetch full text with read_report.
519
994
  return task.deps
520
995
  .map((d) => {
521
996
  const dep = this.tasks.get(d);
522
997
  if (!dep)
523
998
  return `(${d}: missing)`;
524
- return (0, prompts_1.reportBlock)(dep);
999
+ return (0, prompts_1.depReportBlock)(dep);
525
1000
  })
526
1001
  .join("\n\n");
527
1002
  }
@@ -535,13 +1010,39 @@ class Executor {
535
1010
  agentId,
536
1011
  taskId: task?.id,
537
1012
  signal: this.ac.signal,
538
- addNote: (text, key) => {
539
- this.notes.push({ taskId: task?.id, key, text });
1013
+ addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
1014
+ addNote: (text, key, kind, url) => {
1015
+ this.notes.push({ taskId: task?.id, key, kind, text, url });
540
1016
  // Only the recent tail ever feeds digests; without a cap a multi-day
541
- // run accumulates every note in memory.
542
- if (this.notes.length > 2000)
543
- this.notes.splice(0, this.notes.length - 2000);
544
- this.journal.append("note.added", { taskId: task?.id, agentId, key, text: (0, util_1.clip)(text, 1200) });
1017
+ // run accumulates every note in memory. Decisions and conflicts are
1018
+ // kept regardless. In-place splice: teams share this array by reference.
1019
+ if (this.notes.length > 4000) {
1020
+ const keep = (n) => n.kind === "decision" || n.kind === "conflict";
1021
+ const pinnedCount = this.notes.filter(keep).length;
1022
+ let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
1023
+ for (let i = 0; i < this.notes.length && toDrop > 0;) {
1024
+ if (!keep(this.notes[i])) {
1025
+ this.notes.splice(i, 1);
1026
+ toDrop--;
1027
+ }
1028
+ else
1029
+ i++;
1030
+ }
1031
+ }
1032
+ this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
1033
+ },
1034
+ searchNotes: (q) => this.searchNotes(q),
1035
+ readReport: (taskId) => this.readReportText(taskId),
1036
+ checkClaim: (rel) => {
1037
+ const norm = rel.replace(/^\.\//, "");
1038
+ const claim = this.notes.find((n) => n.kind === "claim" &&
1039
+ n.key === norm &&
1040
+ n.taskId &&
1041
+ n.taskId !== task?.id &&
1042
+ ["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? ""));
1043
+ return claim
1044
+ ? `⚠ ${claim.taskId} holds a claim on ${norm} ("${(0, util_1.oneLine)(claim.text, 80)}") — coordinate via the blackboard before further edits.`
1045
+ : null;
545
1046
  },
546
1047
  addArtifact: (rel) => {
547
1048
  if (task && !task.artifacts.includes(rel))
@@ -553,7 +1054,33 @@ class Executor {
553
1054
  },
554
1055
  };
555
1056
  }
1057
+ readReportText(taskId) {
1058
+ const t = this.tasks.get(taskId.trim().toUpperCase());
1059
+ if (!t)
1060
+ return `no such task: ${taskId}`;
1061
+ if (!t.report)
1062
+ return `${t.id} has not reported yet (status: ${t.status})`;
1063
+ return `${t.id} "${t.title}" → ${t.status}\n${t.report}${t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : ""}`;
1064
+ }
1065
+ recordCheckpoint(task, agentId, summary) {
1066
+ task.lastCheckpoint = (0, util_1.clip)(summary, 4000);
1067
+ this.journal.append("task.checkpoint", {
1068
+ taskId: task.id,
1069
+ agentId,
1070
+ attempt: task.attempt,
1071
+ summary: task.lastCheckpoint,
1072
+ });
1073
+ }
556
1074
  async runTaskPipeline(task) {
1075
+ if (task.team) {
1076
+ try {
1077
+ await this.runTeam(task);
1078
+ }
1079
+ catch (e) {
1080
+ this.finalizeTask(task, "failed", `team error: ${(0, util_1.errMsg)(e)}`);
1081
+ }
1082
+ return;
1083
+ }
557
1084
  for (;;) {
558
1085
  try {
559
1086
  const outcome = await this.runWorker(task);
@@ -566,10 +1093,15 @@ class Executor {
566
1093
  this.finalizeTask(task, "failed", task.feedback || task.error || "not retried: run is winding down");
567
1094
  return;
568
1095
  }
569
- if (task.attempt < VERIFY_MAX_ATTEMPTS) {
1096
+ if (task.attempt < this.cfg.verifyMaxAttempts) {
570
1097
  task.attempt++;
571
1098
  task.status = "running";
572
- this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt });
1099
+ this.journal.append("task.status", {
1100
+ taskId: task.id,
1101
+ status: "running",
1102
+ attempt: task.attempt,
1103
+ reason: task.feedback || task.error,
1104
+ });
573
1105
  continue;
574
1106
  }
575
1107
  this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
@@ -582,22 +1114,31 @@ class Executor {
582
1114
  this.finalizeTask(task, "failed", "run cancelled");
583
1115
  return;
584
1116
  }
585
- if (task.attempt < VERIFY_MAX_ATTEMPTS && !this.finishing && !this.budgetExceeded()) {
1117
+ if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
586
1118
  task.attempt++;
587
- task.error = (0, util_1.errMsg)(e);
1119
+ task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
588
1120
  task.status = "running";
589
1121
  this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
590
1122
  continue;
591
1123
  }
592
- this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
1124
+ this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
593
1125
  return;
594
1126
  }
595
1127
  }
596
1128
  }
1129
+ resolveModel(tier) {
1130
+ if (tier === "cheap")
1131
+ return this.cfg.cheapModel || this.meta.options.model;
1132
+ if (tier === "strong")
1133
+ return this.cfg.strongModel || this.meta.options.model;
1134
+ return this.meta.options.model;
1135
+ }
597
1136
  /** Returns "retry" to request another attempt, or "done" when finalized. */
598
1137
  async runWorker(task) {
599
1138
  const agentId = (0, util_1.rid)("w");
1139
+ const model = this.resolveModel(task.modelTier);
600
1140
  task.agentIds.push(agentId);
1141
+ task.lastToolError = undefined; // diagnostics are per-attempt
601
1142
  const dirListing = this.topListing();
602
1143
  const system = (0, prompts_1.workerSystem)({
603
1144
  agentId,
@@ -614,23 +1155,26 @@ class Executor {
614
1155
  agentId,
615
1156
  taskId: task.id,
616
1157
  role: task.role,
617
- model: this.meta.options.model,
1158
+ model,
618
1159
  purpose: task.title,
619
1160
  });
620
1161
  const outcome = await (0, agent_1.runAgent)({
621
1162
  cfg: this.cfg,
622
1163
  agentId,
623
- model: this.meta.options.model,
1164
+ model,
624
1165
  thinking: this.meta.options.thinking,
625
1166
  reasoningEffort: this.meta.options.reasoningEffort,
626
1167
  system,
627
1168
  kickoff: prompts_1.WORKER_KICKOFF,
628
- tools: (0, tools_1.workerToolset)(),
1169
+ tools: (0, tools_1.workerToolset)(this.cfg),
629
1170
  terminal: [tools_1.REPORT_TOOL],
630
1171
  maxSteps: this.meta.options.maxStepsPerTask,
631
1172
  signal: this.ac.signal,
632
1173
  ctx: this.makeToolCtx(agentId, task),
633
- hooks: this.agentHooks(agentId, task.id),
1174
+ hooks: {
1175
+ ...this.agentHooks(agentId, task.id, task),
1176
+ onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
1177
+ },
634
1178
  stop: this.agentStop,
635
1179
  });
636
1180
  this.flushDeltas(agentId);
@@ -638,7 +1182,11 @@ class Executor {
638
1182
  if (this.ac.signal.aborted)
639
1183
  return "done";
640
1184
  if (!outcome.terminal) {
641
- task.error = "worker ended without reporting";
1185
+ const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
1186
+ task.error =
1187
+ "worker ended without reporting" +
1188
+ (task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
1189
+ (lastWords ? `; last words: ${lastWords}` : "");
642
1190
  return "retry";
643
1191
  }
644
1192
  const a = outcome.terminal.args;
@@ -650,11 +1198,33 @@ class Executor {
650
1198
  task.artifacts.push(art);
651
1199
  task.report = report;
652
1200
  task.reportStatus = reportStatus;
1201
+ const strList = (v, max) => Array.isArray(v) ? v.map((x) => (0, util_1.clip)(String(x), 300)).slice(0, max) : undefined;
1202
+ task.keyFacts = strList(a.key_facts, 8);
1203
+ task.openQuestions = strList(a.open_questions, 6);
1204
+ task.filesTouched = strList(a.files_touched, 40);
1205
+ // Structured sources: the citation pipeline's entry point. Only real
1206
+ // http(s) URLs survive; they flow into dep handoffs and the bibliography.
1207
+ const sources = Array.isArray(a.sources)
1208
+ ? a.sources
1209
+ .filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
1210
+ .slice(0, 40)
1211
+ .map((s) => ({
1212
+ url: (0, util_1.clip)(String(s.url), 500),
1213
+ title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
1214
+ date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
1215
+ note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
1216
+ }))
1217
+ : [];
1218
+ task.sources = sources.length ? sources : undefined;
653
1219
  this.journal.append("task.report", {
654
1220
  taskId: task.id,
655
1221
  status: reportStatus,
656
1222
  report,
657
1223
  artifacts: task.artifacts,
1224
+ keyFacts: task.keyFacts,
1225
+ openQuestions: task.openQuestions,
1226
+ filesTouched: task.filesTouched,
1227
+ sources: task.sources,
658
1228
  });
659
1229
  if (reportStatus === "blocked") {
660
1230
  this.finalizeTask(task, "blocked", report);
@@ -663,6 +1233,14 @@ class Executor {
663
1233
  if (task.verify && this.cfg.verification !== "off") {
664
1234
  task.status = "verifying";
665
1235
  this.journal.append("task.status", { taskId: task.id, status: "verifying", attempt: task.attempt });
1236
+ // Mechanical checks first: free, instant, and they catch the most common
1237
+ // fabrications (claimed artifacts that don't exist) without an LLM call.
1238
+ const mech = this.preVerify(task);
1239
+ if (mech) {
1240
+ task.feedback = mech;
1241
+ this.journal.append("verify.result", { taskId: task.id, pass: false, feedback: mech, mechanical: true });
1242
+ return "retry";
1243
+ }
666
1244
  const pass = await this.runVerifier(task);
667
1245
  if (!pass)
668
1246
  return "retry";
@@ -670,43 +1248,147 @@ class Executor {
670
1248
  this.finalizeTask(task, "done", report);
671
1249
  return "done";
672
1250
  }
673
- async runVerifier(task) {
1251
+ /** Zero-token sanity checks before the LLM verifier. Returns failure feedback or null. */
1252
+ preVerify(task) {
1253
+ const report = task.report ?? "";
1254
+ if (report.trim().length < 40) {
1255
+ return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
1256
+ }
1257
+ const missing = [];
1258
+ const malformed = [];
1259
+ // Remote sandboxes own their filesystem — only check host-visible paths.
1260
+ if (this.sandbox.localFs) {
1261
+ const okAt = (p) => {
1262
+ try {
1263
+ return fs.statSync(p).size > 0;
1264
+ }
1265
+ catch {
1266
+ return false;
1267
+ }
1268
+ };
1269
+ for (const rel of task.artifacts) {
1270
+ const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
1271
+ const inWorkdir = path.resolve(this.meta.cwd, rel);
1272
+ if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
1273
+ missing.push(rel);
1274
+ continue;
1275
+ }
1276
+ // Structural format check (json parses, csv is rectangular, html is
1277
+ // not a stub) — free, and catches what the LLM verifier wastes a whole
1278
+ // agent run discovering.
1279
+ const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
1280
+ if (problem)
1281
+ malformed.push(`${rel}: ${problem}`);
1282
+ }
1283
+ }
1284
+ if (missing.length) {
1285
+ return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
1286
+ }
1287
+ if (malformed.length) {
1288
+ return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
1289
+ }
1290
+ return null;
1291
+ }
1292
+ /** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
1293
+ async verifierAgent(task, kickoff) {
674
1294
  const agentId = (0, util_1.rid)("v");
1295
+ // Verification gets the strong tier when configured — a weak verifier
1296
+ // rubber-stamps exactly the tasks that most need scrutiny.
1297
+ const model = this.cfg.strongModel || this.meta.options.model;
675
1298
  task.agentIds.push(agentId);
676
1299
  this.journal.append("agent.spawned", {
677
1300
  agentId,
678
1301
  taskId: task.id,
679
1302
  role: "verifier",
680
- model: this.meta.options.model,
1303
+ model,
681
1304
  purpose: `verify ${task.id}`,
682
1305
  });
1306
+ let evidenceCalls = 0;
1307
+ const baseHooks = this.agentHooks(agentId, task.id);
683
1308
  const outcome = await (0, agent_1.runAgent)({
684
1309
  cfg: this.cfg,
685
1310
  agentId,
686
- model: this.meta.options.model,
1311
+ model,
687
1312
  thinking: this.meta.options.thinking,
688
1313
  reasoningEffort: this.meta.options.reasoningEffort,
689
- system: (0, prompts_1.verifierSystem)(this.meta, task),
690
- kickoff: prompts_1.VERIFIER_KICKOFF,
1314
+ system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
1315
+ kickoff,
691
1316
  tools: (0, tools_1.verifierToolset)(),
692
1317
  terminal: [tools_1.VERDICT_TOOL],
693
1318
  maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
694
1319
  signal: this.ac.signal,
695
- ctx: this.makeToolCtx(agentId, task),
696
- hooks: this.agentHooks(agentId, task.id),
1320
+ // Blind verification: the verifier judges deliverables against the
1321
+ // objective with its own tools — it must not inherit the swarm's shared
1322
+ // beliefs (blackboard) or the worker's narrative beyond the claims.
1323
+ // (Dep reports are settled upstream outputs, not the worker's story.)
1324
+ ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
1325
+ hooks: {
1326
+ ...baseHooks,
1327
+ onToolCall: (callId, name, args) => {
1328
+ if (name !== "verdict")
1329
+ evidenceCalls++;
1330
+ baseHooks.onToolCall(callId, name, args);
1331
+ },
1332
+ },
697
1333
  stop: this.agentStop,
698
1334
  });
699
1335
  this.flushDeltas(agentId);
700
1336
  this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
1337
+ return { outcome, evidenceCalls };
1338
+ }
1339
+ async runVerifier(task) {
1340
+ const strict = this.cfg.verification === "strict";
1341
+ let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
701
1342
  if (this.ac.signal.aborted)
702
1343
  return true;
1344
+ // Strict mode: a pass verdict backed by zero tool calls is an opinion,
1345
+ // not a verification. One re-run demanding evidence; if that also passes
1346
+ // tool-free, accept but say so in the journal.
1347
+ if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
1348
+ this.journal.append("log", {
1349
+ level: "info",
1350
+ msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
1351
+ });
1352
+ const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
1353
+ if (this.ac.signal.aborted)
1354
+ return true;
1355
+ if (second.outcome.terminal) {
1356
+ if (second.evidenceCalls === 0) {
1357
+ this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
1358
+ }
1359
+ outcome = second.outcome;
1360
+ }
1361
+ }
703
1362
  const v = (outcome.terminal?.args ?? {});
704
- const strict = this.cfg.verification === "strict";
705
1363
  // No verdict returned: in strict mode fail closed, otherwise accept.
706
1364
  const pass = outcome.terminal ? Boolean(v.pass) : !strict;
707
- const feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1365
+ let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1366
+ // Structured issues become the retry's worklist — numbered, with evidence.
1367
+ const issues = Array.isArray(v.issues)
1368
+ ? v.issues
1369
+ .filter((i) => i && typeof i === "object" && i.problem)
1370
+ .slice(0, 5)
1371
+ .map((i) => ({
1372
+ problem: (0, util_1.oneLine)(String(i.problem), 300),
1373
+ evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
1374
+ fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
1375
+ }))
1376
+ : [];
1377
+ if (!pass && issues.length) {
1378
+ feedback = [
1379
+ feedback,
1380
+ ...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
1381
+ ]
1382
+ .filter(Boolean)
1383
+ .join("\n");
1384
+ }
708
1385
  task.feedback = feedback;
709
- this.journal.append("verify.result", { taskId: task.id, pass, feedback });
1386
+ this.journal.append("verify.result", {
1387
+ taskId: task.id,
1388
+ pass,
1389
+ feedback,
1390
+ ...(issues.length ? { issues } : {}),
1391
+ });
710
1392
  return pass;
711
1393
  }
712
1394
  finalizeTask(task, status, reason) {
@@ -714,8 +1396,80 @@ class Executor {
714
1396
  task.endedAt = Date.now();
715
1397
  if (reason && status !== "done")
716
1398
  task.error = reason;
1399
+ // A settled task holds no file claims — release them so the digest and
1400
+ // search_notes don't accumulate dead claims on long runs. In-place splice:
1401
+ // teams share this array by reference.
1402
+ for (let i = this.notes.length - 1; i >= 0; i--) {
1403
+ const n = this.notes[i];
1404
+ if (n.kind === "claim" && n.taskId === task.id)
1405
+ this.notes.splice(i, 1);
1406
+ }
717
1407
  this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
718
1408
  this.settledSinceUpdate.push(task.id);
1409
+ this.maybeSnapshot();
1410
+ }
1411
+ // ---------------------------------------------------------------- progress snapshots
1412
+ snapshotCounter = 0;
1413
+ settledSinceSnapshot = 0;
1414
+ snapshotInflight = false;
1415
+ /**
1416
+ * Periodic partial deliverable: every N settled tasks, write a cheap-tier
1417
+ * progress report to artifacts/. Fire-and-forget — a multi-day run always
1418
+ * has something readable, and a snapshot failure never blocks scheduling.
1419
+ */
1420
+ maybeSnapshot() {
1421
+ if (this.mode !== "root" || this.finishing || this.snapshotInflight)
1422
+ return;
1423
+ const every = Number(process.env.SWARM_SNAPSHOT_EVERY ?? "25");
1424
+ if (!every || every < 1)
1425
+ return;
1426
+ if (++this.settledSinceSnapshot < every)
1427
+ return;
1428
+ this.settledSinceSnapshot = 0;
1429
+ this.snapshotInflight = true;
1430
+ const n = ++this.snapshotCounter;
1431
+ const model = this.cfg.cheapModel || this.meta.options.conductorModel;
1432
+ const tasks = this.taskList();
1433
+ const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
1434
+ (0, deepseek_1.chat)(this.cfg, {
1435
+ model,
1436
+ messages: [
1437
+ {
1438
+ role: "user",
1439
+ content: `Write a concise interim progress report (markdown) for an in-flight agent-swarm mission. Cover: what has been accomplished so far (with concrete results/paths from the reports), what failed, what is currently running, and what remains. This is a partial deliverable for the operator — informative, no filler.\n\nMISSION\n${this.meta.mission}\n\nTASKS\n${(0, prompts_1.taskTable)(tasks)}\n\nSETTLED REPORTS\n${(0, util_1.truncateMiddle)(settled.map(prompts_1.reportBlock).join("\n\n"), 50_000, "chars")}`,
1440
+ },
1441
+ ],
1442
+ thinking: false,
1443
+ maxTokens: 4096,
1444
+ signal: this.ac.signal,
1445
+ })
1446
+ .then((res) => {
1447
+ this.onUsage(model, res.usage);
1448
+ if (!res.content.trim())
1449
+ return;
1450
+ const rel = `progress-report-${n}.md`;
1451
+ fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
1452
+ this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
1453
+ // Interim memory: a multi-day run that dies before synthesis still
1454
+ // leaves the next swarm in this workspace something to build on.
1455
+ if (!this.meta.sandbox) {
1456
+ (0, memory_1.appendMemory)(this.meta.cwd, {
1457
+ runId: this.meta.id,
1458
+ mission: this.meta.mission,
1459
+ finishedAt: Date.now(),
1460
+ status: "in-progress",
1461
+ summary: (0, util_1.clip)(res.content, 600),
1462
+ keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
1463
+ });
1464
+ }
1465
+ })
1466
+ .catch((e) => {
1467
+ if (!this.ac.signal.aborted)
1468
+ this.journal.append("log", { level: "warn", msg: `progress snapshot failed: ${(0, util_1.errMsg)(e)}` });
1469
+ })
1470
+ .finally(() => {
1471
+ this.snapshotInflight = false;
1472
+ });
719
1473
  }
720
1474
  topListing() {
721
1475
  // Remote sandboxes own their filesystem; a host listing would be a lie.
@@ -750,18 +1504,31 @@ class Executor {
750
1504
  */
751
1505
  deltaBuf = new Map();
752
1506
  deltaTimer = null;
1507
+ thinkDropLogged = false;
753
1508
  queueDelta(agentId, taskId, channel, text) {
1509
+ // Deltas are UI sugar, never state — thin them under load so a 100-agent
1510
+ // swarm doesn't write gigabytes of streaming chatter into the journal.
1511
+ const load = this.activeWorkerCount();
1512
+ if (channel === "think" && load > 48) {
1513
+ if (!this.thinkDropLogged) {
1514
+ this.thinkDropLogged = true;
1515
+ this.journal.append("log", { level: "info", msg: `thinking streams muted above 48 active agents (currently ${load})` });
1516
+ }
1517
+ return;
1518
+ }
1519
+ const flushChars = load > 24 ? 2000 : 480;
1520
+ const flushMs = load > 24 ? 1000 : 200;
754
1521
  const key = `${agentId}:${channel}`;
755
1522
  const buf = this.deltaBuf.get(key);
756
1523
  if (buf)
757
1524
  buf.text += text;
758
1525
  else
759
1526
  this.deltaBuf.set(key, { agentId, taskId, channel, text });
760
- if (this.deltaBuf.get(key).text.length >= 480) {
1527
+ if (this.deltaBuf.get(key).text.length >= flushChars) {
761
1528
  this.flushDeltas(agentId);
762
1529
  }
763
1530
  else if (!this.deltaTimer) {
764
- this.deltaTimer = setTimeout(() => this.flushDeltas(), 200);
1531
+ this.deltaTimer = setTimeout(() => this.flushDeltas(), flushMs);
765
1532
  }
766
1533
  }
767
1534
  flushDeltas(onlyAgent) {
@@ -781,7 +1548,7 @@ class Executor {
781
1548
  });
782
1549
  }
783
1550
  }
784
- agentHooks(agentId, taskId) {
1551
+ agentHooks(agentId, taskId, trackErrorsOn) {
785
1552
  return {
786
1553
  onDelta: (channel, text) => {
787
1554
  this.queueDelta(agentId, taskId, channel, text);
@@ -791,6 +1558,8 @@ class Executor {
791
1558
  this.journal.append("tool.call", { agentId, taskId, callId, name, args });
792
1559
  },
793
1560
  onToolResult: (callId, name, ok, summary) => {
1561
+ if (!ok && trackErrorsOn)
1562
+ trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
794
1563
  this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
795
1564
  },
796
1565
  onUsage: this.onUsage,
@@ -802,6 +1571,10 @@ class Executor {
802
1571
  // ---------------------------------------------------------------- operator control
803
1572
  operatorQueue = [];
804
1573
  drainControl() {
1574
+ // Only the root executor consumes operator control; teams are cancelled
1575
+ // via the parent's abort signal and would otherwise steal queued notes.
1576
+ if (this.mode === "team")
1577
+ return;
805
1578
  for (const msg of this.control.poll()) {
806
1579
  if (msg.kind === "cancel") {
807
1580
  this.journal.append("operator.note", { text: "⛔ Cancel requested by operator." });
@@ -830,8 +1603,25 @@ class Executor {
830
1603
  const reportPath = path.join(this.runDirPath, "artifacts", "final-report.md");
831
1604
  (0, util_1.ensureDir)(path.dirname(reportPath));
832
1605
  fs.writeFileSync(reportPath, reportMarkdown, "utf8");
1606
+ // Always ship a readable, shareable HTML rendering alongside the raw
1607
+ // markdown; a rendering bug must never block run finalization.
1608
+ let htmlPath;
1609
+ try {
1610
+ htmlPath = path.join(this.runDirPath, "artifacts", "final-report.html");
1611
+ fs.writeFileSync(htmlPath, (0, report_1.renderFinalHtml)({
1612
+ markdown: reportMarkdown,
1613
+ mission: this.meta.mission,
1614
+ runId: this.meta.id,
1615
+ status,
1616
+ finishedAt: Date.now(),
1617
+ }), "utf8");
1618
+ }
1619
+ catch (e) {
1620
+ htmlPath = undefined;
1621
+ this.journal.append("log", { level: "warn", msg: `final-report.html render failed: ${(0, util_1.errMsg)(e)}` });
1622
+ }
833
1623
  this.setStatus(status, reason);
834
- this.journal.append("run.final", { summary, reportPath, reason, status });
1624
+ this.journal.append("run.final", { summary, reportPath, htmlPath, reason, status });
835
1625
  await this.journal.flush();
836
1626
  }
837
1627
  /** Terminate the run as failed without any further model calls. */
@@ -862,10 +1652,14 @@ class Executor {
862
1652
  ? tasks.map(prompts_1.reportBlock).join("\n\n")
863
1653
  : "(no tasks were completed)";
864
1654
  const artifactList = this.listArtifacts().join("\n") || "(none)";
1655
+ // The citation pipeline's last hop: every source any worker reported,
1656
+ // deduplicated and numbered, becomes the synthesizer's bibliography.
1657
+ const allSources = (0, report_1.aggregateSources)(tasks);
1658
+ const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
865
1659
  const agentId = (0, util_1.rid)("synth");
866
1660
  let summary = "";
867
1661
  let reportMarkdown = "";
868
- try {
1662
+ const synthOnce = async (extraNote) => {
869
1663
  const outcome = await (0, agent_1.runAgent)({
870
1664
  cfg: this.cfg,
871
1665
  agentId,
@@ -874,17 +1668,18 @@ class Executor {
874
1668
  reasoningEffort: this.meta.options.reasoningEffort,
875
1669
  system: (0, prompts_1.synthSystem)({
876
1670
  meta: this.meta,
877
- finishNotes: this.finishNotes,
878
- reports: (0, util_1.truncateMiddle)(reports, 120_000, "chars"),
879
- blackboard: this.blackboardDigest(4000),
1671
+ finishNotes: [this.finishNotes, extraNote].filter(Boolean).join("\n\n"),
1672
+ reports: (0, util_1.truncateMiddle)(reports, 300_000, "chars"),
1673
+ blackboard: this.blackboardDigest(6000),
880
1674
  artifactList,
881
1675
  reason: this.finishReason || "completed",
1676
+ sources: sourcesText,
882
1677
  }),
883
1678
  kickoff: prompts_1.SYNTH_KICKOFF,
884
1679
  tools: (0, tools_1.synthToolset)(),
885
1680
  terminal: [tools_1.SUBMIT_FINAL_TOOL],
886
- maxSteps: 12,
887
- maxTokensOut: 16384,
1681
+ maxSteps: 24,
1682
+ maxTokensOut: 32000,
888
1683
  signal: new AbortController().signal, // synthesis should finish even if run was cancelled
889
1684
  ctx: this.makeToolCtx(agentId, null),
890
1685
  hooks: this.agentHooks(agentId, ""),
@@ -892,6 +1687,36 @@ class Executor {
892
1687
  const a = (outcome.terminal?.args ?? {});
893
1688
  reportMarkdown = String(a.report_markdown ?? outcome.finalText ?? "");
894
1689
  summary = String(a.summary ?? "");
1690
+ };
1691
+ try {
1692
+ await synthOnce();
1693
+ // Strict mode: check the final report's claims against the task reports
1694
+ // (the ground truth) and re-synthesize once if it misrepresents them.
1695
+ if (this.cfg.verification === "strict" && reportMarkdown.trim() && tasks.length) {
1696
+ try {
1697
+ const res = await (0, deepseek_1.chat)(this.cfg, {
1698
+ model: this.meta.options.conductorModel,
1699
+ messages: [
1700
+ {
1701
+ role: "user",
1702
+ content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
1703
+ },
1704
+ ],
1705
+ thinking: false,
1706
+ maxTokens: 2048,
1707
+ signal: new AbortController().signal,
1708
+ });
1709
+ this.onUsage(this.meta.options.conductorModel, res.usage);
1710
+ const check = (res.content || "").trim();
1711
+ if (check && !/^OK\b/i.test(check)) {
1712
+ this.journal.append("log", { level: "warn", msg: `synthesis check found discrepancies:\n${(0, util_1.clip)(check, 1500)}` });
1713
+ await synthOnce(`A faithfulness review of your previous draft found these discrepancies — fix them, claiming only what the task reports support:\n${(0, util_1.clip)(check, 2000)}`);
1714
+ }
1715
+ }
1716
+ catch (e) {
1717
+ this.journal.append("log", { level: "warn", msg: `synthesis check failed: ${(0, util_1.errMsg)(e)}` });
1718
+ }
1719
+ }
895
1720
  }
896
1721
  catch (e) {
897
1722
  this.journal.append("log", { level: "error", msg: `synthesis failed: ${(0, util_1.errMsg)(e)}` });
@@ -920,9 +1745,24 @@ class Executor {
920
1745
  reason = `All ${tasks.length} task(s) failed or were blocked.`;
921
1746
  }
922
1747
  await this.writeFinal(status, reason, reportMarkdown, summary || (0, util_1.clip)(reportMarkdown, 600));
1748
+ // Cross-run memory: real-directory runs leave a trace for the next swarm.
1749
+ if (!this.meta.sandbox && status !== "cancelled") {
1750
+ (0, memory_1.appendMemory)(this.meta.cwd, {
1751
+ runId: this.meta.id,
1752
+ mission: this.meta.mission,
1753
+ finishedAt: Date.now(),
1754
+ status,
1755
+ summary: (0, util_1.clip)(summary || reportMarkdown, 600),
1756
+ keyDecisions: this.notes.filter((n) => n.kind === "decision").slice(-10).map((n) => n.text),
1757
+ });
1758
+ }
923
1759
  }
924
1760
  fallbackReport(tasks) {
925
1761
  const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
1762
+ // Even without a synthesizer, surface the cross-task essentials first.
1763
+ const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
1764
+ if (facts.length)
1765
+ lines.push(`## Key facts`, ...facts.slice(0, 60), "");
926
1766
  for (const t of tasks) {
927
1767
  lines.push(`## ${t.id} ${t.title} (${t.status})`);
928
1768
  lines.push(t.report || t.error || "(no output)");
@@ -930,6 +1770,15 @@ class Executor {
930
1770
  lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
931
1771
  lines.push("");
932
1772
  }
1773
+ const sources = (0, report_1.aggregateSources)(tasks);
1774
+ if (sources.length) {
1775
+ lines.push(`## Sources`);
1776
+ for (const s of sources.slice(0, 100)) {
1777
+ lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
1778
+ }
1779
+ if (sources.length > 100)
1780
+ lines.push(`…and ${sources.length - 100} more in the task reports.`);
1781
+ }
933
1782
  return lines.join("\n");
934
1783
  }
935
1784
  listArtifacts() {