@robzilla1738/agentswarm 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/README.md +29 -12
  2. package/dist/agent.js +6 -15
  3. package/dist/cli.js +31 -4
  4. package/dist/config.js +44 -1
  5. package/dist/crawltools.js +3 -22
  6. package/dist/executor.js +276 -60
  7. package/dist/hub.js +67 -3
  8. package/dist/journal.js +39 -5
  9. package/dist/memory.js +17 -11
  10. package/dist/pdftext.js +211 -0
  11. package/dist/prompts.js +23 -15
  12. package/dist/report.js +39 -1
  13. package/dist/run.js +8 -0
  14. package/dist/sandbox.js +11 -0
  15. package/dist/searchcore.js +55 -2
  16. package/dist/state.js +67 -17
  17. package/dist/tools.js +208 -19
  18. package/dist/util.js +117 -3
  19. package/dist/webtools.js +185 -32
  20. package/package.json +1 -1
  21. package/ui/out/404/index.html +1 -1
  22. package/ui/out/404.html +1 -1
  23. package/ui/out/_next/static/chunks/677-a62d486d6734bcf3.js +1 -0
  24. package/ui/out/_next/static/chunks/app/run/page-c29f95c51af08c60.js +1 -0
  25. package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
  26. package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
  27. package/ui/out/index.html +1 -1
  28. package/ui/out/index.txt +3 -3
  29. package/ui/out/run/index.html +1 -1
  30. package/ui/out/run/index.txt +3 -3
  31. package/ui/out/settings/index.html +1 -1
  32. package/ui/out/settings/index.txt +3 -3
  33. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
  34. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
  35. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
  36. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → JFkx5KtNi0DYyqm_THzbY}/_buildManifest.js +0 -0
  37. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → JFkx5KtNi0DYyqm_THzbY}/_ssgManifest.js +0 -0
package/dist/executor.js CHANGED
@@ -69,10 +69,9 @@ class Executor {
69
69
  finishNotes = "";
70
70
  finishReason = "";
71
71
  fatal = null;
72
+ /** "error" = the turn ended in a call failure, not a decision. */
72
73
  lastConductorAction = "none";
73
74
  conductorFailures = 0;
74
- /** True when the last conductor turn ended in a call error, not a decision. */
75
- lastConductorErrored = false;
76
75
  resumed = false;
77
76
  sandbox;
78
77
  mode;
@@ -134,7 +133,14 @@ class Executor {
134
133
  const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
135
134
  this.taskCounter = Math.max(this.taskCounter, n);
136
135
  }
137
- this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text }));
136
+ // Drop claims held by settled tasks — they were released on task end and
137
+ // must not resurrect across a restart.
138
+ const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
139
+ this.notes = state.notes
140
+ .map((n) => ({ taskId: n.taskId, teamId: n.teamId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
141
+ // Team claims always drop: the owning child executor died with the
142
+ // crash, and a re-run team task re-claims from scratch.
143
+ .filter((n) => !(n.kind === "claim" && (n.teamId || (n.taskId && settled.has(n.taskId)))));
138
144
  const lastPhase = state.phases[state.phases.length - 1];
139
145
  if (lastPhase)
140
146
  this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
@@ -182,16 +188,16 @@ class Executor {
182
188
  blackboardDigest(max = 1800) {
183
189
  if (!this.notes.length)
184
190
  return "";
185
- const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
186
- // Decisions anchor mission-wide coherence and are never trimmed out of the
187
- // digest; everything else shows only its recent tail.
188
- const decisions = this.notes.filter((n) => n.kind === "decision").map(fmt);
189
- const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
191
+ const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
192
+ // Decisions and conflicts anchor mission-wide coherence and are never
193
+ // trimmed out of the digest; everything else shows only its recent tail.
194
+ const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
195
+ const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
190
196
  let tail = rest.join("\n");
191
- const budget = Math.max(400, max - decisions.join("\n").length);
197
+ const budget = Math.max(400, max - pinned.join("\n").length);
192
198
  if (tail.length > budget)
193
199
  tail = tail.slice(tail.length - budget);
194
- return [decisions.join("\n"), tail].filter(Boolean).join("\n");
200
+ return [pinned.join("\n"), tail].filter(Boolean).join("\n");
195
201
  }
196
202
  searchNotes(query) {
197
203
  const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
@@ -266,6 +272,8 @@ class Executor {
266
272
  content: this.resumed
267
273
  ? (0, prompts_1.conductorUpdate)({
268
274
  blackboard: this.blackboardDigest(),
275
+ phase: this.phaseLine(),
276
+ plan: this.planPin(),
269
277
  nextId: this.nextId(),
270
278
  taskTable: (0, prompts_1.taskTable)(this.taskList()),
271
279
  budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -277,6 +285,15 @@ class Executor {
277
285
  : (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
278
286
  },
279
287
  ];
288
+ if (this.resumed) {
289
+ // The conductor's reasoning history died with the old process. Re-seed
290
+ // the durable facts into the same slot trimConductorHistory() maintains,
291
+ // so a resumed conductor knows what settled and what was decided.
292
+ this.conductorMessages.splice(1, 0, {
293
+ role: "user",
294
+ content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
295
+ });
296
+ }
280
297
  try {
281
298
  await this.conductorTurn();
282
299
  this.setStatus("running");
@@ -382,6 +399,13 @@ class Executor {
382
399
  sharedNotes: this.notes,
383
400
  });
384
401
  await child.run();
402
+ // The sub-swarm is over: claims its tasks left behind (e.g. after a child
403
+ // cancellation) are no longer live and must not haunt the shared board.
404
+ for (let i = this.notes.length - 1; i >= 0; i--) {
405
+ const n = this.notes[i];
406
+ if (n.kind === "claim" && n.teamId === task.id)
407
+ this.notes.splice(i, 1);
408
+ }
385
409
  if (this.ac.signal.aborted) {
386
410
  this.finalizeTask(task, "failed", "run cancelled");
387
411
  return;
@@ -390,11 +414,13 @@ class Executor {
390
414
  for (const a of child.teamArtifacts())
391
415
  if (!task.artifacts.includes(a))
392
416
  task.artifacts.push(a);
417
+ const ok = child.anyTaskDone();
418
+ const reportStatus = ok ? "done" : "blocked";
393
419
  task.report = report;
394
- task.reportStatus = "done";
420
+ task.reportStatus = reportStatus;
395
421
  this.journal.append("team.report", { taskId: task.id, report, artifacts: task.artifacts });
396
- this.journal.append("task.report", { taskId: task.id, status: "done", report, artifacts: task.artifacts });
397
- this.finalizeTask(task, child.anyTaskDone() ? "done" : "failed", report);
422
+ this.journal.append("task.report", { taskId: task.id, status: reportStatus, report, artifacts: task.artifacts });
423
+ this.finalizeTask(task, ok ? "done" : "failed", report);
398
424
  }
399
425
  async mainLoop() {
400
426
  while (!this.finishing) {
@@ -430,7 +456,7 @@ class Executor {
430
456
  // An errored turn is not a decision — keep looping so the breaker
431
457
  // can retry (and eventually trip) instead of misreading the error
432
458
  // as "the conductor chose to stop".
433
- if (this.lastConductorAction !== "spawn" && !this.lastConductorErrored) {
459
+ if (this.lastConductorAction !== "spawn" && this.lastConductorAction !== "error") {
434
460
  this.finishing = true;
435
461
  this.finishReason = this.finishReason || "all tasks settled";
436
462
  }
@@ -439,7 +465,7 @@ class Executor {
439
465
  // Stuck: pending tasks exist but can't run (failed/blocked deps).
440
466
  this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
441
467
  await this.conductorTurn();
442
- if (this.lastConductorAction === "wait" && !this.lastConductorErrored) {
468
+ if (this.lastConductorAction === "wait") {
443
469
  this.finishing = true;
444
470
  this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
445
471
  }
@@ -580,12 +606,10 @@ class Executor {
580
606
  const scale = Number(process.env.SWARM_BACKOFF_SCALE || "1") || 1;
581
607
  const backoff = [2_000, 5_000, 15_000, 30_000][Math.min(this.conductorFailures - 1, 3)] * scale;
582
608
  await new Promise((r) => setTimeout(r, backoff));
583
- this.lastConductorAction = "wait";
584
- this.lastConductorErrored = true;
609
+ this.lastConductorAction = "error";
585
610
  return;
586
611
  }
587
612
  this.conductorFailures = 0;
588
- this.lastConductorErrored = false;
589
613
  this.onUsage(this.meta.options.conductorModel, res.usage);
590
614
  if (res.content.trim())
591
615
  this.journal.append("conductor.say", { text: (0, util_1.clip)(res.content, 4000) });
@@ -767,7 +791,8 @@ class Executor {
767
791
  return reports.map(prompts_1.reportBlock);
768
792
  const important = reports.filter((t) => t.status !== "done");
769
793
  const done = reports.filter((t) => t.status === "done");
770
- const fullDone = done.slice(-Math.max(0, CAP - important.length));
794
+ const room = Math.max(0, CAP - important.length);
795
+ const fullDone = room > 0 ? done.slice(-room) : []; // slice(-0) would return everything
771
796
  const briefDone = done.slice(0, done.length - fullDone.length);
772
797
  return [
773
798
  ...important.map(prompts_1.reportBlock),
@@ -799,8 +824,8 @@ class Executor {
799
824
  * trimmed history so the conductor never loses the plot on long missions —
800
825
  * rebuilt fresh each trim from current state, so it also survives resume.
801
826
  */
802
- missionLedger() {
803
- const lines = ["[Earlier orchestration history was trimmed. MISSION LEDGER — durable state so far:]"];
827
+ missionLedger(intro = "Earlier orchestration history was trimmed.") {
828
+ const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
804
829
  if (this.phase)
805
830
  lines.push(this.phaseLine());
806
831
  const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
@@ -843,6 +868,19 @@ class Executor {
843
868
  else
844
869
  this.conductorMessages.splice(1, 0, msg);
845
870
  };
871
+ // Old conductor turns carry the bulk in thinking traces and verbose prose;
872
+ // the durable decisions live in the ledger and the plan pin. Compact them
873
+ // in place before resorting to dropping whole messages. (sanitizeMessages
874
+ // backfills reasoning_content with "" for DeepSeek tool-call turns.)
875
+ for (let i = 1; i < this.conductorMessages.length - 6; i++) {
876
+ const m = this.conductorMessages[i];
877
+ if (m.role !== "assistant")
878
+ continue;
879
+ if (m.reasoning_content)
880
+ m.reasoning_content = "";
881
+ if (m.content && m.content.length > 400)
882
+ m.content = (0, util_1.clip)(m.content, 400);
883
+ }
846
884
  if (this.conductorMessages.length > MAX) {
847
885
  const system = this.conductorMessages[0];
848
886
  const tail = this.conductorMessages.slice(-(MAX - 2));
@@ -855,7 +893,7 @@ class Executor {
855
893
  // Count alone doesn't bound size: every update embeds the full task table,
856
894
  // so a deep run can blow the model window long before 60 messages. The
857
895
  // mission itself lives in the system message and always survives.
858
- const budget = Math.floor(this.cfg.contextTokenLimit * 0.75);
896
+ const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
859
897
  if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
860
898
  return;
861
899
  setLedger();
@@ -876,20 +914,48 @@ class Executor {
876
914
  hasOpenWork() {
877
915
  return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
878
916
  }
917
+ /** Walk a failed/blocked dep chain down to the task that actually failed. */
918
+ rootFailure(id) {
919
+ let cur = this.tasks.get(id);
920
+ const seen = new Set();
921
+ while (cur && !seen.has(cur.id)) {
922
+ seen.add(cur.id);
923
+ const next = cur.deps
924
+ .map((d) => this.tasks.get(d))
925
+ .find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
926
+ if (!next)
927
+ return cur;
928
+ cur = next;
929
+ }
930
+ return cur;
931
+ }
879
932
  blockStuckTasks() {
880
- for (const t of this.taskList()) {
881
- if (t.status !== "pending")
882
- continue;
883
- const bad = t.deps.find((d) => {
884
- const s = this.tasks.get(d)?.status;
885
- return s === "failed" || s === "blocked";
886
- });
887
- if (bad) {
933
+ // Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
934
+ // pass, not one level per conductor turn.
935
+ for (let changed = true; changed;) {
936
+ changed = false;
937
+ for (const t of this.taskList()) {
938
+ if (t.status !== "pending")
939
+ continue;
940
+ const bad = t.deps.find((d) => {
941
+ const s = this.tasks.get(d)?.status;
942
+ return s === "failed" || s === "blocked";
943
+ });
944
+ if (!bad)
945
+ continue;
946
+ // Carry the root cause so the conductor re-plans around the actual
947
+ // failure, not a chain of "dependency did not complete".
948
+ const root = this.rootFailure(bad);
949
+ const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
888
950
  t.status = "blocked";
889
- t.error = `dependency ${bad} did not complete`;
951
+ t.error =
952
+ root && root.id !== bad
953
+ ? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
954
+ : `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
890
955
  t.endedAt = Date.now();
891
956
  this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
892
957
  this.settledSinceUpdate.push(t.id);
958
+ changed = true;
893
959
  }
894
960
  }
895
961
  }
@@ -954,27 +1020,40 @@ class Executor {
954
1020
  taskId: task?.id,
955
1021
  signal: this.ac.signal,
956
1022
  addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
957
- addNote: (text, key, kind) => {
958
- this.notes.push({ taskId: task?.id, key, kind, text });
1023
+ addNote: (text, key, kind, url) => {
1024
+ this.notes.push({ taskId: task?.id, teamId: this.teamId, key, kind, text, url });
959
1025
  // Only the recent tail ever feeds digests; without a cap a multi-day
960
- // run accumulates every note in memory. Decisions are kept regardless.
1026
+ // run accumulates every note in memory. Decisions and conflicts are
1027
+ // kept regardless. In-place splice: teams share this array by reference.
961
1028
  if (this.notes.length > 4000) {
962
- const decisions = this.notes.filter((n) => n.kind === "decision");
963
- const rest = this.notes.filter((n) => n.kind !== "decision");
964
- rest.splice(0, rest.length - Math.max(0, 4000 - decisions.length));
965
- this.notes = [...decisions, ...rest];
1029
+ const keep = (n) => n.kind === "decision" || n.kind === "conflict";
1030
+ const pinnedCount = this.notes.filter(keep).length;
1031
+ let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
1032
+ for (let i = 0; i < this.notes.length && toDrop > 0;) {
1033
+ if (!keep(this.notes[i])) {
1034
+ this.notes.splice(i, 1);
1035
+ toDrop--;
1036
+ }
1037
+ else
1038
+ i++;
1039
+ }
966
1040
  }
967
- this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
1041
+ this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
968
1042
  },
969
1043
  searchNotes: (q) => this.searchNotes(q),
970
1044
  readReport: (taskId) => this.readReportText(taskId),
971
1045
  checkClaim: (rel) => {
972
1046
  const norm = rel.replace(/^\.\//, "");
973
- const claim = this.notes.find((n) => n.kind === "claim" &&
974
- n.key === norm &&
975
- n.taskId &&
976
- n.taskId !== task?.id &&
977
- ["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? ""));
1047
+ const claim = this.notes.find((n) => {
1048
+ if (n.kind !== "claim" || n.key !== norm || !n.taskId)
1049
+ return false;
1050
+ // Another executor's claim: its tasks aren't in this.tasks, but
1051
+ // claims are spliced out when their task settles (and when a team
1052
+ // ends), so presence alone means the holder is still live.
1053
+ if (n.teamId !== this.teamId)
1054
+ return true;
1055
+ return n.taskId !== task?.id && ["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? "");
1056
+ });
978
1057
  return claim
979
1058
  ? `⚠ ${claim.taskId} holds a claim on ${norm} ("${(0, util_1.oneLine)(claim.text, 80)}") — coordinate via the blackboard before further edits.`
980
1059
  : null;
@@ -1031,7 +1110,12 @@ class Executor {
1031
1110
  if (task.attempt < this.cfg.verifyMaxAttempts) {
1032
1111
  task.attempt++;
1033
1112
  task.status = "running";
1034
- this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt });
1113
+ this.journal.append("task.status", {
1114
+ taskId: task.id,
1115
+ status: "running",
1116
+ attempt: task.attempt,
1117
+ reason: task.feedback || task.error,
1118
+ });
1035
1119
  continue;
1036
1120
  }
1037
1121
  this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
@@ -1046,12 +1130,12 @@ class Executor {
1046
1130
  }
1047
1131
  if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
1048
1132
  task.attempt++;
1049
- task.error = (0, util_1.errMsg)(e);
1133
+ task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
1050
1134
  task.status = "running";
1051
1135
  this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
1052
1136
  continue;
1053
1137
  }
1054
- this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
1138
+ this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
1055
1139
  return;
1056
1140
  }
1057
1141
  }
@@ -1068,6 +1152,7 @@ class Executor {
1068
1152
  const agentId = (0, util_1.rid)("w");
1069
1153
  const model = this.resolveModel(task.modelTier);
1070
1154
  task.agentIds.push(agentId);
1155
+ task.lastToolError = undefined; // diagnostics are per-attempt
1071
1156
  const dirListing = this.topListing();
1072
1157
  const system = (0, prompts_1.workerSystem)({
1073
1158
  agentId,
@@ -1101,7 +1186,7 @@ class Executor {
1101
1186
  signal: this.ac.signal,
1102
1187
  ctx: this.makeToolCtx(agentId, task),
1103
1188
  hooks: {
1104
- ...this.agentHooks(agentId, task.id),
1189
+ ...this.agentHooks(agentId, task.id, task),
1105
1190
  onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
1106
1191
  },
1107
1192
  stop: this.agentStop,
@@ -1111,7 +1196,11 @@ class Executor {
1111
1196
  if (this.ac.signal.aborted)
1112
1197
  return "done";
1113
1198
  if (!outcome.terminal) {
1114
- task.error = "worker ended without reporting";
1199
+ const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
1200
+ task.error =
1201
+ "worker ended without reporting" +
1202
+ (task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
1203
+ (lastWords ? `; last words: ${lastWords}` : "");
1115
1204
  return "retry";
1116
1205
  }
1117
1206
  const a = outcome.terminal.args;
@@ -1127,6 +1216,20 @@ class Executor {
1127
1216
  task.keyFacts = strList(a.key_facts, 8);
1128
1217
  task.openQuestions = strList(a.open_questions, 6);
1129
1218
  task.filesTouched = strList(a.files_touched, 40);
1219
+ // Structured sources: the citation pipeline's entry point. Only real
1220
+ // http(s) URLs survive; they flow into dep handoffs and the bibliography.
1221
+ const sources = Array.isArray(a.sources)
1222
+ ? a.sources
1223
+ .filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
1224
+ .slice(0, 40)
1225
+ .map((s) => ({
1226
+ url: (0, util_1.clip)(String(s.url), 500),
1227
+ title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
1228
+ date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
1229
+ note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
1230
+ }))
1231
+ : [];
1232
+ task.sources = sources.length ? sources : undefined;
1130
1233
  this.journal.append("task.report", {
1131
1234
  taskId: task.id,
1132
1235
  status: reportStatus,
@@ -1135,6 +1238,7 @@ class Executor {
1135
1238
  keyFacts: task.keyFacts,
1136
1239
  openQuestions: task.openQuestions,
1137
1240
  filesTouched: task.filesTouched,
1241
+ sources: task.sources,
1138
1242
  });
1139
1243
  if (reportStatus === "blocked") {
1140
1244
  this.finalizeTask(task, "blocked", report);
@@ -1165,6 +1269,7 @@ class Executor {
1165
1269
  return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
1166
1270
  }
1167
1271
  const missing = [];
1272
+ const malformed = [];
1168
1273
  // Remote sandboxes own their filesystem — only check host-visible paths.
1169
1274
  if (this.sandbox.localFs) {
1170
1275
  const okAt = (p) => {
@@ -1178,16 +1283,28 @@ class Executor {
1178
1283
  for (const rel of task.artifacts) {
1179
1284
  const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
1180
1285
  const inWorkdir = path.resolve(this.meta.cwd, rel);
1181
- if (!okAt(inArtifacts) && !okAt(inWorkdir))
1286
+ if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
1182
1287
  missing.push(rel);
1288
+ continue;
1289
+ }
1290
+ // Structural format check (json parses, csv is rectangular, html is
1291
+ // not a stub) — free, and catches what the LLM verifier wastes a whole
1292
+ // agent run discovering.
1293
+ const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
1294
+ if (problem)
1295
+ malformed.push(`${rel}: ${problem}`);
1183
1296
  }
1184
1297
  }
1185
1298
  if (missing.length) {
1186
1299
  return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
1187
1300
  }
1301
+ if (malformed.length) {
1302
+ return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
1303
+ }
1188
1304
  return null;
1189
1305
  }
1190
- async runVerifier(task) {
1306
+ /** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
1307
+ async verifierAgent(task, kickoff) {
1191
1308
  const agentId = (0, util_1.rid)("v");
1192
1309
  // Verification gets the strong tier when configured — a weak verifier
1193
1310
  // rubber-stamps exactly the tasks that most need scrutiny.
@@ -1200,14 +1317,16 @@ class Executor {
1200
1317
  model,
1201
1318
  purpose: `verify ${task.id}`,
1202
1319
  });
1320
+ let evidenceCalls = 0;
1321
+ const baseHooks = this.agentHooks(agentId, task.id);
1203
1322
  const outcome = await (0, agent_1.runAgent)({
1204
1323
  cfg: this.cfg,
1205
1324
  agentId,
1206
1325
  model,
1207
1326
  thinking: this.meta.options.thinking,
1208
1327
  reasoningEffort: this.meta.options.reasoningEffort,
1209
- system: (0, prompts_1.verifierSystem)(this.meta, task),
1210
- kickoff: prompts_1.VERIFIER_KICKOFF,
1328
+ system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
1329
+ kickoff,
1211
1330
  tools: (0, tools_1.verifierToolset)(),
1212
1331
  terminal: [tools_1.VERDICT_TOOL],
1213
1332
  maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
@@ -1215,21 +1334,75 @@ class Executor {
1215
1334
  // Blind verification: the verifier judges deliverables against the
1216
1335
  // objective with its own tools — it must not inherit the swarm's shared
1217
1336
  // beliefs (blackboard) or the worker's narrative beyond the claims.
1337
+ // (Dep reports are settled upstream outputs, not the worker's story.)
1218
1338
  ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
1219
- hooks: this.agentHooks(agentId, task.id),
1339
+ hooks: {
1340
+ ...baseHooks,
1341
+ onToolCall: (callId, name, args) => {
1342
+ if (name !== "verdict")
1343
+ evidenceCalls++;
1344
+ baseHooks.onToolCall(callId, name, args);
1345
+ },
1346
+ },
1220
1347
  stop: this.agentStop,
1221
1348
  });
1222
1349
  this.flushDeltas(agentId);
1223
1350
  this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
1351
+ return { outcome, evidenceCalls };
1352
+ }
1353
+ async runVerifier(task) {
1354
+ const strict = this.cfg.verification === "strict";
1355
+ let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
1224
1356
  if (this.ac.signal.aborted)
1225
1357
  return true;
1358
+ // Strict mode: a pass verdict backed by zero tool calls is an opinion,
1359
+ // not a verification. One re-run demanding evidence; if that also passes
1360
+ // tool-free, accept but say so in the journal.
1361
+ if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
1362
+ this.journal.append("log", {
1363
+ level: "info",
1364
+ msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
1365
+ });
1366
+ const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
1367
+ if (this.ac.signal.aborted)
1368
+ return true;
1369
+ if (second.outcome.terminal) {
1370
+ if (second.evidenceCalls === 0) {
1371
+ this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
1372
+ }
1373
+ outcome = second.outcome;
1374
+ }
1375
+ }
1226
1376
  const v = (outcome.terminal?.args ?? {});
1227
- const strict = this.cfg.verification === "strict";
1228
1377
  // No verdict returned: in strict mode fail closed, otherwise accept.
1229
1378
  const pass = outcome.terminal ? Boolean(v.pass) : !strict;
1230
- const feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1379
+ let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1380
+ // Structured issues become the retry's worklist — numbered, with evidence.
1381
+ const issues = Array.isArray(v.issues)
1382
+ ? v.issues
1383
+ .filter((i) => i && typeof i === "object" && i.problem)
1384
+ .slice(0, 5)
1385
+ .map((i) => ({
1386
+ problem: (0, util_1.oneLine)(String(i.problem), 300),
1387
+ evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
1388
+ fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
1389
+ }))
1390
+ : [];
1391
+ if (!pass && issues.length) {
1392
+ feedback = [
1393
+ feedback,
1394
+ ...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
1395
+ ]
1396
+ .filter(Boolean)
1397
+ .join("\n");
1398
+ }
1231
1399
  task.feedback = feedback;
1232
- this.journal.append("verify.result", { taskId: task.id, pass, feedback });
1400
+ this.journal.append("verify.result", {
1401
+ taskId: task.id,
1402
+ pass,
1403
+ feedback,
1404
+ ...(issues.length ? { issues } : {}),
1405
+ });
1233
1406
  return pass;
1234
1407
  }
1235
1408
  finalizeTask(task, status, reason) {
@@ -1237,6 +1410,14 @@ class Executor {
1237
1410
  task.endedAt = Date.now();
1238
1411
  if (reason && status !== "done")
1239
1412
  task.error = reason;
1413
+ // A settled task holds no file claims — release them so the digest and
1414
+ // search_notes don't accumulate dead claims on long runs. In-place splice:
1415
+ // teams share this array by reference.
1416
+ for (let i = this.notes.length - 1; i >= 0; i--) {
1417
+ const n = this.notes[i];
1418
+ if (n.kind === "claim" && n.taskId === task.id && n.teamId === this.teamId)
1419
+ this.notes.splice(i, 1);
1420
+ }
1240
1421
  this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
1241
1422
  this.settledSinceUpdate.push(task.id);
1242
1423
  this.maybeSnapshot();
@@ -1283,6 +1464,18 @@ class Executor {
1283
1464
  const rel = `progress-report-${n}.md`;
1284
1465
  fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
1285
1466
  this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
1467
+ // Interim memory: a multi-day run that dies before synthesis still
1468
+ // leaves the next swarm in this workspace something to build on.
1469
+ if (!this.meta.sandbox) {
1470
+ (0, memory_1.appendMemory)(this.meta.cwd, {
1471
+ runId: this.meta.id,
1472
+ mission: this.meta.mission,
1473
+ finishedAt: Date.now(),
1474
+ status: "in-progress",
1475
+ summary: (0, util_1.clip)(res.content, 600),
1476
+ keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
1477
+ });
1478
+ }
1286
1479
  })
1287
1480
  .catch((e) => {
1288
1481
  if (!this.ac.signal.aborted)
@@ -1329,7 +1522,9 @@ class Executor {
1329
1522
  queueDelta(agentId, taskId, channel, text) {
1330
1523
  // Deltas are UI sugar, never state — thin them under load so a 100-agent
1331
1524
  // swarm doesn't write gigabytes of streaming chatter into the journal.
1332
- const load = this.activeWorkerCount();
1525
+ // inflight.size over-counts verifying tasks slightly, but these are fuzzy
1526
+ // thresholds and this runs per streaming token — O(1) matters here.
1527
+ const load = this.inflight.size;
1333
1528
  if (channel === "think" && load > 48) {
1334
1529
  if (!this.thinkDropLogged) {
1335
1530
  this.thinkDropLogged = true;
@@ -1369,7 +1564,7 @@ class Executor {
1369
1564
  });
1370
1565
  }
1371
1566
  }
1372
- agentHooks(agentId, taskId) {
1567
+ agentHooks(agentId, taskId, trackErrorsOn) {
1373
1568
  return {
1374
1569
  onDelta: (channel, text) => {
1375
1570
  this.queueDelta(agentId, taskId, channel, text);
@@ -1379,6 +1574,8 @@ class Executor {
1379
1574
  this.journal.append("tool.call", { agentId, taskId, callId, name, args });
1380
1575
  },
1381
1576
  onToolResult: (callId, name, ok, summary) => {
1577
+ if (!ok && trackErrorsOn)
1578
+ trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
1382
1579
  this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
1383
1580
  },
1384
1581
  onUsage: this.onUsage,
@@ -1471,6 +1668,10 @@ class Executor {
1471
1668
  ? tasks.map(prompts_1.reportBlock).join("\n\n")
1472
1669
  : "(no tasks were completed)";
1473
1670
  const artifactList = this.listArtifacts().join("\n") || "(none)";
1671
+ // The citation pipeline's last hop: every source any worker reported,
1672
+ // deduplicated and numbered, becomes the synthesizer's bibliography.
1673
+ const allSources = (0, report_1.aggregateSources)(tasks);
1674
+ const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
1474
1675
  const agentId = (0, util_1.rid)("synth");
1475
1676
  let summary = "";
1476
1677
  let reportMarkdown = "";
@@ -1488,6 +1689,7 @@ class Executor {
1488
1689
  blackboard: this.blackboardDigest(6000),
1489
1690
  artifactList,
1490
1691
  reason: this.finishReason || "completed",
1692
+ sources: sourcesText,
1491
1693
  }),
1492
1694
  kickoff: prompts_1.SYNTH_KICKOFF,
1493
1695
  tools: (0, tools_1.synthToolset)(),
@@ -1513,7 +1715,7 @@ class Executor {
1513
1715
  messages: [
1514
1716
  {
1515
1717
  role: "user",
1516
- content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
1718
+ content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
1517
1719
  },
1518
1720
  ],
1519
1721
  thinking: false,
@@ -1562,6 +1764,7 @@ class Executor {
1562
1764
  // Cross-run memory: real-directory runs leave a trace for the next swarm.
1563
1765
  if (!this.meta.sandbox && status !== "cancelled") {
1564
1766
  (0, memory_1.appendMemory)(this.meta.cwd, {
1767
+ runId: this.meta.id,
1565
1768
  mission: this.meta.mission,
1566
1769
  finishedAt: Date.now(),
1567
1770
  status,
@@ -1572,6 +1775,10 @@ class Executor {
1572
1775
  }
1573
1776
  fallbackReport(tasks) {
1574
1777
  const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
1778
+ // Even without a synthesizer, surface the cross-task essentials first.
1779
+ const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
1780
+ if (facts.length)
1781
+ lines.push(`## Key facts`, ...facts.slice(0, 60), "");
1575
1782
  for (const t of tasks) {
1576
1783
  lines.push(`## ${t.id} ${t.title} (${t.status})`);
1577
1784
  lines.push(t.report || t.error || "(no output)");
@@ -1579,6 +1786,15 @@ class Executor {
1579
1786
  lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
1580
1787
  lines.push("");
1581
1788
  }
1789
+ const sources = (0, report_1.aggregateSources)(tasks);
1790
+ if (sources.length) {
1791
+ lines.push(`## Sources`);
1792
+ for (const s of sources.slice(0, 100)) {
1793
+ lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
1794
+ }
1795
+ if (sources.length > 100)
1796
+ lines.push(`…and ${sources.length - 100} more in the task reports.`);
1797
+ }
1582
1798
  return lines.join("\n");
1583
1799
  }
1584
1800
  listArtifacts() {