@robzilla1738/agentswarm 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +29 -12
  2. package/dist/agent.js +2 -1
  3. package/dist/cli.js +21 -4
  4. package/dist/config.js +27 -1
  5. package/dist/executor.js +243 -43
  6. package/dist/hub.js +69 -3
  7. package/dist/memory.js +5 -4
  8. package/dist/pdftext.js +211 -0
  9. package/dist/prompts.js +23 -15
  10. package/dist/report.js +37 -0
  11. package/dist/run.js +8 -0
  12. package/dist/sandbox.js +11 -0
  13. package/dist/searchcore.js +55 -2
  14. package/dist/state.js +34 -6
  15. package/dist/tools.js +196 -19
  16. package/dist/util.js +85 -0
  17. package/dist/webtools.js +145 -15
  18. package/package.json +1 -1
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
  22. package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
  23. package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
  24. package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
  25. package/ui/out/index.html +1 -1
  26. package/ui/out/index.txt +3 -3
  27. package/ui/out/run/index.html +1 -1
  28. package/ui/out/run/index.txt +3 -3
  29. package/ui/out/settings/index.html +1 -1
  30. package/ui/out/settings/index.txt +3 -3
  31. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
  32. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
  33. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
  34. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
  35. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/executor.js CHANGED
@@ -134,7 +134,12 @@ class Executor {
134
134
  const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
135
135
  this.taskCounter = Math.max(this.taskCounter, n);
136
136
  }
137
- this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text }));
137
+ // Drop claims held by settled tasks — they were released on task end and
138
+ // must not resurrect across a restart.
139
+ const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
140
+ this.notes = state.notes
141
+ .map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
142
+ .filter((n) => !(n.kind === "claim" && n.taskId && settled.has(n.taskId)));
138
143
  const lastPhase = state.phases[state.phases.length - 1];
139
144
  if (lastPhase)
140
145
  this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
@@ -182,16 +187,16 @@ class Executor {
182
187
  blackboardDigest(max = 1800) {
183
188
  if (!this.notes.length)
184
189
  return "";
185
- const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
186
- // Decisions anchor mission-wide coherence and are never trimmed out of the
187
- // digest; everything else shows only its recent tail.
188
- const decisions = this.notes.filter((n) => n.kind === "decision").map(fmt);
189
- const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
190
+ const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
191
+ // Decisions and conflicts anchor mission-wide coherence and are never
192
+ // trimmed out of the digest; everything else shows only its recent tail.
193
+ const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
194
+ const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
190
195
  let tail = rest.join("\n");
191
- const budget = Math.max(400, max - decisions.join("\n").length);
196
+ const budget = Math.max(400, max - pinned.join("\n").length);
192
197
  if (tail.length > budget)
193
198
  tail = tail.slice(tail.length - budget);
194
- return [decisions.join("\n"), tail].filter(Boolean).join("\n");
199
+ return [pinned.join("\n"), tail].filter(Boolean).join("\n");
195
200
  }
196
201
  searchNotes(query) {
197
202
  const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
@@ -266,6 +271,8 @@ class Executor {
266
271
  content: this.resumed
267
272
  ? (0, prompts_1.conductorUpdate)({
268
273
  blackboard: this.blackboardDigest(),
274
+ phase: this.phaseLine(),
275
+ plan: this.planPin(),
269
276
  nextId: this.nextId(),
270
277
  taskTable: (0, prompts_1.taskTable)(this.taskList()),
271
278
  budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -277,6 +284,15 @@ class Executor {
277
284
  : (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
278
285
  },
279
286
  ];
287
+ if (this.resumed) {
288
+ // The conductor's reasoning history died with the old process. Re-seed
289
+ // the durable facts into the same slot trimConductorHistory() maintains,
290
+ // so a resumed conductor knows what settled and what was decided.
291
+ this.conductorMessages.splice(1, 0, {
292
+ role: "user",
293
+ content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
294
+ });
295
+ }
280
296
  try {
281
297
  await this.conductorTurn();
282
298
  this.setStatus("running");
@@ -799,8 +815,8 @@ class Executor {
799
815
  * trimmed history so the conductor never loses the plot on long missions —
800
816
  * rebuilt fresh each trim from current state, so it also survives resume.
801
817
  */
802
- missionLedger() {
803
- const lines = ["[Earlier orchestration history was trimmed. MISSION LEDGER — durable state so far:]"];
818
+ missionLedger(intro = "Earlier orchestration history was trimmed.") {
819
+ const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
804
820
  if (this.phase)
805
821
  lines.push(this.phaseLine());
806
822
  const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
@@ -843,6 +859,19 @@ class Executor {
843
859
  else
844
860
  this.conductorMessages.splice(1, 0, msg);
845
861
  };
862
+ // Old conductor turns carry the bulk in thinking traces and verbose prose;
863
+ // the durable decisions live in the ledger and the plan pin. Compact them
864
+ // in place before resorting to dropping whole messages. (sanitizeMessages
865
+ // backfills reasoning_content with "" for DeepSeek tool-call turns.)
866
+ for (let i = 1; i < this.conductorMessages.length - 6; i++) {
867
+ const m = this.conductorMessages[i];
868
+ if (m.role !== "assistant")
869
+ continue;
870
+ if (m.reasoning_content)
871
+ m.reasoning_content = "";
872
+ if (m.content && m.content.length > 400)
873
+ m.content = (0, util_1.clip)(m.content, 400);
874
+ }
846
875
  if (this.conductorMessages.length > MAX) {
847
876
  const system = this.conductorMessages[0];
848
877
  const tail = this.conductorMessages.slice(-(MAX - 2));
@@ -855,7 +884,7 @@ class Executor {
855
884
  // Count alone doesn't bound size: every update embeds the full task table,
856
885
  // so a deep run can blow the model window long before 60 messages. The
857
886
  // mission itself lives in the system message and always survives.
858
- const budget = Math.floor(this.cfg.contextTokenLimit * 0.75);
887
+ const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
859
888
  if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
860
889
  return;
861
890
  setLedger();
@@ -876,20 +905,48 @@ class Executor {
876
905
  hasOpenWork() {
877
906
  return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
878
907
  }
908
+ /** Walk a failed/blocked dep chain down to the task that actually failed. */
909
+ rootFailure(id) {
910
+ let cur = this.tasks.get(id);
911
+ const seen = new Set();
912
+ while (cur && !seen.has(cur.id)) {
913
+ seen.add(cur.id);
914
+ const next = cur.deps
915
+ .map((d) => this.tasks.get(d))
916
+ .find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
917
+ if (!next)
918
+ return cur;
919
+ cur = next;
920
+ }
921
+ return cur;
922
+ }
879
923
  blockStuckTasks() {
880
- for (const t of this.taskList()) {
881
- if (t.status !== "pending")
882
- continue;
883
- const bad = t.deps.find((d) => {
884
- const s = this.tasks.get(d)?.status;
885
- return s === "failed" || s === "blocked";
886
- });
887
- if (bad) {
924
+ // Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
925
+ // pass, not one level per conductor turn.
926
+ for (let changed = true; changed;) {
927
+ changed = false;
928
+ for (const t of this.taskList()) {
929
+ if (t.status !== "pending")
930
+ continue;
931
+ const bad = t.deps.find((d) => {
932
+ const s = this.tasks.get(d)?.status;
933
+ return s === "failed" || s === "blocked";
934
+ });
935
+ if (!bad)
936
+ continue;
937
+ // Carry the root cause so the conductor re-plans around the actual
938
+ // failure, not a chain of "dependency did not complete".
939
+ const root = this.rootFailure(bad);
940
+ const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
888
941
  t.status = "blocked";
889
- t.error = `dependency ${bad} did not complete`;
942
+ t.error =
943
+ root && root.id !== bad
944
+ ? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
945
+ : `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
890
946
  t.endedAt = Date.now();
891
947
  this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
892
948
  this.settledSinceUpdate.push(t.id);
949
+ changed = true;
893
950
  }
894
951
  }
895
952
  }
@@ -954,17 +1011,25 @@ class Executor {
954
1011
  taskId: task?.id,
955
1012
  signal: this.ac.signal,
956
1013
  addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
957
- addNote: (text, key, kind) => {
958
- this.notes.push({ taskId: task?.id, key, kind, text });
1014
+ addNote: (text, key, kind, url) => {
1015
+ this.notes.push({ taskId: task?.id, key, kind, text, url });
959
1016
  // Only the recent tail ever feeds digests; without a cap a multi-day
960
- // run accumulates every note in memory. Decisions are kept regardless.
1017
+ // run accumulates every note in memory. Decisions and conflicts are
1018
+ // kept regardless. In-place splice: teams share this array by reference.
961
1019
  if (this.notes.length > 4000) {
962
- const decisions = this.notes.filter((n) => n.kind === "decision");
963
- const rest = this.notes.filter((n) => n.kind !== "decision");
964
- rest.splice(0, rest.length - Math.max(0, 4000 - decisions.length));
965
- this.notes = [...decisions, ...rest];
1020
+ const keep = (n) => n.kind === "decision" || n.kind === "conflict";
1021
+ const pinnedCount = this.notes.filter(keep).length;
1022
+ let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
1023
+ for (let i = 0; i < this.notes.length && toDrop > 0;) {
1024
+ if (!keep(this.notes[i])) {
1025
+ this.notes.splice(i, 1);
1026
+ toDrop--;
1027
+ }
1028
+ else
1029
+ i++;
1030
+ }
966
1031
  }
967
- this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
1032
+ this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
968
1033
  },
969
1034
  searchNotes: (q) => this.searchNotes(q),
970
1035
  readReport: (taskId) => this.readReportText(taskId),
@@ -1031,7 +1096,12 @@ class Executor {
1031
1096
  if (task.attempt < this.cfg.verifyMaxAttempts) {
1032
1097
  task.attempt++;
1033
1098
  task.status = "running";
1034
- this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt });
1099
+ this.journal.append("task.status", {
1100
+ taskId: task.id,
1101
+ status: "running",
1102
+ attempt: task.attempt,
1103
+ reason: task.feedback || task.error,
1104
+ });
1035
1105
  continue;
1036
1106
  }
1037
1107
  this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
@@ -1046,12 +1116,12 @@ class Executor {
1046
1116
  }
1047
1117
  if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
1048
1118
  task.attempt++;
1049
- task.error = (0, util_1.errMsg)(e);
1119
+ task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
1050
1120
  task.status = "running";
1051
1121
  this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
1052
1122
  continue;
1053
1123
  }
1054
- this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
1124
+ this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
1055
1125
  return;
1056
1126
  }
1057
1127
  }
@@ -1068,6 +1138,7 @@ class Executor {
1068
1138
  const agentId = (0, util_1.rid)("w");
1069
1139
  const model = this.resolveModel(task.modelTier);
1070
1140
  task.agentIds.push(agentId);
1141
+ task.lastToolError = undefined; // diagnostics are per-attempt
1071
1142
  const dirListing = this.topListing();
1072
1143
  const system = (0, prompts_1.workerSystem)({
1073
1144
  agentId,
@@ -1101,7 +1172,7 @@ class Executor {
1101
1172
  signal: this.ac.signal,
1102
1173
  ctx: this.makeToolCtx(agentId, task),
1103
1174
  hooks: {
1104
- ...this.agentHooks(agentId, task.id),
1175
+ ...this.agentHooks(agentId, task.id, task),
1105
1176
  onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
1106
1177
  },
1107
1178
  stop: this.agentStop,
@@ -1111,7 +1182,11 @@ class Executor {
1111
1182
  if (this.ac.signal.aborted)
1112
1183
  return "done";
1113
1184
  if (!outcome.terminal) {
1114
- task.error = "worker ended without reporting";
1185
+ const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
1186
+ task.error =
1187
+ "worker ended without reporting" +
1188
+ (task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
1189
+ (lastWords ? `; last words: ${lastWords}` : "");
1115
1190
  return "retry";
1116
1191
  }
1117
1192
  const a = outcome.terminal.args;
@@ -1127,6 +1202,20 @@ class Executor {
1127
1202
  task.keyFacts = strList(a.key_facts, 8);
1128
1203
  task.openQuestions = strList(a.open_questions, 6);
1129
1204
  task.filesTouched = strList(a.files_touched, 40);
1205
+ // Structured sources: the citation pipeline's entry point. Only real
1206
+ // http(s) URLs survive; they flow into dep handoffs and the bibliography.
1207
+ const sources = Array.isArray(a.sources)
1208
+ ? a.sources
1209
+ .filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
1210
+ .slice(0, 40)
1211
+ .map((s) => ({
1212
+ url: (0, util_1.clip)(String(s.url), 500),
1213
+ title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
1214
+ date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
1215
+ note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
1216
+ }))
1217
+ : [];
1218
+ task.sources = sources.length ? sources : undefined;
1130
1219
  this.journal.append("task.report", {
1131
1220
  taskId: task.id,
1132
1221
  status: reportStatus,
@@ -1135,6 +1224,7 @@ class Executor {
1135
1224
  keyFacts: task.keyFacts,
1136
1225
  openQuestions: task.openQuestions,
1137
1226
  filesTouched: task.filesTouched,
1227
+ sources: task.sources,
1138
1228
  });
1139
1229
  if (reportStatus === "blocked") {
1140
1230
  this.finalizeTask(task, "blocked", report);
@@ -1165,6 +1255,7 @@ class Executor {
1165
1255
  return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
1166
1256
  }
1167
1257
  const missing = [];
1258
+ const malformed = [];
1168
1259
  // Remote sandboxes own their filesystem — only check host-visible paths.
1169
1260
  if (this.sandbox.localFs) {
1170
1261
  const okAt = (p) => {
@@ -1178,16 +1269,28 @@ class Executor {
1178
1269
  for (const rel of task.artifacts) {
1179
1270
  const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
1180
1271
  const inWorkdir = path.resolve(this.meta.cwd, rel);
1181
- if (!okAt(inArtifacts) && !okAt(inWorkdir))
1272
+ if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
1182
1273
  missing.push(rel);
1274
+ continue;
1275
+ }
1276
+ // Structural format check (json parses, csv is rectangular, html is
1277
+ // not a stub) — free, and catches what the LLM verifier wastes a whole
1278
+ // agent run discovering.
1279
+ const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
1280
+ if (problem)
1281
+ malformed.push(`${rel}: ${problem}`);
1183
1282
  }
1184
1283
  }
1185
1284
  if (missing.length) {
1186
1285
  return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
1187
1286
  }
1287
+ if (malformed.length) {
1288
+ return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
1289
+ }
1188
1290
  return null;
1189
1291
  }
1190
- async runVerifier(task) {
1292
+ /** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
1293
+ async verifierAgent(task, kickoff) {
1191
1294
  const agentId = (0, util_1.rid)("v");
1192
1295
  // Verification gets the strong tier when configured — a weak verifier
1193
1296
  // rubber-stamps exactly the tasks that most need scrutiny.
@@ -1200,14 +1303,16 @@ class Executor {
1200
1303
  model,
1201
1304
  purpose: `verify ${task.id}`,
1202
1305
  });
1306
+ let evidenceCalls = 0;
1307
+ const baseHooks = this.agentHooks(agentId, task.id);
1203
1308
  const outcome = await (0, agent_1.runAgent)({
1204
1309
  cfg: this.cfg,
1205
1310
  agentId,
1206
1311
  model,
1207
1312
  thinking: this.meta.options.thinking,
1208
1313
  reasoningEffort: this.meta.options.reasoningEffort,
1209
- system: (0, prompts_1.verifierSystem)(this.meta, task),
1210
- kickoff: prompts_1.VERIFIER_KICKOFF,
1314
+ system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
1315
+ kickoff,
1211
1316
  tools: (0, tools_1.verifierToolset)(),
1212
1317
  terminal: [tools_1.VERDICT_TOOL],
1213
1318
  maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
@@ -1215,21 +1320,75 @@ class Executor {
1215
1320
  // Blind verification: the verifier judges deliverables against the
1216
1321
  // objective with its own tools — it must not inherit the swarm's shared
1217
1322
  // beliefs (blackboard) or the worker's narrative beyond the claims.
1323
+ // (Dep reports are settled upstream outputs, not the worker's story.)
1218
1324
  ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
1219
- hooks: this.agentHooks(agentId, task.id),
1325
+ hooks: {
1326
+ ...baseHooks,
1327
+ onToolCall: (callId, name, args) => {
1328
+ if (name !== "verdict")
1329
+ evidenceCalls++;
1330
+ baseHooks.onToolCall(callId, name, args);
1331
+ },
1332
+ },
1220
1333
  stop: this.agentStop,
1221
1334
  });
1222
1335
  this.flushDeltas(agentId);
1223
1336
  this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
1337
+ return { outcome, evidenceCalls };
1338
+ }
1339
+ async runVerifier(task) {
1340
+ const strict = this.cfg.verification === "strict";
1341
+ let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
1224
1342
  if (this.ac.signal.aborted)
1225
1343
  return true;
1344
+ // Strict mode: a pass verdict backed by zero tool calls is an opinion,
1345
+ // not a verification. One re-run demanding evidence; if that also passes
1346
+ // tool-free, accept but say so in the journal.
1347
+ if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
1348
+ this.journal.append("log", {
1349
+ level: "info",
1350
+ msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
1351
+ });
1352
+ const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
1353
+ if (this.ac.signal.aborted)
1354
+ return true;
1355
+ if (second.outcome.terminal) {
1356
+ if (second.evidenceCalls === 0) {
1357
+ this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
1358
+ }
1359
+ outcome = second.outcome;
1360
+ }
1361
+ }
1226
1362
  const v = (outcome.terminal?.args ?? {});
1227
- const strict = this.cfg.verification === "strict";
1228
1363
  // No verdict returned: in strict mode fail closed, otherwise accept.
1229
1364
  const pass = outcome.terminal ? Boolean(v.pass) : !strict;
1230
- const feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1365
+ let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
1366
+ // Structured issues become the retry's worklist — numbered, with evidence.
1367
+ const issues = Array.isArray(v.issues)
1368
+ ? v.issues
1369
+ .filter((i) => i && typeof i === "object" && i.problem)
1370
+ .slice(0, 5)
1371
+ .map((i) => ({
1372
+ problem: (0, util_1.oneLine)(String(i.problem), 300),
1373
+ evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
1374
+ fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
1375
+ }))
1376
+ : [];
1377
+ if (!pass && issues.length) {
1378
+ feedback = [
1379
+ feedback,
1380
+ ...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
1381
+ ]
1382
+ .filter(Boolean)
1383
+ .join("\n");
1384
+ }
1231
1385
  task.feedback = feedback;
1232
- this.journal.append("verify.result", { taskId: task.id, pass, feedback });
1386
+ this.journal.append("verify.result", {
1387
+ taskId: task.id,
1388
+ pass,
1389
+ feedback,
1390
+ ...(issues.length ? { issues } : {}),
1391
+ });
1233
1392
  return pass;
1234
1393
  }
1235
1394
  finalizeTask(task, status, reason) {
@@ -1237,6 +1396,14 @@ class Executor {
1237
1396
  task.endedAt = Date.now();
1238
1397
  if (reason && status !== "done")
1239
1398
  task.error = reason;
1399
+ // A settled task holds no file claims — release them so the digest and
1400
+ // search_notes don't accumulate dead claims on long runs. In-place splice:
1401
+ // teams share this array by reference.
1402
+ for (let i = this.notes.length - 1; i >= 0; i--) {
1403
+ const n = this.notes[i];
1404
+ if (n.kind === "claim" && n.taskId === task.id)
1405
+ this.notes.splice(i, 1);
1406
+ }
1240
1407
  this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
1241
1408
  this.settledSinceUpdate.push(task.id);
1242
1409
  this.maybeSnapshot();
@@ -1283,6 +1450,18 @@ class Executor {
1283
1450
  const rel = `progress-report-${n}.md`;
1284
1451
  fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
1285
1452
  this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
1453
+ // Interim memory: a multi-day run that dies before synthesis still
1454
+ // leaves the next swarm in this workspace something to build on.
1455
+ if (!this.meta.sandbox) {
1456
+ (0, memory_1.appendMemory)(this.meta.cwd, {
1457
+ runId: this.meta.id,
1458
+ mission: this.meta.mission,
1459
+ finishedAt: Date.now(),
1460
+ status: "in-progress",
1461
+ summary: (0, util_1.clip)(res.content, 600),
1462
+ keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
1463
+ });
1464
+ }
1286
1465
  })
1287
1466
  .catch((e) => {
1288
1467
  if (!this.ac.signal.aborted)
@@ -1369,7 +1548,7 @@ class Executor {
1369
1548
  });
1370
1549
  }
1371
1550
  }
1372
- agentHooks(agentId, taskId) {
1551
+ agentHooks(agentId, taskId, trackErrorsOn) {
1373
1552
  return {
1374
1553
  onDelta: (channel, text) => {
1375
1554
  this.queueDelta(agentId, taskId, channel, text);
@@ -1379,6 +1558,8 @@ class Executor {
1379
1558
  this.journal.append("tool.call", { agentId, taskId, callId, name, args });
1380
1559
  },
1381
1560
  onToolResult: (callId, name, ok, summary) => {
1561
+ if (!ok && trackErrorsOn)
1562
+ trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
1382
1563
  this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
1383
1564
  },
1384
1565
  onUsage: this.onUsage,
@@ -1471,6 +1652,10 @@ class Executor {
1471
1652
  ? tasks.map(prompts_1.reportBlock).join("\n\n")
1472
1653
  : "(no tasks were completed)";
1473
1654
  const artifactList = this.listArtifacts().join("\n") || "(none)";
1655
+ // The citation pipeline's last hop: every source any worker reported,
1656
+ // deduplicated and numbered, becomes the synthesizer's bibliography.
1657
+ const allSources = (0, report_1.aggregateSources)(tasks);
1658
+ const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
1474
1659
  const agentId = (0, util_1.rid)("synth");
1475
1660
  let summary = "";
1476
1661
  let reportMarkdown = "";
@@ -1488,6 +1673,7 @@ class Executor {
1488
1673
  blackboard: this.blackboardDigest(6000),
1489
1674
  artifactList,
1490
1675
  reason: this.finishReason || "completed",
1676
+ sources: sourcesText,
1491
1677
  }),
1492
1678
  kickoff: prompts_1.SYNTH_KICKOFF,
1493
1679
  tools: (0, tools_1.synthToolset)(),
@@ -1513,7 +1699,7 @@ class Executor {
1513
1699
  messages: [
1514
1700
  {
1515
1701
  role: "user",
1516
- content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
1702
+ content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
1517
1703
  },
1518
1704
  ],
1519
1705
  thinking: false,
@@ -1562,6 +1748,7 @@ class Executor {
1562
1748
  // Cross-run memory: real-directory runs leave a trace for the next swarm.
1563
1749
  if (!this.meta.sandbox && status !== "cancelled") {
1564
1750
  (0, memory_1.appendMemory)(this.meta.cwd, {
1751
+ runId: this.meta.id,
1565
1752
  mission: this.meta.mission,
1566
1753
  finishedAt: Date.now(),
1567
1754
  status,
@@ -1572,6 +1759,10 @@ class Executor {
1572
1759
  }
1573
1760
  fallbackReport(tasks) {
1574
1761
  const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
1762
+ // Even without a synthesizer, surface the cross-task essentials first.
1763
+ const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
1764
+ if (facts.length)
1765
+ lines.push(`## Key facts`, ...facts.slice(0, 60), "");
1575
1766
  for (const t of tasks) {
1576
1767
  lines.push(`## ${t.id} ${t.title} (${t.status})`);
1577
1768
  lines.push(t.report || t.error || "(no output)");
@@ -1579,6 +1770,15 @@ class Executor {
1579
1770
  lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
1580
1771
  lines.push("");
1581
1772
  }
1773
+ const sources = (0, report_1.aggregateSources)(tasks);
1774
+ if (sources.length) {
1775
+ lines.push(`## Sources`);
1776
+ for (const s of sources.slice(0, 100)) {
1777
+ lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
1778
+ }
1779
+ if (sources.length > 100)
1780
+ lines.push(`…and ${sources.length - 100} more in the task reports.`);
1781
+ }
1582
1782
  return lines.join("\n");
1583
1783
  }
1584
1784
  listArtifacts() {
package/dist/hub.js CHANGED
@@ -43,6 +43,7 @@ const url_1 = require("url");
43
43
  const config_1 = require("./config");
44
44
  const control_1 = require("./control");
45
45
  const crawltools_1 = require("./crawltools");
46
+ const webtools_1 = require("./webtools");
46
47
  const deepseek_1 = require("./deepseek");
47
48
  const providers_1 = require("./providers");
48
49
  const journal_1 = require("./journal");
@@ -83,9 +84,16 @@ function startHub(opts) {
83
84
  async function handle(req, res, opts) {
84
85
  const url = new url_1.URL(req.url || "/", `http://localhost:${opts.port}`);
85
86
  const p = url.pathname;
86
- res.setHeader("access-control-allow-origin", "*");
87
- res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
88
- res.setHeader("access-control-allow-headers", "content-type");
87
+ // Localhost-only CORS. The hub launches runs and reads reports with the
88
+ // operator's keys — a random website's JS must never get a readable
89
+ // response. The dev UI on another localhost port is the one legitimate
90
+ // cross-origin client; everyone else gets no CORS headers at all.
91
+ const origin = String(req.headers.origin || "");
92
+ if (/^https?:\/\/(localhost|127\.0\.0\.1|\[::1\])(:\d+)?$/.test(origin)) {
93
+ res.setHeader("access-control-allow-origin", origin);
94
+ res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
95
+ res.setHeader("access-control-allow-headers", "content-type");
96
+ }
89
97
  if (req.method === "OPTIONS") {
90
98
  res.writeHead(204);
91
99
  res.end();
@@ -159,6 +167,51 @@ async function api(req, res, url, opts) {
159
167
  const r = await (0, sandbox_1.testSandbox)(cfg, kind);
160
168
  return sendJson(res, 200, { kind, ...r });
161
169
  }
170
+ // Settings diagnostics: prove the search engines / crawl backend actually
171
+ // work with the saved keys before a mission depends on them.
172
+ if (p === "/api/search/test" && method === "POST") {
173
+ const q = "open source vector database";
174
+ const probe = async (engine, fn) => {
175
+ try {
176
+ const hits = await fn();
177
+ return { engine, ok: hits.length > 0, detail: `${hits.length} result(s)` };
178
+ }
179
+ catch (e) {
180
+ return { engine, ok: false, detail: (0, util_1.errMsg)(e) };
181
+ }
182
+ };
183
+ const checks = [probe("duckduckgo", () => (0, webtools_1.ddgSearch)(q, 3)), probe("bing", () => (0, webtools_1.bingSearch)(q, 3))];
184
+ if (cfg.tinyfishApiKey)
185
+ checks.push(probe("tinyfish", () => (0, webtools_1.tinyfishSearch)(cfg, q, 3)));
186
+ const engines = await Promise.all(checks);
187
+ return sendJson(res, 200, { ok: engines.some((e) => e.ok), engines });
188
+ }
189
+ if (p === "/api/crawl/test" && method === "POST") {
190
+ const backend = (0, crawltools_1.resolveCrawlBackend)(cfg);
191
+ if (!backend) {
192
+ return sendJson(res, 200, { ok: false, backend: null, detail: "no crawl backend configured — add a key first" });
193
+ }
194
+ try {
195
+ if ((0, crawltools_1.hasScrapeBackend)(cfg)) {
196
+ const text = await (0, crawltools_1.scrapeUrl)(cfg, "https://example.com/");
197
+ return sendJson(res, 200, {
198
+ ok: Boolean(text && text.length > 50),
199
+ backend,
200
+ detail: text ? `scraped ${text.length} chars` : "empty scrape result",
201
+ });
202
+ }
203
+ // deepcrawl has no single-page scrape — smoke a 1-page crawl instead.
204
+ const out = await (0, crawltools_1.crawlSite)(cfg, { url: "https://example.com/", maxPages: 1 });
205
+ return sendJson(res, 200, {
206
+ ok: out.pages.length > 0,
207
+ backend,
208
+ detail: out.pages.length ? `crawled ${out.pages.length} page(s)` : out.warnings.join("; ") || "no pages",
209
+ });
210
+ }
211
+ catch (e) {
212
+ return sendJson(res, 200, { ok: false, backend, detail: (0, util_1.errMsg)(e) });
213
+ }
214
+ }
162
215
  if (p === "/api/models" && method === "GET") {
163
216
  try {
164
217
  const models = await (0, deepseek_1.listModels)(cfg);
@@ -297,6 +350,14 @@ async function api(req, res, url, opts) {
297
350
  res.end(fs.readFileSync(file));
298
351
  return;
299
352
  }
353
+ if (sub === "/plan" && method === "GET") {
354
+ const file = path.join((0, config_1.runDir)(id), "artifacts", "mission-plan.md");
355
+ if (!fs.existsSync(file))
356
+ return sendJson(res, 404, { error: "no plan yet" });
357
+ res.writeHead(200, { "content-type": "text/markdown; charset=utf-8" });
358
+ res.end(fs.readFileSync(file));
359
+ return;
360
+ }
300
361
  if (sub === "/artifacts" && method === "GET") {
301
362
  return sendJson(res, 200, { artifacts: listArtifactFiles(id) });
302
363
  }
@@ -429,6 +490,9 @@ function publicConfig(cfg) {
429
490
  reasoningEffort: cfg.reasoningEffort,
430
491
  safeMode: cfg.safeMode,
431
492
  contextTokenLimit: cfg.contextTokenLimit,
493
+ contextWindows: cfg.contextWindows,
494
+ cheapModel: cfg.cheapModel,
495
+ strongModel: cfg.strongModel,
432
496
  knownModels,
433
497
  pricing: cfg.pricing,
434
498
  };
@@ -482,6 +546,8 @@ function snapshot(state, id) {
482
546
  operatorNotes: state.operatorNotes,
483
547
  usageByModel: Object.fromEntries(state.usageByModel),
484
548
  cost: state.cost,
549
+ budgetSeries: state.budgetSeries,
550
+ planExcerpt: state.planExcerpt,
485
551
  finalSummary: state.finalSummary,
486
552
  finalReportPath: state.finalReportPath,
487
553
  live: (0, run_1.isRunLive)(id),
package/dist/memory.js CHANGED
@@ -58,10 +58,11 @@ function loadMemory(cwd) {
58
58
  }
59
59
  function appendMemory(cwd, entry) {
60
60
  try {
61
- const file = memoryFile(cwd);
62
- (0, util_1.ensureDir)(path.dirname(file));
63
- const entries = [...loadMemory(cwd), entry].slice(-MAX_ENTRIES);
64
- fs.writeFileSync(file, JSON.stringify({ cwd: path.resolve(cwd), entries }, null, 2), "utf8");
61
+ // Same-run entries replace (interim → final); writeJson is temp+rename so
62
+ // a crash mid-write never loses the prior history.
63
+ const prior = loadMemory(cwd).filter((e) => !(entry.runId && e.runId === entry.runId));
64
+ const entries = [...prior, entry].slice(-MAX_ENTRIES);
65
+ (0, util_1.writeJson)(memoryFile(cwd), { cwd: path.resolve(cwd), entries });
65
66
  }
66
67
  catch {
67
68
  /* memory is best-effort */