@robzilla1738/agentswarm 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -12
- package/dist/agent.js +6 -15
- package/dist/cli.js +31 -4
- package/dist/config.js +44 -1
- package/dist/crawltools.js +3 -22
- package/dist/executor.js +276 -60
- package/dist/hub.js +67 -3
- package/dist/journal.js +39 -5
- package/dist/memory.js +17 -11
- package/dist/pdftext.js +211 -0
- package/dist/prompts.js +23 -15
- package/dist/report.js +39 -1
- package/dist/run.js +8 -0
- package/dist/sandbox.js +11 -0
- package/dist/searchcore.js +55 -2
- package/dist/state.js +67 -17
- package/dist/tools.js +208 -19
- package/dist/util.js +117 -3
- package/dist/webtools.js +185 -32
- package/package.json +1 -1
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/677-a62d486d6734bcf3.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-c29f95c51af08c60.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
- package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → JFkx5KtNi0DYyqm_THzbY}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → JFkx5KtNi0DYyqm_THzbY}/_ssgManifest.js +0 -0
package/dist/executor.js
CHANGED
|
@@ -69,10 +69,9 @@ class Executor {
|
|
|
69
69
|
finishNotes = "";
|
|
70
70
|
finishReason = "";
|
|
71
71
|
fatal = null;
|
|
72
|
+
/** "error" = the turn ended in a call failure, not a decision. */
|
|
72
73
|
lastConductorAction = "none";
|
|
73
74
|
conductorFailures = 0;
|
|
74
|
-
/** True when the last conductor turn ended in a call error, not a decision. */
|
|
75
|
-
lastConductorErrored = false;
|
|
76
75
|
resumed = false;
|
|
77
76
|
sandbox;
|
|
78
77
|
mode;
|
|
@@ -134,7 +133,14 @@ class Executor {
|
|
|
134
133
|
const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
|
|
135
134
|
this.taskCounter = Math.max(this.taskCounter, n);
|
|
136
135
|
}
|
|
137
|
-
|
|
136
|
+
// Drop claims held by settled tasks — they were released on task end and
|
|
137
|
+
// must not resurrect across a restart.
|
|
138
|
+
const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
|
|
139
|
+
this.notes = state.notes
|
|
140
|
+
.map((n) => ({ taskId: n.taskId, teamId: n.teamId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
|
|
141
|
+
// Team claims always drop: the owning child executor died with the
|
|
142
|
+
// crash, and a re-run team task re-claims from scratch.
|
|
143
|
+
.filter((n) => !(n.kind === "claim" && (n.teamId || (n.taskId && settled.has(n.taskId)))));
|
|
138
144
|
const lastPhase = state.phases[state.phases.length - 1];
|
|
139
145
|
if (lastPhase)
|
|
140
146
|
this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
|
|
@@ -182,16 +188,16 @@ class Executor {
|
|
|
182
188
|
blackboardDigest(max = 1800) {
|
|
183
189
|
if (!this.notes.length)
|
|
184
190
|
return "";
|
|
185
|
-
const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
|
|
186
|
-
// Decisions anchor mission-wide coherence and are never
|
|
187
|
-
// digest; everything else shows only its recent tail.
|
|
188
|
-
const
|
|
189
|
-
const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
|
|
191
|
+
const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
|
|
192
|
+
// Decisions and conflicts anchor mission-wide coherence and are never
|
|
193
|
+
// trimmed out of the digest; everything else shows only its recent tail.
|
|
194
|
+
const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
|
|
195
|
+
const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
|
|
190
196
|
let tail = rest.join("\n");
|
|
191
|
-
const budget = Math.max(400, max -
|
|
197
|
+
const budget = Math.max(400, max - pinned.join("\n").length);
|
|
192
198
|
if (tail.length > budget)
|
|
193
199
|
tail = tail.slice(tail.length - budget);
|
|
194
|
-
return [
|
|
200
|
+
return [pinned.join("\n"), tail].filter(Boolean).join("\n");
|
|
195
201
|
}
|
|
196
202
|
searchNotes(query) {
|
|
197
203
|
const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
@@ -266,6 +272,8 @@ class Executor {
|
|
|
266
272
|
content: this.resumed
|
|
267
273
|
? (0, prompts_1.conductorUpdate)({
|
|
268
274
|
blackboard: this.blackboardDigest(),
|
|
275
|
+
phase: this.phaseLine(),
|
|
276
|
+
plan: this.planPin(),
|
|
269
277
|
nextId: this.nextId(),
|
|
270
278
|
taskTable: (0, prompts_1.taskTable)(this.taskList()),
|
|
271
279
|
budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
|
|
@@ -277,6 +285,15 @@ class Executor {
|
|
|
277
285
|
: (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
|
|
278
286
|
},
|
|
279
287
|
];
|
|
288
|
+
if (this.resumed) {
|
|
289
|
+
// The conductor's reasoning history died with the old process. Re-seed
|
|
290
|
+
// the durable facts into the same slot trimConductorHistory() maintains,
|
|
291
|
+
// so a resumed conductor knows what settled and what was decided.
|
|
292
|
+
this.conductorMessages.splice(1, 0, {
|
|
293
|
+
role: "user",
|
|
294
|
+
content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
|
|
295
|
+
});
|
|
296
|
+
}
|
|
280
297
|
try {
|
|
281
298
|
await this.conductorTurn();
|
|
282
299
|
this.setStatus("running");
|
|
@@ -382,6 +399,13 @@ class Executor {
|
|
|
382
399
|
sharedNotes: this.notes,
|
|
383
400
|
});
|
|
384
401
|
await child.run();
|
|
402
|
+
// The sub-swarm is over: claims its tasks left behind (e.g. after a child
|
|
403
|
+
// cancellation) are no longer live and must not haunt the shared board.
|
|
404
|
+
for (let i = this.notes.length - 1; i >= 0; i--) {
|
|
405
|
+
const n = this.notes[i];
|
|
406
|
+
if (n.kind === "claim" && n.teamId === task.id)
|
|
407
|
+
this.notes.splice(i, 1);
|
|
408
|
+
}
|
|
385
409
|
if (this.ac.signal.aborted) {
|
|
386
410
|
this.finalizeTask(task, "failed", "run cancelled");
|
|
387
411
|
return;
|
|
@@ -390,11 +414,13 @@ class Executor {
|
|
|
390
414
|
for (const a of child.teamArtifacts())
|
|
391
415
|
if (!task.artifacts.includes(a))
|
|
392
416
|
task.artifacts.push(a);
|
|
417
|
+
const ok = child.anyTaskDone();
|
|
418
|
+
const reportStatus = ok ? "done" : "blocked";
|
|
393
419
|
task.report = report;
|
|
394
|
-
task.reportStatus =
|
|
420
|
+
task.reportStatus = reportStatus;
|
|
395
421
|
this.journal.append("team.report", { taskId: task.id, report, artifacts: task.artifacts });
|
|
396
|
-
this.journal.append("task.report", { taskId: task.id, status:
|
|
397
|
-
this.finalizeTask(task,
|
|
422
|
+
this.journal.append("task.report", { taskId: task.id, status: reportStatus, report, artifacts: task.artifacts });
|
|
423
|
+
this.finalizeTask(task, ok ? "done" : "failed", report);
|
|
398
424
|
}
|
|
399
425
|
async mainLoop() {
|
|
400
426
|
while (!this.finishing) {
|
|
@@ -430,7 +456,7 @@ class Executor {
|
|
|
430
456
|
// An errored turn is not a decision — keep looping so the breaker
|
|
431
457
|
// can retry (and eventually trip) instead of misreading the error
|
|
432
458
|
// as "the conductor chose to stop".
|
|
433
|
-
if (this.lastConductorAction !== "spawn" &&
|
|
459
|
+
if (this.lastConductorAction !== "spawn" && this.lastConductorAction !== "error") {
|
|
434
460
|
this.finishing = true;
|
|
435
461
|
this.finishReason = this.finishReason || "all tasks settled";
|
|
436
462
|
}
|
|
@@ -439,7 +465,7 @@ class Executor {
|
|
|
439
465
|
// Stuck: pending tasks exist but can't run (failed/blocked deps).
|
|
440
466
|
this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
|
|
441
467
|
await this.conductorTurn();
|
|
442
|
-
if (this.lastConductorAction === "wait"
|
|
468
|
+
if (this.lastConductorAction === "wait") {
|
|
443
469
|
this.finishing = true;
|
|
444
470
|
this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
|
|
445
471
|
}
|
|
@@ -580,12 +606,10 @@ class Executor {
|
|
|
580
606
|
const scale = Number(process.env.SWARM_BACKOFF_SCALE || "1") || 1;
|
|
581
607
|
const backoff = [2_000, 5_000, 15_000, 30_000][Math.min(this.conductorFailures - 1, 3)] * scale;
|
|
582
608
|
await new Promise((r) => setTimeout(r, backoff));
|
|
583
|
-
this.lastConductorAction = "
|
|
584
|
-
this.lastConductorErrored = true;
|
|
609
|
+
this.lastConductorAction = "error";
|
|
585
610
|
return;
|
|
586
611
|
}
|
|
587
612
|
this.conductorFailures = 0;
|
|
588
|
-
this.lastConductorErrored = false;
|
|
589
613
|
this.onUsage(this.meta.options.conductorModel, res.usage);
|
|
590
614
|
if (res.content.trim())
|
|
591
615
|
this.journal.append("conductor.say", { text: (0, util_1.clip)(res.content, 4000) });
|
|
@@ -767,7 +791,8 @@ class Executor {
|
|
|
767
791
|
return reports.map(prompts_1.reportBlock);
|
|
768
792
|
const important = reports.filter((t) => t.status !== "done");
|
|
769
793
|
const done = reports.filter((t) => t.status === "done");
|
|
770
|
-
const
|
|
794
|
+
const room = Math.max(0, CAP - important.length);
|
|
795
|
+
const fullDone = room > 0 ? done.slice(-room) : []; // slice(-0) would return everything
|
|
771
796
|
const briefDone = done.slice(0, done.length - fullDone.length);
|
|
772
797
|
return [
|
|
773
798
|
...important.map(prompts_1.reportBlock),
|
|
@@ -799,8 +824,8 @@ class Executor {
|
|
|
799
824
|
* trimmed history so the conductor never loses the plot on long missions —
|
|
800
825
|
* rebuilt fresh each trim from current state, so it also survives resume.
|
|
801
826
|
*/
|
|
802
|
-
missionLedger() {
|
|
803
|
-
const lines = [
|
|
827
|
+
missionLedger(intro = "Earlier orchestration history was trimmed.") {
|
|
828
|
+
const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
|
|
804
829
|
if (this.phase)
|
|
805
830
|
lines.push(this.phaseLine());
|
|
806
831
|
const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
|
|
@@ -843,6 +868,19 @@ class Executor {
|
|
|
843
868
|
else
|
|
844
869
|
this.conductorMessages.splice(1, 0, msg);
|
|
845
870
|
};
|
|
871
|
+
// Old conductor turns carry the bulk in thinking traces and verbose prose;
|
|
872
|
+
// the durable decisions live in the ledger and the plan pin. Compact them
|
|
873
|
+
// in place before resorting to dropping whole messages. (sanitizeMessages
|
|
874
|
+
// backfills reasoning_content with "" for DeepSeek tool-call turns.)
|
|
875
|
+
for (let i = 1; i < this.conductorMessages.length - 6; i++) {
|
|
876
|
+
const m = this.conductorMessages[i];
|
|
877
|
+
if (m.role !== "assistant")
|
|
878
|
+
continue;
|
|
879
|
+
if (m.reasoning_content)
|
|
880
|
+
m.reasoning_content = "";
|
|
881
|
+
if (m.content && m.content.length > 400)
|
|
882
|
+
m.content = (0, util_1.clip)(m.content, 400);
|
|
883
|
+
}
|
|
846
884
|
if (this.conductorMessages.length > MAX) {
|
|
847
885
|
const system = this.conductorMessages[0];
|
|
848
886
|
const tail = this.conductorMessages.slice(-(MAX - 2));
|
|
@@ -855,7 +893,7 @@ class Executor {
|
|
|
855
893
|
// Count alone doesn't bound size: every update embeds the full task table,
|
|
856
894
|
// so a deep run can blow the model window long before 60 messages. The
|
|
857
895
|
// mission itself lives in the system message and always survives.
|
|
858
|
-
const budget = Math.floor(this.cfg.
|
|
896
|
+
const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
|
|
859
897
|
if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
|
|
860
898
|
return;
|
|
861
899
|
setLedger();
|
|
@@ -876,20 +914,48 @@ class Executor {
|
|
|
876
914
|
hasOpenWork() {
|
|
877
915
|
return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
|
|
878
916
|
}
|
|
917
|
+
/** Walk a failed/blocked dep chain down to the task that actually failed. */
|
|
918
|
+
rootFailure(id) {
|
|
919
|
+
let cur = this.tasks.get(id);
|
|
920
|
+
const seen = new Set();
|
|
921
|
+
while (cur && !seen.has(cur.id)) {
|
|
922
|
+
seen.add(cur.id);
|
|
923
|
+
const next = cur.deps
|
|
924
|
+
.map((d) => this.tasks.get(d))
|
|
925
|
+
.find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
|
|
926
|
+
if (!next)
|
|
927
|
+
return cur;
|
|
928
|
+
cur = next;
|
|
929
|
+
}
|
|
930
|
+
return cur;
|
|
931
|
+
}
|
|
879
932
|
blockStuckTasks() {
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
933
|
+
// Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
|
|
934
|
+
// pass, not one level per conductor turn.
|
|
935
|
+
for (let changed = true; changed;) {
|
|
936
|
+
changed = false;
|
|
937
|
+
for (const t of this.taskList()) {
|
|
938
|
+
if (t.status !== "pending")
|
|
939
|
+
continue;
|
|
940
|
+
const bad = t.deps.find((d) => {
|
|
941
|
+
const s = this.tasks.get(d)?.status;
|
|
942
|
+
return s === "failed" || s === "blocked";
|
|
943
|
+
});
|
|
944
|
+
if (!bad)
|
|
945
|
+
continue;
|
|
946
|
+
// Carry the root cause so the conductor re-plans around the actual
|
|
947
|
+
// failure, not a chain of "dependency did not complete".
|
|
948
|
+
const root = this.rootFailure(bad);
|
|
949
|
+
const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
|
|
888
950
|
t.status = "blocked";
|
|
889
|
-
t.error =
|
|
951
|
+
t.error =
|
|
952
|
+
root && root.id !== bad
|
|
953
|
+
? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
|
|
954
|
+
: `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
|
|
890
955
|
t.endedAt = Date.now();
|
|
891
956
|
this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
|
|
892
957
|
this.settledSinceUpdate.push(t.id);
|
|
958
|
+
changed = true;
|
|
893
959
|
}
|
|
894
960
|
}
|
|
895
961
|
}
|
|
@@ -954,27 +1020,40 @@ class Executor {
|
|
|
954
1020
|
taskId: task?.id,
|
|
955
1021
|
signal: this.ac.signal,
|
|
956
1022
|
addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
|
|
957
|
-
addNote: (text, key, kind) => {
|
|
958
|
-
this.notes.push({ taskId: task?.id, key, kind, text });
|
|
1023
|
+
addNote: (text, key, kind, url) => {
|
|
1024
|
+
this.notes.push({ taskId: task?.id, teamId: this.teamId, key, kind, text, url });
|
|
959
1025
|
// Only the recent tail ever feeds digests; without a cap a multi-day
|
|
960
|
-
// run accumulates every note in memory. Decisions
|
|
1026
|
+
// run accumulates every note in memory. Decisions and conflicts are
|
|
1027
|
+
// kept regardless. In-place splice: teams share this array by reference.
|
|
961
1028
|
if (this.notes.length > 4000) {
|
|
962
|
-
const
|
|
963
|
-
const
|
|
964
|
-
|
|
965
|
-
this.notes
|
|
1029
|
+
const keep = (n) => n.kind === "decision" || n.kind === "conflict";
|
|
1030
|
+
const pinnedCount = this.notes.filter(keep).length;
|
|
1031
|
+
let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
|
|
1032
|
+
for (let i = 0; i < this.notes.length && toDrop > 0;) {
|
|
1033
|
+
if (!keep(this.notes[i])) {
|
|
1034
|
+
this.notes.splice(i, 1);
|
|
1035
|
+
toDrop--;
|
|
1036
|
+
}
|
|
1037
|
+
else
|
|
1038
|
+
i++;
|
|
1039
|
+
}
|
|
966
1040
|
}
|
|
967
|
-
this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
|
|
1041
|
+
this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
|
|
968
1042
|
},
|
|
969
1043
|
searchNotes: (q) => this.searchNotes(q),
|
|
970
1044
|
readReport: (taskId) => this.readReportText(taskId),
|
|
971
1045
|
checkClaim: (rel) => {
|
|
972
1046
|
const norm = rel.replace(/^\.\//, "");
|
|
973
|
-
const claim = this.notes.find((n) =>
|
|
974
|
-
n.key
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
1047
|
+
const claim = this.notes.find((n) => {
|
|
1048
|
+
if (n.kind !== "claim" || n.key !== norm || !n.taskId)
|
|
1049
|
+
return false;
|
|
1050
|
+
// Another executor's claim: its tasks aren't in this.tasks, but
|
|
1051
|
+
// claims are spliced out when their task settles (and when a team
|
|
1052
|
+
// ends), so presence alone means the holder is still live.
|
|
1053
|
+
if (n.teamId !== this.teamId)
|
|
1054
|
+
return true;
|
|
1055
|
+
return n.taskId !== task?.id && ["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? "");
|
|
1056
|
+
});
|
|
978
1057
|
return claim
|
|
979
1058
|
? `⚠ ${claim.taskId} holds a claim on ${norm} ("${(0, util_1.oneLine)(claim.text, 80)}") — coordinate via the blackboard before further edits.`
|
|
980
1059
|
: null;
|
|
@@ -1031,7 +1110,12 @@ class Executor {
|
|
|
1031
1110
|
if (task.attempt < this.cfg.verifyMaxAttempts) {
|
|
1032
1111
|
task.attempt++;
|
|
1033
1112
|
task.status = "running";
|
|
1034
|
-
this.journal.append("task.status", {
|
|
1113
|
+
this.journal.append("task.status", {
|
|
1114
|
+
taskId: task.id,
|
|
1115
|
+
status: "running",
|
|
1116
|
+
attempt: task.attempt,
|
|
1117
|
+
reason: task.feedback || task.error,
|
|
1118
|
+
});
|
|
1035
1119
|
continue;
|
|
1036
1120
|
}
|
|
1037
1121
|
this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
|
|
@@ -1046,12 +1130,12 @@ class Executor {
|
|
|
1046
1130
|
}
|
|
1047
1131
|
if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
|
|
1048
1132
|
task.attempt++;
|
|
1049
|
-
task.error = (0, util_1.errMsg)(e)
|
|
1133
|
+
task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
|
|
1050
1134
|
task.status = "running";
|
|
1051
1135
|
this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
|
|
1052
1136
|
continue;
|
|
1053
1137
|
}
|
|
1054
|
-
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
|
|
1138
|
+
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
|
|
1055
1139
|
return;
|
|
1056
1140
|
}
|
|
1057
1141
|
}
|
|
@@ -1068,6 +1152,7 @@ class Executor {
|
|
|
1068
1152
|
const agentId = (0, util_1.rid)("w");
|
|
1069
1153
|
const model = this.resolveModel(task.modelTier);
|
|
1070
1154
|
task.agentIds.push(agentId);
|
|
1155
|
+
task.lastToolError = undefined; // diagnostics are per-attempt
|
|
1071
1156
|
const dirListing = this.topListing();
|
|
1072
1157
|
const system = (0, prompts_1.workerSystem)({
|
|
1073
1158
|
agentId,
|
|
@@ -1101,7 +1186,7 @@ class Executor {
|
|
|
1101
1186
|
signal: this.ac.signal,
|
|
1102
1187
|
ctx: this.makeToolCtx(agentId, task),
|
|
1103
1188
|
hooks: {
|
|
1104
|
-
...this.agentHooks(agentId, task.id),
|
|
1189
|
+
...this.agentHooks(agentId, task.id, task),
|
|
1105
1190
|
onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
|
|
1106
1191
|
},
|
|
1107
1192
|
stop: this.agentStop,
|
|
@@ -1111,7 +1196,11 @@ class Executor {
|
|
|
1111
1196
|
if (this.ac.signal.aborted)
|
|
1112
1197
|
return "done";
|
|
1113
1198
|
if (!outcome.terminal) {
|
|
1114
|
-
|
|
1199
|
+
const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
|
|
1200
|
+
task.error =
|
|
1201
|
+
"worker ended without reporting" +
|
|
1202
|
+
(task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
|
|
1203
|
+
(lastWords ? `; last words: ${lastWords}` : "");
|
|
1115
1204
|
return "retry";
|
|
1116
1205
|
}
|
|
1117
1206
|
const a = outcome.terminal.args;
|
|
@@ -1127,6 +1216,20 @@ class Executor {
|
|
|
1127
1216
|
task.keyFacts = strList(a.key_facts, 8);
|
|
1128
1217
|
task.openQuestions = strList(a.open_questions, 6);
|
|
1129
1218
|
task.filesTouched = strList(a.files_touched, 40);
|
|
1219
|
+
// Structured sources: the citation pipeline's entry point. Only real
|
|
1220
|
+
// http(s) URLs survive; they flow into dep handoffs and the bibliography.
|
|
1221
|
+
const sources = Array.isArray(a.sources)
|
|
1222
|
+
? a.sources
|
|
1223
|
+
.filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
|
|
1224
|
+
.slice(0, 40)
|
|
1225
|
+
.map((s) => ({
|
|
1226
|
+
url: (0, util_1.clip)(String(s.url), 500),
|
|
1227
|
+
title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
|
|
1228
|
+
date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
|
|
1229
|
+
note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
|
|
1230
|
+
}))
|
|
1231
|
+
: [];
|
|
1232
|
+
task.sources = sources.length ? sources : undefined;
|
|
1130
1233
|
this.journal.append("task.report", {
|
|
1131
1234
|
taskId: task.id,
|
|
1132
1235
|
status: reportStatus,
|
|
@@ -1135,6 +1238,7 @@ class Executor {
|
|
|
1135
1238
|
keyFacts: task.keyFacts,
|
|
1136
1239
|
openQuestions: task.openQuestions,
|
|
1137
1240
|
filesTouched: task.filesTouched,
|
|
1241
|
+
sources: task.sources,
|
|
1138
1242
|
});
|
|
1139
1243
|
if (reportStatus === "blocked") {
|
|
1140
1244
|
this.finalizeTask(task, "blocked", report);
|
|
@@ -1165,6 +1269,7 @@ class Executor {
|
|
|
1165
1269
|
return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
|
|
1166
1270
|
}
|
|
1167
1271
|
const missing = [];
|
|
1272
|
+
const malformed = [];
|
|
1168
1273
|
// Remote sandboxes own their filesystem — only check host-visible paths.
|
|
1169
1274
|
if (this.sandbox.localFs) {
|
|
1170
1275
|
const okAt = (p) => {
|
|
@@ -1178,16 +1283,28 @@ class Executor {
|
|
|
1178
1283
|
for (const rel of task.artifacts) {
|
|
1179
1284
|
const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
|
|
1180
1285
|
const inWorkdir = path.resolve(this.meta.cwd, rel);
|
|
1181
|
-
if (!okAt(inArtifacts) && !okAt(inWorkdir))
|
|
1286
|
+
if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
|
|
1182
1287
|
missing.push(rel);
|
|
1288
|
+
continue;
|
|
1289
|
+
}
|
|
1290
|
+
// Structural format check (json parses, csv is rectangular, html is
|
|
1291
|
+
// not a stub) — free, and catches what the LLM verifier wastes a whole
|
|
1292
|
+
// agent run discovering.
|
|
1293
|
+
const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
|
|
1294
|
+
if (problem)
|
|
1295
|
+
malformed.push(`${rel}: ${problem}`);
|
|
1183
1296
|
}
|
|
1184
1297
|
}
|
|
1185
1298
|
if (missing.length) {
|
|
1186
1299
|
return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
|
|
1187
1300
|
}
|
|
1301
|
+
if (malformed.length) {
|
|
1302
|
+
return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
|
|
1303
|
+
}
|
|
1188
1304
|
return null;
|
|
1189
1305
|
}
|
|
1190
|
-
|
|
1306
|
+
/** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
|
|
1307
|
+
async verifierAgent(task, kickoff) {
|
|
1191
1308
|
const agentId = (0, util_1.rid)("v");
|
|
1192
1309
|
// Verification gets the strong tier when configured — a weak verifier
|
|
1193
1310
|
// rubber-stamps exactly the tasks that most need scrutiny.
|
|
@@ -1200,14 +1317,16 @@ class Executor {
|
|
|
1200
1317
|
model,
|
|
1201
1318
|
purpose: `verify ${task.id}`,
|
|
1202
1319
|
});
|
|
1320
|
+
let evidenceCalls = 0;
|
|
1321
|
+
const baseHooks = this.agentHooks(agentId, task.id);
|
|
1203
1322
|
const outcome = await (0, agent_1.runAgent)({
|
|
1204
1323
|
cfg: this.cfg,
|
|
1205
1324
|
agentId,
|
|
1206
1325
|
model,
|
|
1207
1326
|
thinking: this.meta.options.thinking,
|
|
1208
1327
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
1209
|
-
system: (0, prompts_1.verifierSystem)(this.meta, task),
|
|
1210
|
-
kickoff
|
|
1328
|
+
system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
|
|
1329
|
+
kickoff,
|
|
1211
1330
|
tools: (0, tools_1.verifierToolset)(),
|
|
1212
1331
|
terminal: [tools_1.VERDICT_TOOL],
|
|
1213
1332
|
maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
|
|
@@ -1215,21 +1334,75 @@ class Executor {
|
|
|
1215
1334
|
// Blind verification: the verifier judges deliverables against the
|
|
1216
1335
|
// objective with its own tools — it must not inherit the swarm's shared
|
|
1217
1336
|
// beliefs (blackboard) or the worker's narrative beyond the claims.
|
|
1337
|
+
// (Dep reports are settled upstream outputs, not the worker's story.)
|
|
1218
1338
|
ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
|
|
1219
|
-
hooks:
|
|
1339
|
+
hooks: {
|
|
1340
|
+
...baseHooks,
|
|
1341
|
+
onToolCall: (callId, name, args) => {
|
|
1342
|
+
if (name !== "verdict")
|
|
1343
|
+
evidenceCalls++;
|
|
1344
|
+
baseHooks.onToolCall(callId, name, args);
|
|
1345
|
+
},
|
|
1346
|
+
},
|
|
1220
1347
|
stop: this.agentStop,
|
|
1221
1348
|
});
|
|
1222
1349
|
this.flushDeltas(agentId);
|
|
1223
1350
|
this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
|
|
1351
|
+
return { outcome, evidenceCalls };
|
|
1352
|
+
}
|
|
1353
|
+
async runVerifier(task) {
|
|
1354
|
+
const strict = this.cfg.verification === "strict";
|
|
1355
|
+
let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
|
|
1224
1356
|
if (this.ac.signal.aborted)
|
|
1225
1357
|
return true;
|
|
1358
|
+
// Strict mode: a pass verdict backed by zero tool calls is an opinion,
|
|
1359
|
+
// not a verification. One re-run demanding evidence; if that also passes
|
|
1360
|
+
// tool-free, accept but say so in the journal.
|
|
1361
|
+
if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
|
|
1362
|
+
this.journal.append("log", {
|
|
1363
|
+
level: "info",
|
|
1364
|
+
msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
|
|
1365
|
+
});
|
|
1366
|
+
const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
|
|
1367
|
+
if (this.ac.signal.aborted)
|
|
1368
|
+
return true;
|
|
1369
|
+
if (second.outcome.terminal) {
|
|
1370
|
+
if (second.evidenceCalls === 0) {
|
|
1371
|
+
this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
|
|
1372
|
+
}
|
|
1373
|
+
outcome = second.outcome;
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1226
1376
|
const v = (outcome.terminal?.args ?? {});
|
|
1227
|
-
const strict = this.cfg.verification === "strict";
|
|
1228
1377
|
// No verdict returned: in strict mode fail closed, otherwise accept.
|
|
1229
1378
|
const pass = outcome.terminal ? Boolean(v.pass) : !strict;
|
|
1230
|
-
|
|
1379
|
+
let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
|
|
1380
|
+
// Structured issues become the retry's worklist — numbered, with evidence.
|
|
1381
|
+
const issues = Array.isArray(v.issues)
|
|
1382
|
+
? v.issues
|
|
1383
|
+
.filter((i) => i && typeof i === "object" && i.problem)
|
|
1384
|
+
.slice(0, 5)
|
|
1385
|
+
.map((i) => ({
|
|
1386
|
+
problem: (0, util_1.oneLine)(String(i.problem), 300),
|
|
1387
|
+
evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
|
|
1388
|
+
fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
|
|
1389
|
+
}))
|
|
1390
|
+
: [];
|
|
1391
|
+
if (!pass && issues.length) {
|
|
1392
|
+
feedback = [
|
|
1393
|
+
feedback,
|
|
1394
|
+
...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
|
|
1395
|
+
]
|
|
1396
|
+
.filter(Boolean)
|
|
1397
|
+
.join("\n");
|
|
1398
|
+
}
|
|
1231
1399
|
task.feedback = feedback;
|
|
1232
|
-
this.journal.append("verify.result", {
|
|
1400
|
+
this.journal.append("verify.result", {
|
|
1401
|
+
taskId: task.id,
|
|
1402
|
+
pass,
|
|
1403
|
+
feedback,
|
|
1404
|
+
...(issues.length ? { issues } : {}),
|
|
1405
|
+
});
|
|
1233
1406
|
return pass;
|
|
1234
1407
|
}
|
|
1235
1408
|
finalizeTask(task, status, reason) {
|
|
@@ -1237,6 +1410,14 @@ class Executor {
|
|
|
1237
1410
|
task.endedAt = Date.now();
|
|
1238
1411
|
if (reason && status !== "done")
|
|
1239
1412
|
task.error = reason;
|
|
1413
|
+
// A settled task holds no file claims — release them so the digest and
|
|
1414
|
+
// search_notes don't accumulate dead claims on long runs. In-place splice:
|
|
1415
|
+
// teams share this array by reference.
|
|
1416
|
+
for (let i = this.notes.length - 1; i >= 0; i--) {
|
|
1417
|
+
const n = this.notes[i];
|
|
1418
|
+
if (n.kind === "claim" && n.taskId === task.id && n.teamId === this.teamId)
|
|
1419
|
+
this.notes.splice(i, 1);
|
|
1420
|
+
}
|
|
1240
1421
|
this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
|
|
1241
1422
|
this.settledSinceUpdate.push(task.id);
|
|
1242
1423
|
this.maybeSnapshot();
|
|
@@ -1283,6 +1464,18 @@ class Executor {
|
|
|
1283
1464
|
const rel = `progress-report-${n}.md`;
|
|
1284
1465
|
fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
|
|
1285
1466
|
this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
|
|
1467
|
+
// Interim memory: a multi-day run that dies before synthesis still
|
|
1468
|
+
// leaves the next swarm in this workspace something to build on.
|
|
1469
|
+
if (!this.meta.sandbox) {
|
|
1470
|
+
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1471
|
+
runId: this.meta.id,
|
|
1472
|
+
mission: this.meta.mission,
|
|
1473
|
+
finishedAt: Date.now(),
|
|
1474
|
+
status: "in-progress",
|
|
1475
|
+
summary: (0, util_1.clip)(res.content, 600),
|
|
1476
|
+
keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
|
|
1477
|
+
});
|
|
1478
|
+
}
|
|
1286
1479
|
})
|
|
1287
1480
|
.catch((e) => {
|
|
1288
1481
|
if (!this.ac.signal.aborted)
|
|
@@ -1329,7 +1522,9 @@ class Executor {
|
|
|
1329
1522
|
queueDelta(agentId, taskId, channel, text) {
|
|
1330
1523
|
// Deltas are UI sugar, never state — thin them under load so a 100-agent
|
|
1331
1524
|
// swarm doesn't write gigabytes of streaming chatter into the journal.
|
|
1332
|
-
|
|
1525
|
+
// inflight.size over-counts verifying tasks slightly, but these are fuzzy
|
|
1526
|
+
// thresholds and this runs per streaming token — O(1) matters here.
|
|
1527
|
+
const load = this.inflight.size;
|
|
1333
1528
|
if (channel === "think" && load > 48) {
|
|
1334
1529
|
if (!this.thinkDropLogged) {
|
|
1335
1530
|
this.thinkDropLogged = true;
|
|
@@ -1369,7 +1564,7 @@ class Executor {
|
|
|
1369
1564
|
});
|
|
1370
1565
|
}
|
|
1371
1566
|
}
|
|
1372
|
-
agentHooks(agentId, taskId) {
|
|
1567
|
+
agentHooks(agentId, taskId, trackErrorsOn) {
|
|
1373
1568
|
return {
|
|
1374
1569
|
onDelta: (channel, text) => {
|
|
1375
1570
|
this.queueDelta(agentId, taskId, channel, text);
|
|
@@ -1379,6 +1574,8 @@ class Executor {
|
|
|
1379
1574
|
this.journal.append("tool.call", { agentId, taskId, callId, name, args });
|
|
1380
1575
|
},
|
|
1381
1576
|
onToolResult: (callId, name, ok, summary) => {
|
|
1577
|
+
if (!ok && trackErrorsOn)
|
|
1578
|
+
trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
|
|
1382
1579
|
this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
|
|
1383
1580
|
},
|
|
1384
1581
|
onUsage: this.onUsage,
|
|
@@ -1471,6 +1668,10 @@ class Executor {
|
|
|
1471
1668
|
? tasks.map(prompts_1.reportBlock).join("\n\n")
|
|
1472
1669
|
: "(no tasks were completed)";
|
|
1473
1670
|
const artifactList = this.listArtifacts().join("\n") || "(none)";
|
|
1671
|
+
// The citation pipeline's last hop: every source any worker reported,
|
|
1672
|
+
// deduplicated and numbered, becomes the synthesizer's bibliography.
|
|
1673
|
+
const allSources = (0, report_1.aggregateSources)(tasks);
|
|
1674
|
+
const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
|
|
1474
1675
|
const agentId = (0, util_1.rid)("synth");
|
|
1475
1676
|
let summary = "";
|
|
1476
1677
|
let reportMarkdown = "";
|
|
@@ -1488,6 +1689,7 @@ class Executor {
|
|
|
1488
1689
|
blackboard: this.blackboardDigest(6000),
|
|
1489
1690
|
artifactList,
|
|
1490
1691
|
reason: this.finishReason || "completed",
|
|
1692
|
+
sources: sourcesText,
|
|
1491
1693
|
}),
|
|
1492
1694
|
kickoff: prompts_1.SYNTH_KICKOFF,
|
|
1493
1695
|
tools: (0, tools_1.synthToolset)(),
|
|
@@ -1513,7 +1715,7 @@ class Executor {
|
|
|
1513
1715
|
messages: [
|
|
1514
1716
|
{
|
|
1515
1717
|
role: "user",
|
|
1516
|
-
content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
|
|
1718
|
+
content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
|
|
1517
1719
|
},
|
|
1518
1720
|
],
|
|
1519
1721
|
thinking: false,
|
|
@@ -1562,6 +1764,7 @@ class Executor {
|
|
|
1562
1764
|
// Cross-run memory: real-directory runs leave a trace for the next swarm.
|
|
1563
1765
|
if (!this.meta.sandbox && status !== "cancelled") {
|
|
1564
1766
|
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1767
|
+
runId: this.meta.id,
|
|
1565
1768
|
mission: this.meta.mission,
|
|
1566
1769
|
finishedAt: Date.now(),
|
|
1567
1770
|
status,
|
|
@@ -1572,6 +1775,10 @@ class Executor {
|
|
|
1572
1775
|
}
|
|
1573
1776
|
fallbackReport(tasks) {
|
|
1574
1777
|
const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
|
|
1778
|
+
// Even without a synthesizer, surface the cross-task essentials first.
|
|
1779
|
+
const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
|
|
1780
|
+
if (facts.length)
|
|
1781
|
+
lines.push(`## Key facts`, ...facts.slice(0, 60), "");
|
|
1575
1782
|
for (const t of tasks) {
|
|
1576
1783
|
lines.push(`## ${t.id} ${t.title} (${t.status})`);
|
|
1577
1784
|
lines.push(t.report || t.error || "(no output)");
|
|
@@ -1579,6 +1786,15 @@ class Executor {
|
|
|
1579
1786
|
lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
|
|
1580
1787
|
lines.push("");
|
|
1581
1788
|
}
|
|
1789
|
+
const sources = (0, report_1.aggregateSources)(tasks);
|
|
1790
|
+
if (sources.length) {
|
|
1791
|
+
lines.push(`## Sources`);
|
|
1792
|
+
for (const s of sources.slice(0, 100)) {
|
|
1793
|
+
lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
|
|
1794
|
+
}
|
|
1795
|
+
if (sources.length > 100)
|
|
1796
|
+
lines.push(`…and ${sources.length - 100} more in the task reports.`);
|
|
1797
|
+
}
|
|
1582
1798
|
return lines.join("\n");
|
|
1583
1799
|
}
|
|
1584
1800
|
listArtifacts() {
|