@robzilla1738/agentswarm 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -12
- package/dist/agent.js +2 -1
- package/dist/cli.js +21 -4
- package/dist/config.js +27 -1
- package/dist/executor.js +243 -43
- package/dist/hub.js +69 -3
- package/dist/memory.js +5 -4
- package/dist/pdftext.js +211 -0
- package/dist/prompts.js +23 -15
- package/dist/report.js +37 -0
- package/dist/run.js +8 -0
- package/dist/sandbox.js +11 -0
- package/dist/searchcore.js +55 -2
- package/dist/state.js +34 -6
- package/dist/tools.js +196 -19
- package/dist/util.js +85 -0
- package/dist/webtools.js +145 -15
- package/package.json +1 -1
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
- package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/executor.js
CHANGED
|
@@ -134,7 +134,12 @@ class Executor {
|
|
|
134
134
|
const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
|
|
135
135
|
this.taskCounter = Math.max(this.taskCounter, n);
|
|
136
136
|
}
|
|
137
|
-
|
|
137
|
+
// Drop claims held by settled tasks — they were released on task end and
|
|
138
|
+
// must not resurrect across a restart.
|
|
139
|
+
const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
|
|
140
|
+
this.notes = state.notes
|
|
141
|
+
.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
|
|
142
|
+
.filter((n) => !(n.kind === "claim" && n.taskId && settled.has(n.taskId)));
|
|
138
143
|
const lastPhase = state.phases[state.phases.length - 1];
|
|
139
144
|
if (lastPhase)
|
|
140
145
|
this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
|
|
@@ -182,16 +187,16 @@ class Executor {
|
|
|
182
187
|
blackboardDigest(max = 1800) {
|
|
183
188
|
if (!this.notes.length)
|
|
184
189
|
return "";
|
|
185
|
-
const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
|
|
186
|
-
// Decisions anchor mission-wide coherence and are never
|
|
187
|
-
// digest; everything else shows only its recent tail.
|
|
188
|
-
const
|
|
189
|
-
const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
|
|
190
|
+
const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
|
|
191
|
+
// Decisions and conflicts anchor mission-wide coherence and are never
|
|
192
|
+
// trimmed out of the digest; everything else shows only its recent tail.
|
|
193
|
+
const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
|
|
194
|
+
const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
|
|
190
195
|
let tail = rest.join("\n");
|
|
191
|
-
const budget = Math.max(400, max -
|
|
196
|
+
const budget = Math.max(400, max - pinned.join("\n").length);
|
|
192
197
|
if (tail.length > budget)
|
|
193
198
|
tail = tail.slice(tail.length - budget);
|
|
194
|
-
return [
|
|
199
|
+
return [pinned.join("\n"), tail].filter(Boolean).join("\n");
|
|
195
200
|
}
|
|
196
201
|
searchNotes(query) {
|
|
197
202
|
const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
@@ -266,6 +271,8 @@ class Executor {
|
|
|
266
271
|
content: this.resumed
|
|
267
272
|
? (0, prompts_1.conductorUpdate)({
|
|
268
273
|
blackboard: this.blackboardDigest(),
|
|
274
|
+
phase: this.phaseLine(),
|
|
275
|
+
plan: this.planPin(),
|
|
269
276
|
nextId: this.nextId(),
|
|
270
277
|
taskTable: (0, prompts_1.taskTable)(this.taskList()),
|
|
271
278
|
budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
|
|
@@ -277,6 +284,15 @@ class Executor {
|
|
|
277
284
|
: (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
|
|
278
285
|
},
|
|
279
286
|
];
|
|
287
|
+
if (this.resumed) {
|
|
288
|
+
// The conductor's reasoning history died with the old process. Re-seed
|
|
289
|
+
// the durable facts into the same slot trimConductorHistory() maintains,
|
|
290
|
+
// so a resumed conductor knows what settled and what was decided.
|
|
291
|
+
this.conductorMessages.splice(1, 0, {
|
|
292
|
+
role: "user",
|
|
293
|
+
content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
|
|
294
|
+
});
|
|
295
|
+
}
|
|
280
296
|
try {
|
|
281
297
|
await this.conductorTurn();
|
|
282
298
|
this.setStatus("running");
|
|
@@ -799,8 +815,8 @@ class Executor {
|
|
|
799
815
|
* trimmed history so the conductor never loses the plot on long missions —
|
|
800
816
|
* rebuilt fresh each trim from current state, so it also survives resume.
|
|
801
817
|
*/
|
|
802
|
-
missionLedger() {
|
|
803
|
-
const lines = [
|
|
818
|
+
missionLedger(intro = "Earlier orchestration history was trimmed.") {
|
|
819
|
+
const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
|
|
804
820
|
if (this.phase)
|
|
805
821
|
lines.push(this.phaseLine());
|
|
806
822
|
const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
|
|
@@ -843,6 +859,19 @@ class Executor {
|
|
|
843
859
|
else
|
|
844
860
|
this.conductorMessages.splice(1, 0, msg);
|
|
845
861
|
};
|
|
862
|
+
// Old conductor turns carry the bulk in thinking traces and verbose prose;
|
|
863
|
+
// the durable decisions live in the ledger and the plan pin. Compact them
|
|
864
|
+
// in place before resorting to dropping whole messages. (sanitizeMessages
|
|
865
|
+
// backfills reasoning_content with "" for DeepSeek tool-call turns.)
|
|
866
|
+
for (let i = 1; i < this.conductorMessages.length - 6; i++) {
|
|
867
|
+
const m = this.conductorMessages[i];
|
|
868
|
+
if (m.role !== "assistant")
|
|
869
|
+
continue;
|
|
870
|
+
if (m.reasoning_content)
|
|
871
|
+
m.reasoning_content = "";
|
|
872
|
+
if (m.content && m.content.length > 400)
|
|
873
|
+
m.content = (0, util_1.clip)(m.content, 400);
|
|
874
|
+
}
|
|
846
875
|
if (this.conductorMessages.length > MAX) {
|
|
847
876
|
const system = this.conductorMessages[0];
|
|
848
877
|
const tail = this.conductorMessages.slice(-(MAX - 2));
|
|
@@ -855,7 +884,7 @@ class Executor {
|
|
|
855
884
|
// Count alone doesn't bound size: every update embeds the full task table,
|
|
856
885
|
// so a deep run can blow the model window long before 60 messages. The
|
|
857
886
|
// mission itself lives in the system message and always survives.
|
|
858
|
-
const budget = Math.floor(this.cfg.
|
|
887
|
+
const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
|
|
859
888
|
if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
|
|
860
889
|
return;
|
|
861
890
|
setLedger();
|
|
@@ -876,20 +905,48 @@ class Executor {
|
|
|
876
905
|
hasOpenWork() {
|
|
877
906
|
return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
|
|
878
907
|
}
|
|
908
|
+
/** Walk a failed/blocked dep chain down to the task that actually failed. */
|
|
909
|
+
rootFailure(id) {
|
|
910
|
+
let cur = this.tasks.get(id);
|
|
911
|
+
const seen = new Set();
|
|
912
|
+
while (cur && !seen.has(cur.id)) {
|
|
913
|
+
seen.add(cur.id);
|
|
914
|
+
const next = cur.deps
|
|
915
|
+
.map((d) => this.tasks.get(d))
|
|
916
|
+
.find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
|
|
917
|
+
if (!next)
|
|
918
|
+
return cur;
|
|
919
|
+
cur = next;
|
|
920
|
+
}
|
|
921
|
+
return cur;
|
|
922
|
+
}
|
|
879
923
|
blockStuckTasks() {
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
924
|
+
// Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
|
|
925
|
+
// pass, not one level per conductor turn.
|
|
926
|
+
for (let changed = true; changed;) {
|
|
927
|
+
changed = false;
|
|
928
|
+
for (const t of this.taskList()) {
|
|
929
|
+
if (t.status !== "pending")
|
|
930
|
+
continue;
|
|
931
|
+
const bad = t.deps.find((d) => {
|
|
932
|
+
const s = this.tasks.get(d)?.status;
|
|
933
|
+
return s === "failed" || s === "blocked";
|
|
934
|
+
});
|
|
935
|
+
if (!bad)
|
|
936
|
+
continue;
|
|
937
|
+
// Carry the root cause so the conductor re-plans around the actual
|
|
938
|
+
// failure, not a chain of "dependency did not complete".
|
|
939
|
+
const root = this.rootFailure(bad);
|
|
940
|
+
const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
|
|
888
941
|
t.status = "blocked";
|
|
889
|
-
t.error =
|
|
942
|
+
t.error =
|
|
943
|
+
root && root.id !== bad
|
|
944
|
+
? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
|
|
945
|
+
: `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
|
|
890
946
|
t.endedAt = Date.now();
|
|
891
947
|
this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
|
|
892
948
|
this.settledSinceUpdate.push(t.id);
|
|
949
|
+
changed = true;
|
|
893
950
|
}
|
|
894
951
|
}
|
|
895
952
|
}
|
|
@@ -954,17 +1011,25 @@ class Executor {
|
|
|
954
1011
|
taskId: task?.id,
|
|
955
1012
|
signal: this.ac.signal,
|
|
956
1013
|
addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
|
|
957
|
-
addNote: (text, key, kind) => {
|
|
958
|
-
this.notes.push({ taskId: task?.id, key, kind, text });
|
|
1014
|
+
addNote: (text, key, kind, url) => {
|
|
1015
|
+
this.notes.push({ taskId: task?.id, key, kind, text, url });
|
|
959
1016
|
// Only the recent tail ever feeds digests; without a cap a multi-day
|
|
960
|
-
// run accumulates every note in memory. Decisions
|
|
1017
|
+
// run accumulates every note in memory. Decisions and conflicts are
|
|
1018
|
+
// kept regardless. In-place splice: teams share this array by reference.
|
|
961
1019
|
if (this.notes.length > 4000) {
|
|
962
|
-
const
|
|
963
|
-
const
|
|
964
|
-
|
|
965
|
-
this.notes
|
|
1020
|
+
const keep = (n) => n.kind === "decision" || n.kind === "conflict";
|
|
1021
|
+
const pinnedCount = this.notes.filter(keep).length;
|
|
1022
|
+
let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
|
|
1023
|
+
for (let i = 0; i < this.notes.length && toDrop > 0;) {
|
|
1024
|
+
if (!keep(this.notes[i])) {
|
|
1025
|
+
this.notes.splice(i, 1);
|
|
1026
|
+
toDrop--;
|
|
1027
|
+
}
|
|
1028
|
+
else
|
|
1029
|
+
i++;
|
|
1030
|
+
}
|
|
966
1031
|
}
|
|
967
|
-
this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
|
|
1032
|
+
this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
|
|
968
1033
|
},
|
|
969
1034
|
searchNotes: (q) => this.searchNotes(q),
|
|
970
1035
|
readReport: (taskId) => this.readReportText(taskId),
|
|
@@ -1031,7 +1096,12 @@ class Executor {
|
|
|
1031
1096
|
if (task.attempt < this.cfg.verifyMaxAttempts) {
|
|
1032
1097
|
task.attempt++;
|
|
1033
1098
|
task.status = "running";
|
|
1034
|
-
this.journal.append("task.status", {
|
|
1099
|
+
this.journal.append("task.status", {
|
|
1100
|
+
taskId: task.id,
|
|
1101
|
+
status: "running",
|
|
1102
|
+
attempt: task.attempt,
|
|
1103
|
+
reason: task.feedback || task.error,
|
|
1104
|
+
});
|
|
1035
1105
|
continue;
|
|
1036
1106
|
}
|
|
1037
1107
|
this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
|
|
@@ -1046,12 +1116,12 @@ class Executor {
|
|
|
1046
1116
|
}
|
|
1047
1117
|
if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
|
|
1048
1118
|
task.attempt++;
|
|
1049
|
-
task.error = (0, util_1.errMsg)(e)
|
|
1119
|
+
task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
|
|
1050
1120
|
task.status = "running";
|
|
1051
1121
|
this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
|
|
1052
1122
|
continue;
|
|
1053
1123
|
}
|
|
1054
|
-
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
|
|
1124
|
+
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
|
|
1055
1125
|
return;
|
|
1056
1126
|
}
|
|
1057
1127
|
}
|
|
@@ -1068,6 +1138,7 @@ class Executor {
|
|
|
1068
1138
|
const agentId = (0, util_1.rid)("w");
|
|
1069
1139
|
const model = this.resolveModel(task.modelTier);
|
|
1070
1140
|
task.agentIds.push(agentId);
|
|
1141
|
+
task.lastToolError = undefined; // diagnostics are per-attempt
|
|
1071
1142
|
const dirListing = this.topListing();
|
|
1072
1143
|
const system = (0, prompts_1.workerSystem)({
|
|
1073
1144
|
agentId,
|
|
@@ -1101,7 +1172,7 @@ class Executor {
|
|
|
1101
1172
|
signal: this.ac.signal,
|
|
1102
1173
|
ctx: this.makeToolCtx(agentId, task),
|
|
1103
1174
|
hooks: {
|
|
1104
|
-
...this.agentHooks(agentId, task.id),
|
|
1175
|
+
...this.agentHooks(agentId, task.id, task),
|
|
1105
1176
|
onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
|
|
1106
1177
|
},
|
|
1107
1178
|
stop: this.agentStop,
|
|
@@ -1111,7 +1182,11 @@ class Executor {
|
|
|
1111
1182
|
if (this.ac.signal.aborted)
|
|
1112
1183
|
return "done";
|
|
1113
1184
|
if (!outcome.terminal) {
|
|
1114
|
-
|
|
1185
|
+
const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
|
|
1186
|
+
task.error =
|
|
1187
|
+
"worker ended without reporting" +
|
|
1188
|
+
(task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
|
|
1189
|
+
(lastWords ? `; last words: ${lastWords}` : "");
|
|
1115
1190
|
return "retry";
|
|
1116
1191
|
}
|
|
1117
1192
|
const a = outcome.terminal.args;
|
|
@@ -1127,6 +1202,20 @@ class Executor {
|
|
|
1127
1202
|
task.keyFacts = strList(a.key_facts, 8);
|
|
1128
1203
|
task.openQuestions = strList(a.open_questions, 6);
|
|
1129
1204
|
task.filesTouched = strList(a.files_touched, 40);
|
|
1205
|
+
// Structured sources: the citation pipeline's entry point. Only real
|
|
1206
|
+
// http(s) URLs survive; they flow into dep handoffs and the bibliography.
|
|
1207
|
+
const sources = Array.isArray(a.sources)
|
|
1208
|
+
? a.sources
|
|
1209
|
+
.filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
|
|
1210
|
+
.slice(0, 40)
|
|
1211
|
+
.map((s) => ({
|
|
1212
|
+
url: (0, util_1.clip)(String(s.url), 500),
|
|
1213
|
+
title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
|
|
1214
|
+
date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
|
|
1215
|
+
note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
|
|
1216
|
+
}))
|
|
1217
|
+
: [];
|
|
1218
|
+
task.sources = sources.length ? sources : undefined;
|
|
1130
1219
|
this.journal.append("task.report", {
|
|
1131
1220
|
taskId: task.id,
|
|
1132
1221
|
status: reportStatus,
|
|
@@ -1135,6 +1224,7 @@ class Executor {
|
|
|
1135
1224
|
keyFacts: task.keyFacts,
|
|
1136
1225
|
openQuestions: task.openQuestions,
|
|
1137
1226
|
filesTouched: task.filesTouched,
|
|
1227
|
+
sources: task.sources,
|
|
1138
1228
|
});
|
|
1139
1229
|
if (reportStatus === "blocked") {
|
|
1140
1230
|
this.finalizeTask(task, "blocked", report);
|
|
@@ -1165,6 +1255,7 @@ class Executor {
|
|
|
1165
1255
|
return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
|
|
1166
1256
|
}
|
|
1167
1257
|
const missing = [];
|
|
1258
|
+
const malformed = [];
|
|
1168
1259
|
// Remote sandboxes own their filesystem — only check host-visible paths.
|
|
1169
1260
|
if (this.sandbox.localFs) {
|
|
1170
1261
|
const okAt = (p) => {
|
|
@@ -1178,16 +1269,28 @@ class Executor {
|
|
|
1178
1269
|
for (const rel of task.artifacts) {
|
|
1179
1270
|
const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
|
|
1180
1271
|
const inWorkdir = path.resolve(this.meta.cwd, rel);
|
|
1181
|
-
if (!okAt(inArtifacts) && !okAt(inWorkdir))
|
|
1272
|
+
if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
|
|
1182
1273
|
missing.push(rel);
|
|
1274
|
+
continue;
|
|
1275
|
+
}
|
|
1276
|
+
// Structural format check (json parses, csv is rectangular, html is
|
|
1277
|
+
// not a stub) — free, and catches what the LLM verifier wastes a whole
|
|
1278
|
+
// agent run discovering.
|
|
1279
|
+
const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
|
|
1280
|
+
if (problem)
|
|
1281
|
+
malformed.push(`${rel}: ${problem}`);
|
|
1183
1282
|
}
|
|
1184
1283
|
}
|
|
1185
1284
|
if (missing.length) {
|
|
1186
1285
|
return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
|
|
1187
1286
|
}
|
|
1287
|
+
if (malformed.length) {
|
|
1288
|
+
return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
|
|
1289
|
+
}
|
|
1188
1290
|
return null;
|
|
1189
1291
|
}
|
|
1190
|
-
|
|
1292
|
+
/** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
|
|
1293
|
+
async verifierAgent(task, kickoff) {
|
|
1191
1294
|
const agentId = (0, util_1.rid)("v");
|
|
1192
1295
|
// Verification gets the strong tier when configured — a weak verifier
|
|
1193
1296
|
// rubber-stamps exactly the tasks that most need scrutiny.
|
|
@@ -1200,14 +1303,16 @@ class Executor {
|
|
|
1200
1303
|
model,
|
|
1201
1304
|
purpose: `verify ${task.id}`,
|
|
1202
1305
|
});
|
|
1306
|
+
let evidenceCalls = 0;
|
|
1307
|
+
const baseHooks = this.agentHooks(agentId, task.id);
|
|
1203
1308
|
const outcome = await (0, agent_1.runAgent)({
|
|
1204
1309
|
cfg: this.cfg,
|
|
1205
1310
|
agentId,
|
|
1206
1311
|
model,
|
|
1207
1312
|
thinking: this.meta.options.thinking,
|
|
1208
1313
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
1209
|
-
system: (0, prompts_1.verifierSystem)(this.meta, task),
|
|
1210
|
-
kickoff
|
|
1314
|
+
system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
|
|
1315
|
+
kickoff,
|
|
1211
1316
|
tools: (0, tools_1.verifierToolset)(),
|
|
1212
1317
|
terminal: [tools_1.VERDICT_TOOL],
|
|
1213
1318
|
maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
|
|
@@ -1215,21 +1320,75 @@ class Executor {
|
|
|
1215
1320
|
// Blind verification: the verifier judges deliverables against the
|
|
1216
1321
|
// objective with its own tools — it must not inherit the swarm's shared
|
|
1217
1322
|
// beliefs (blackboard) or the worker's narrative beyond the claims.
|
|
1323
|
+
// (Dep reports are settled upstream outputs, not the worker's story.)
|
|
1218
1324
|
ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
|
|
1219
|
-
hooks:
|
|
1325
|
+
hooks: {
|
|
1326
|
+
...baseHooks,
|
|
1327
|
+
onToolCall: (callId, name, args) => {
|
|
1328
|
+
if (name !== "verdict")
|
|
1329
|
+
evidenceCalls++;
|
|
1330
|
+
baseHooks.onToolCall(callId, name, args);
|
|
1331
|
+
},
|
|
1332
|
+
},
|
|
1220
1333
|
stop: this.agentStop,
|
|
1221
1334
|
});
|
|
1222
1335
|
this.flushDeltas(agentId);
|
|
1223
1336
|
this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
|
|
1337
|
+
return { outcome, evidenceCalls };
|
|
1338
|
+
}
|
|
1339
|
+
async runVerifier(task) {
|
|
1340
|
+
const strict = this.cfg.verification === "strict";
|
|
1341
|
+
let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
|
|
1224
1342
|
if (this.ac.signal.aborted)
|
|
1225
1343
|
return true;
|
|
1344
|
+
// Strict mode: a pass verdict backed by zero tool calls is an opinion,
|
|
1345
|
+
// not a verification. One re-run demanding evidence; if that also passes
|
|
1346
|
+
// tool-free, accept but say so in the journal.
|
|
1347
|
+
if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
|
|
1348
|
+
this.journal.append("log", {
|
|
1349
|
+
level: "info",
|
|
1350
|
+
msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
|
|
1351
|
+
});
|
|
1352
|
+
const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
|
|
1353
|
+
if (this.ac.signal.aborted)
|
|
1354
|
+
return true;
|
|
1355
|
+
if (second.outcome.terminal) {
|
|
1356
|
+
if (second.evidenceCalls === 0) {
|
|
1357
|
+
this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
|
|
1358
|
+
}
|
|
1359
|
+
outcome = second.outcome;
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
1226
1362
|
const v = (outcome.terminal?.args ?? {});
|
|
1227
|
-
const strict = this.cfg.verification === "strict";
|
|
1228
1363
|
// No verdict returned: in strict mode fail closed, otherwise accept.
|
|
1229
1364
|
const pass = outcome.terminal ? Boolean(v.pass) : !strict;
|
|
1230
|
-
|
|
1365
|
+
let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
|
|
1366
|
+
// Structured issues become the retry's worklist — numbered, with evidence.
|
|
1367
|
+
const issues = Array.isArray(v.issues)
|
|
1368
|
+
? v.issues
|
|
1369
|
+
.filter((i) => i && typeof i === "object" && i.problem)
|
|
1370
|
+
.slice(0, 5)
|
|
1371
|
+
.map((i) => ({
|
|
1372
|
+
problem: (0, util_1.oneLine)(String(i.problem), 300),
|
|
1373
|
+
evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
|
|
1374
|
+
fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
|
|
1375
|
+
}))
|
|
1376
|
+
: [];
|
|
1377
|
+
if (!pass && issues.length) {
|
|
1378
|
+
feedback = [
|
|
1379
|
+
feedback,
|
|
1380
|
+
...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
|
|
1381
|
+
]
|
|
1382
|
+
.filter(Boolean)
|
|
1383
|
+
.join("\n");
|
|
1384
|
+
}
|
|
1231
1385
|
task.feedback = feedback;
|
|
1232
|
-
this.journal.append("verify.result", {
|
|
1386
|
+
this.journal.append("verify.result", {
|
|
1387
|
+
taskId: task.id,
|
|
1388
|
+
pass,
|
|
1389
|
+
feedback,
|
|
1390
|
+
...(issues.length ? { issues } : {}),
|
|
1391
|
+
});
|
|
1233
1392
|
return pass;
|
|
1234
1393
|
}
|
|
1235
1394
|
finalizeTask(task, status, reason) {
|
|
@@ -1237,6 +1396,14 @@ class Executor {
|
|
|
1237
1396
|
task.endedAt = Date.now();
|
|
1238
1397
|
if (reason && status !== "done")
|
|
1239
1398
|
task.error = reason;
|
|
1399
|
+
// A settled task holds no file claims — release them so the digest and
|
|
1400
|
+
// search_notes don't accumulate dead claims on long runs. In-place splice:
|
|
1401
|
+
// teams share this array by reference.
|
|
1402
|
+
for (let i = this.notes.length - 1; i >= 0; i--) {
|
|
1403
|
+
const n = this.notes[i];
|
|
1404
|
+
if (n.kind === "claim" && n.taskId === task.id)
|
|
1405
|
+
this.notes.splice(i, 1);
|
|
1406
|
+
}
|
|
1240
1407
|
this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
|
|
1241
1408
|
this.settledSinceUpdate.push(task.id);
|
|
1242
1409
|
this.maybeSnapshot();
|
|
@@ -1283,6 +1450,18 @@ class Executor {
|
|
|
1283
1450
|
const rel = `progress-report-${n}.md`;
|
|
1284
1451
|
fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
|
|
1285
1452
|
this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
|
|
1453
|
+
// Interim memory: a multi-day run that dies before synthesis still
|
|
1454
|
+
// leaves the next swarm in this workspace something to build on.
|
|
1455
|
+
if (!this.meta.sandbox) {
|
|
1456
|
+
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1457
|
+
runId: this.meta.id,
|
|
1458
|
+
mission: this.meta.mission,
|
|
1459
|
+
finishedAt: Date.now(),
|
|
1460
|
+
status: "in-progress",
|
|
1461
|
+
summary: (0, util_1.clip)(res.content, 600),
|
|
1462
|
+
keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
|
|
1463
|
+
});
|
|
1464
|
+
}
|
|
1286
1465
|
})
|
|
1287
1466
|
.catch((e) => {
|
|
1288
1467
|
if (!this.ac.signal.aborted)
|
|
@@ -1369,7 +1548,7 @@ class Executor {
|
|
|
1369
1548
|
});
|
|
1370
1549
|
}
|
|
1371
1550
|
}
|
|
1372
|
-
agentHooks(agentId, taskId) {
|
|
1551
|
+
agentHooks(agentId, taskId, trackErrorsOn) {
|
|
1373
1552
|
return {
|
|
1374
1553
|
onDelta: (channel, text) => {
|
|
1375
1554
|
this.queueDelta(agentId, taskId, channel, text);
|
|
@@ -1379,6 +1558,8 @@ class Executor {
|
|
|
1379
1558
|
this.journal.append("tool.call", { agentId, taskId, callId, name, args });
|
|
1380
1559
|
},
|
|
1381
1560
|
onToolResult: (callId, name, ok, summary) => {
|
|
1561
|
+
if (!ok && trackErrorsOn)
|
|
1562
|
+
trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
|
|
1382
1563
|
this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
|
|
1383
1564
|
},
|
|
1384
1565
|
onUsage: this.onUsage,
|
|
@@ -1471,6 +1652,10 @@ class Executor {
|
|
|
1471
1652
|
? tasks.map(prompts_1.reportBlock).join("\n\n")
|
|
1472
1653
|
: "(no tasks were completed)";
|
|
1473
1654
|
const artifactList = this.listArtifacts().join("\n") || "(none)";
|
|
1655
|
+
// The citation pipeline's last hop: every source any worker reported,
|
|
1656
|
+
// deduplicated and numbered, becomes the synthesizer's bibliography.
|
|
1657
|
+
const allSources = (0, report_1.aggregateSources)(tasks);
|
|
1658
|
+
const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
|
|
1474
1659
|
const agentId = (0, util_1.rid)("synth");
|
|
1475
1660
|
let summary = "";
|
|
1476
1661
|
let reportMarkdown = "";
|
|
@@ -1488,6 +1673,7 @@ class Executor {
|
|
|
1488
1673
|
blackboard: this.blackboardDigest(6000),
|
|
1489
1674
|
artifactList,
|
|
1490
1675
|
reason: this.finishReason || "completed",
|
|
1676
|
+
sources: sourcesText,
|
|
1491
1677
|
}),
|
|
1492
1678
|
kickoff: prompts_1.SYNTH_KICKOFF,
|
|
1493
1679
|
tools: (0, tools_1.synthToolset)(),
|
|
@@ -1513,7 +1699,7 @@ class Executor {
|
|
|
1513
1699
|
messages: [
|
|
1514
1700
|
{
|
|
1515
1701
|
role: "user",
|
|
1516
|
-
content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
|
|
1702
|
+
content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
|
|
1517
1703
|
},
|
|
1518
1704
|
],
|
|
1519
1705
|
thinking: false,
|
|
@@ -1562,6 +1748,7 @@ class Executor {
|
|
|
1562
1748
|
// Cross-run memory: real-directory runs leave a trace for the next swarm.
|
|
1563
1749
|
if (!this.meta.sandbox && status !== "cancelled") {
|
|
1564
1750
|
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1751
|
+
runId: this.meta.id,
|
|
1565
1752
|
mission: this.meta.mission,
|
|
1566
1753
|
finishedAt: Date.now(),
|
|
1567
1754
|
status,
|
|
@@ -1572,6 +1759,10 @@ class Executor {
|
|
|
1572
1759
|
}
|
|
1573
1760
|
fallbackReport(tasks) {
|
|
1574
1761
|
const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
|
|
1762
|
+
// Even without a synthesizer, surface the cross-task essentials first.
|
|
1763
|
+
const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
|
|
1764
|
+
if (facts.length)
|
|
1765
|
+
lines.push(`## Key facts`, ...facts.slice(0, 60), "");
|
|
1575
1766
|
for (const t of tasks) {
|
|
1576
1767
|
lines.push(`## ${t.id} ${t.title} (${t.status})`);
|
|
1577
1768
|
lines.push(t.report || t.error || "(no output)");
|
|
@@ -1579,6 +1770,15 @@ class Executor {
|
|
|
1579
1770
|
lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
|
|
1580
1771
|
lines.push("");
|
|
1581
1772
|
}
|
|
1773
|
+
const sources = (0, report_1.aggregateSources)(tasks);
|
|
1774
|
+
if (sources.length) {
|
|
1775
|
+
lines.push(`## Sources`);
|
|
1776
|
+
for (const s of sources.slice(0, 100)) {
|
|
1777
|
+
lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
|
|
1778
|
+
}
|
|
1779
|
+
if (sources.length > 100)
|
|
1780
|
+
lines.push(`…and ${sources.length - 100} more in the task reports.`);
|
|
1781
|
+
}
|
|
1582
1782
|
return lines.join("\n");
|
|
1583
1783
|
}
|
|
1584
1784
|
listArtifacts() {
|
package/dist/hub.js
CHANGED
|
@@ -43,6 +43,7 @@ const url_1 = require("url");
|
|
|
43
43
|
const config_1 = require("./config");
|
|
44
44
|
const control_1 = require("./control");
|
|
45
45
|
const crawltools_1 = require("./crawltools");
|
|
46
|
+
const webtools_1 = require("./webtools");
|
|
46
47
|
const deepseek_1 = require("./deepseek");
|
|
47
48
|
const providers_1 = require("./providers");
|
|
48
49
|
const journal_1 = require("./journal");
|
|
@@ -83,9 +84,16 @@ function startHub(opts) {
|
|
|
83
84
|
async function handle(req, res, opts) {
|
|
84
85
|
const url = new url_1.URL(req.url || "/", `http://localhost:${opts.port}`);
|
|
85
86
|
const p = url.pathname;
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
87
|
+
// Localhost-only CORS. The hub launches runs and reads reports with the
|
|
88
|
+
// operator's keys — a random website's JS must never get a readable
|
|
89
|
+
// response. The dev UI on another localhost port is the one legitimate
|
|
90
|
+
// cross-origin client; everyone else gets no CORS headers at all.
|
|
91
|
+
const origin = String(req.headers.origin || "");
|
|
92
|
+
if (/^https?:\/\/(localhost|127\.0\.0\.1|\[::1\])(:\d+)?$/.test(origin)) {
|
|
93
|
+
res.setHeader("access-control-allow-origin", origin);
|
|
94
|
+
res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
|
|
95
|
+
res.setHeader("access-control-allow-headers", "content-type");
|
|
96
|
+
}
|
|
89
97
|
if (req.method === "OPTIONS") {
|
|
90
98
|
res.writeHead(204);
|
|
91
99
|
res.end();
|
|
@@ -159,6 +167,51 @@ async function api(req, res, url, opts) {
|
|
|
159
167
|
const r = await (0, sandbox_1.testSandbox)(cfg, kind);
|
|
160
168
|
return sendJson(res, 200, { kind, ...r });
|
|
161
169
|
}
|
|
170
|
+
// Settings diagnostics: prove the search engines / crawl backend actually
|
|
171
|
+
// work with the saved keys before a mission depends on them.
|
|
172
|
+
if (p === "/api/search/test" && method === "POST") {
|
|
173
|
+
const q = "open source vector database";
|
|
174
|
+
const probe = async (engine, fn) => {
|
|
175
|
+
try {
|
|
176
|
+
const hits = await fn();
|
|
177
|
+
return { engine, ok: hits.length > 0, detail: `${hits.length} result(s)` };
|
|
178
|
+
}
|
|
179
|
+
catch (e) {
|
|
180
|
+
return { engine, ok: false, detail: (0, util_1.errMsg)(e) };
|
|
181
|
+
}
|
|
182
|
+
};
|
|
183
|
+
const checks = [probe("duckduckgo", () => (0, webtools_1.ddgSearch)(q, 3)), probe("bing", () => (0, webtools_1.bingSearch)(q, 3))];
|
|
184
|
+
if (cfg.tinyfishApiKey)
|
|
185
|
+
checks.push(probe("tinyfish", () => (0, webtools_1.tinyfishSearch)(cfg, q, 3)));
|
|
186
|
+
const engines = await Promise.all(checks);
|
|
187
|
+
return sendJson(res, 200, { ok: engines.some((e) => e.ok), engines });
|
|
188
|
+
}
|
|
189
|
+
if (p === "/api/crawl/test" && method === "POST") {
|
|
190
|
+
const backend = (0, crawltools_1.resolveCrawlBackend)(cfg);
|
|
191
|
+
if (!backend) {
|
|
192
|
+
return sendJson(res, 200, { ok: false, backend: null, detail: "no crawl backend configured — add a key first" });
|
|
193
|
+
}
|
|
194
|
+
try {
|
|
195
|
+
if ((0, crawltools_1.hasScrapeBackend)(cfg)) {
|
|
196
|
+
const text = await (0, crawltools_1.scrapeUrl)(cfg, "https://example.com/");
|
|
197
|
+
return sendJson(res, 200, {
|
|
198
|
+
ok: Boolean(text && text.length > 50),
|
|
199
|
+
backend,
|
|
200
|
+
detail: text ? `scraped ${text.length} chars` : "empty scrape result",
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
// deepcrawl has no single-page scrape — smoke a 1-page crawl instead.
|
|
204
|
+
const out = await (0, crawltools_1.crawlSite)(cfg, { url: "https://example.com/", maxPages: 1 });
|
|
205
|
+
return sendJson(res, 200, {
|
|
206
|
+
ok: out.pages.length > 0,
|
|
207
|
+
backend,
|
|
208
|
+
detail: out.pages.length ? `crawled ${out.pages.length} page(s)` : out.warnings.join("; ") || "no pages",
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
catch (e) {
|
|
212
|
+
return sendJson(res, 200, { ok: false, backend, detail: (0, util_1.errMsg)(e) });
|
|
213
|
+
}
|
|
214
|
+
}
|
|
162
215
|
if (p === "/api/models" && method === "GET") {
|
|
163
216
|
try {
|
|
164
217
|
const models = await (0, deepseek_1.listModels)(cfg);
|
|
@@ -297,6 +350,14 @@ async function api(req, res, url, opts) {
|
|
|
297
350
|
res.end(fs.readFileSync(file));
|
|
298
351
|
return;
|
|
299
352
|
}
|
|
353
|
+
if (sub === "/plan" && method === "GET") {
|
|
354
|
+
const file = path.join((0, config_1.runDir)(id), "artifacts", "mission-plan.md");
|
|
355
|
+
if (!fs.existsSync(file))
|
|
356
|
+
return sendJson(res, 404, { error: "no plan yet" });
|
|
357
|
+
res.writeHead(200, { "content-type": "text/markdown; charset=utf-8" });
|
|
358
|
+
res.end(fs.readFileSync(file));
|
|
359
|
+
return;
|
|
360
|
+
}
|
|
300
361
|
if (sub === "/artifacts" && method === "GET") {
|
|
301
362
|
return sendJson(res, 200, { artifacts: listArtifactFiles(id) });
|
|
302
363
|
}
|
|
@@ -429,6 +490,9 @@ function publicConfig(cfg) {
|
|
|
429
490
|
reasoningEffort: cfg.reasoningEffort,
|
|
430
491
|
safeMode: cfg.safeMode,
|
|
431
492
|
contextTokenLimit: cfg.contextTokenLimit,
|
|
493
|
+
contextWindows: cfg.contextWindows,
|
|
494
|
+
cheapModel: cfg.cheapModel,
|
|
495
|
+
strongModel: cfg.strongModel,
|
|
432
496
|
knownModels,
|
|
433
497
|
pricing: cfg.pricing,
|
|
434
498
|
};
|
|
@@ -482,6 +546,8 @@ function snapshot(state, id) {
|
|
|
482
546
|
operatorNotes: state.operatorNotes,
|
|
483
547
|
usageByModel: Object.fromEntries(state.usageByModel),
|
|
484
548
|
cost: state.cost,
|
|
549
|
+
budgetSeries: state.budgetSeries,
|
|
550
|
+
planExcerpt: state.planExcerpt,
|
|
485
551
|
finalSummary: state.finalSummary,
|
|
486
552
|
finalReportPath: state.finalReportPath,
|
|
487
553
|
live: (0, run_1.isRunLive)(id),
|
package/dist/memory.js
CHANGED
|
@@ -58,10 +58,11 @@ function loadMemory(cwd) {
|
|
|
58
58
|
}
|
|
59
59
|
function appendMemory(cwd, entry) {
|
|
60
60
|
try {
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const
|
|
64
|
-
|
|
61
|
+
// Same-run entries replace (interim → final); writeJson is temp+rename so
|
|
62
|
+
// a crash mid-write never loses the prior history.
|
|
63
|
+
const prior = loadMemory(cwd).filter((e) => !(entry.runId && e.runId === entry.runId));
|
|
64
|
+
const entries = [...prior, entry].slice(-MAX_ENTRIES);
|
|
65
|
+
(0, util_1.writeJson)(memoryFile(cwd), { cwd: path.resolve(cwd), entries });
|
|
65
66
|
}
|
|
66
67
|
catch {
|
|
67
68
|
/* memory is best-effort */
|