@robzilla1738/agentswarm 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -11
- package/dist/agent.js +18 -2
- package/dist/cli.js +39 -8
- package/dist/config.js +62 -6
- package/dist/crawltools.js +247 -0
- package/dist/deepseek.js +125 -10
- package/dist/executor.js +993 -144
- package/dist/hub.js +85 -6
- package/dist/journal.js +61 -11
- package/dist/memory.js +84 -0
- package/dist/pdftext.js +211 -0
- package/dist/prompts.js +124 -23
- package/dist/report.js +289 -0
- package/dist/run.js +15 -2
- package/dist/sandbox.js +11 -0
- package/dist/searchcore.js +244 -0
- package/dist/state.js +85 -3
- package/dist/tools.js +392 -25
- package/dist/util.js +85 -0
- package/dist/webtools.js +327 -66
- package/package.json +3 -2
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
- package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
- package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
- package/ui/out/_next/static/css/d95c2ba395730031.css +3 -0
- package/ui/out/fonts/PlanetKosmos.ttf +0 -0
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
- package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
- package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
- package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
- package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/executor.js
CHANGED
|
@@ -40,12 +40,14 @@ const agent_1 = require("./agent");
|
|
|
40
40
|
const config_1 = require("./config");
|
|
41
41
|
const control_1 = require("./control");
|
|
42
42
|
const deepseek_1 = require("./deepseek");
|
|
43
|
+
const journal_1 = require("./journal");
|
|
43
44
|
const tools_1 = require("./tools");
|
|
44
45
|
const prompts_1 = require("./prompts");
|
|
46
|
+
const memory_1 = require("./memory");
|
|
47
|
+
const report_1 = require("./report");
|
|
45
48
|
const sandbox_1 = require("./sandbox");
|
|
46
49
|
const types_1 = require("./types");
|
|
47
50
|
const util_1 = require("./util");
|
|
48
|
-
const VERIFY_MAX_ATTEMPTS = 2;
|
|
49
51
|
class Executor {
|
|
50
52
|
cfg;
|
|
51
53
|
meta;
|
|
@@ -59,6 +61,7 @@ class Executor {
|
|
|
59
61
|
inflight = new Map();
|
|
60
62
|
settledSinceUpdate = [];
|
|
61
63
|
notes = [];
|
|
64
|
+
phase = null;
|
|
62
65
|
conductorMessages = [];
|
|
63
66
|
spentTokens = 0;
|
|
64
67
|
cost = 0;
|
|
@@ -67,19 +70,43 @@ class Executor {
|
|
|
67
70
|
finishReason = "";
|
|
68
71
|
fatal = null;
|
|
69
72
|
lastConductorAction = "none";
|
|
73
|
+
conductorFailures = 0;
|
|
74
|
+
/** True when the last conductor turn ended in a call error, not a decision. */
|
|
75
|
+
lastConductorErrored = false;
|
|
70
76
|
resumed = false;
|
|
71
77
|
sandbox;
|
|
72
|
-
|
|
78
|
+
mode;
|
|
79
|
+
teamId;
|
|
80
|
+
opts;
|
|
81
|
+
/** Team-mode result: the consolidated report handed back to the parent task. */
|
|
82
|
+
teamReport = "";
|
|
83
|
+
constructor(cfg, meta, journal, opts = {}) {
|
|
73
84
|
this.cfg = cfg;
|
|
74
85
|
this.meta = meta;
|
|
75
|
-
this.runDirPath = (0, config_1.runDir)(meta.id);
|
|
86
|
+
this.runDirPath = opts.runDirPath ?? (0, config_1.runDir)(meta.id);
|
|
76
87
|
this.journal = journal;
|
|
77
88
|
this.control = new control_1.ControlReader(this.runDirPath);
|
|
89
|
+
this.mode = opts.mode ?? "root";
|
|
90
|
+
this.teamId = opts.teamId;
|
|
91
|
+
this.opts = opts;
|
|
92
|
+
if (opts.sharedNotes)
|
|
93
|
+
this.notes = opts.sharedNotes;
|
|
78
94
|
(0, util_1.ensureDir)(path.join(this.runDirPath, "artifacts"));
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
95
|
+
if (opts.sandbox) {
|
|
96
|
+
this.sandbox = opts.sandbox;
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
// "A directory on disk" runs always execute on the host — touching the
|
|
100
|
+
// operator's real files is the entire point of that mode.
|
|
101
|
+
const kind = meta.sandbox ? meta.options.sandboxRuntime ?? "host" : "host";
|
|
102
|
+
this.sandbox = (0, sandbox_1.createSandbox)(kind, { runId: meta.id, hostDir: meta.cwd, cfg });
|
|
103
|
+
}
|
|
104
|
+
if (opts.parentSignal) {
|
|
105
|
+
if (opts.parentSignal.aborted)
|
|
106
|
+
this.ac.abort();
|
|
107
|
+
else
|
|
108
|
+
opts.parentSignal.addEventListener("abort", () => this.ac.abort(), { once: true });
|
|
109
|
+
}
|
|
83
110
|
}
|
|
84
111
|
cancel() {
|
|
85
112
|
this.finishing = true;
|
|
@@ -107,18 +134,52 @@ class Executor {
|
|
|
107
134
|
const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
|
|
108
135
|
this.taskCounter = Math.max(this.taskCounter, n);
|
|
109
136
|
}
|
|
110
|
-
|
|
137
|
+
// Drop claims held by settled tasks — they were released on task end and
|
|
138
|
+
// must not resurrect across a restart.
|
|
139
|
+
const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
|
|
140
|
+
this.notes = state.notes
|
|
141
|
+
.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
|
|
142
|
+
.filter((n) => !(n.kind === "claim" && n.taskId && settled.has(n.taskId)));
|
|
143
|
+
const lastPhase = state.phases[state.phases.length - 1];
|
|
144
|
+
if (lastPhase)
|
|
145
|
+
this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
|
|
111
146
|
this.spentTokens = state.totalUsage.promptTokens + state.totalUsage.completionTokens;
|
|
112
147
|
this.cost = state.cost;
|
|
148
|
+
try {
|
|
149
|
+
// The living plan survives restarts from disk, not from the journal.
|
|
150
|
+
this.planDoc = fs.readFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), "utf8");
|
|
151
|
+
}
|
|
152
|
+
catch {
|
|
153
|
+
/* no plan yet */
|
|
154
|
+
}
|
|
113
155
|
this.resumed = true;
|
|
114
156
|
}
|
|
115
157
|
setStatus(status, reason) {
|
|
158
|
+
// A team is one task of the parent run, not a run of its own.
|
|
159
|
+
if (this.mode === "team")
|
|
160
|
+
return;
|
|
116
161
|
this.journal.append("run.status", { status, reason });
|
|
117
162
|
}
|
|
163
|
+
budgetWarned = new Set();
|
|
118
164
|
/**
 * Account for one model call: add its tokens and cost to the running
 * totals, journal the usage, forward it to `opts.onUsageForward` when one
 * was supplied, and log each budget threshold (50/80/95 %) the first time
 * it is crossed.
 */
onUsage = (model, usage) => {
    const { promptTokens, completionTokens } = usage;
    this.spentTokens += promptTokens + completionTokens;
    this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
    this.journal.append("usage", { model, usage, cost: this.cost });
    // Team spend also counts against the parent's (authoritative) budget.
    this.opts.onUsageForward?.(model, usage);
    const cap = this.meta.options.maxTokens;
    if (!(cap > 0)) {
        return; // no positive cap — nothing to measure against
    }
    const pct = (this.spentTokens / cap) * 100;
    const newlyCrossed = [50, 80, 95].filter((mark) => pct >= mark && !this.budgetWarned.has(mark));
    for (const threshold of newlyCrossed) {
        this.budgetWarned.add(threshold);
        const level = threshold >= 95 ? "warn" : "info";
        this.journal.append("log", {
            level,
            msg: `budget: ${threshold}% of the run's token cap used (est. $${this.cost.toFixed(2)})`,
        });
    }
};
|
|
123
184
|
budgetExceeded() {
|
|
124
185
|
return this.spentTokens >= this.meta.options.maxTokens;
|
|
@@ -126,39 +187,69 @@ class Executor {
|
|
|
126
187
|
blackboardDigest(max = 1800) {
|
|
127
188
|
if (!this.notes.length)
|
|
128
189
|
return "";
|
|
129
|
-
const
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
190
|
+
const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
|
|
191
|
+
// Decisions and conflicts anchor mission-wide coherence and are never
|
|
192
|
+
// trimmed out of the digest; everything else shows only its recent tail.
|
|
193
|
+
const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
|
|
194
|
+
const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
|
|
195
|
+
let tail = rest.join("\n");
|
|
196
|
+
const budget = Math.max(400, max - pinned.join("\n").length);
|
|
197
|
+
if (tail.length > budget)
|
|
198
|
+
tail = tail.slice(tail.length - budget);
|
|
199
|
+
return [pinned.join("\n"), tail].filter(Boolean).join("\n");
|
|
200
|
+
}
|
|
201
|
+
searchNotes(query) {
|
|
202
|
+
const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
203
|
+
if (!terms.length)
|
|
204
|
+
return "empty query";
|
|
205
|
+
const scored = this.notes
|
|
206
|
+
.map((n) => {
|
|
207
|
+
const hay = `${n.key ?? ""} ${n.kind ?? ""} ${n.text}`.toLowerCase();
|
|
208
|
+
const score = terms.reduce((s, t) => s + (hay.includes(t) ? 1 : 0), 0);
|
|
209
|
+
return { n, score };
|
|
210
|
+
})
|
|
211
|
+
.filter((x) => x.score > 0)
|
|
212
|
+
.sort((a, b) => b.score - a.score)
|
|
213
|
+
.slice(0, 12);
|
|
214
|
+
if (!scored.length)
|
|
215
|
+
return "no notes matched";
|
|
216
|
+
return scored
|
|
217
|
+
.map(({ n }) => `• ${n.kind ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.clip)(n.text, 400)}${n.taskId ? ` (${n.taskId})` : ""}`)
|
|
218
|
+
.join("\n");
|
|
136
219
|
}
|
|
137
220
|
// ---------------------------------------------------------------- main
|
|
138
221
|
async run() {
|
|
139
222
|
this.setStatus("planning");
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
223
|
+
if (this.mode === "root") {
|
|
224
|
+
// Surface AIMD limiter adjustments (429 pressure) in the journal/UI.
|
|
225
|
+
(0, deepseek_1.gateFor)(this.cfg).onState = (s) => {
|
|
226
|
+
this.journal.append("limiter.state", { ceiling: s.ceiling, active: s.active, queued: s.queued });
|
|
227
|
+
};
|
|
228
|
+
// Preflight: validate auth before doing any work so the operator gets an
|
|
229
|
+
// instant, clear error instead of a phantom "done" run. (Teams inherit a
|
|
230
|
+
// parent that already passed.)
|
|
231
|
+
const auth = await (0, deepseek_1.validateAuth)(this.cfg);
|
|
232
|
+
if (auth.status === "invalid") {
|
|
233
|
+
this.fatal = `Provider authentication failed — ${auth.message || "invalid API key"}. Set a valid key in Settings (or: swarm config set apiKey <...>).`;
|
|
234
|
+
this.finishReason = this.fatal;
|
|
235
|
+
this.journal.append("log", { level: "error", msg: this.fatal });
|
|
236
|
+
await this.fail(this.fatal);
|
|
237
|
+
return;
|
|
238
|
+
}
|
|
239
|
+
// Boot the sandbox before any work — a dead Docker daemon or a bad cloud
|
|
240
|
+
// key must fail the run instantly with a clear reason, not mid-mission.
|
|
241
|
+
// (Teams share the parent's already-running sandbox.)
|
|
242
|
+
try {
|
|
243
|
+
await this.sandbox.start((msg) => this.journal.append("log", { level: "info", msg }));
|
|
244
|
+
this.journal.append("log", { level: "info", msg: `sandbox: ${this.sandbox.label}` });
|
|
245
|
+
}
|
|
246
|
+
catch (e) {
|
|
247
|
+
this.fatal = `Sandbox failed to start — ${(0, util_1.errMsg)(e)}`;
|
|
248
|
+
this.finishReason = this.fatal;
|
|
249
|
+
this.journal.append("log", { level: "error", msg: this.fatal });
|
|
250
|
+
await this.fail(this.fatal);
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
162
253
|
}
|
|
163
254
|
// Operator control must land while agents are mid-task, not only when the
|
|
164
255
|
// scheduler wakes up — a Stop click aborts in-flight work within ~1s.
|
|
@@ -170,13 +261,18 @@ class Executor {
|
|
|
170
261
|
/* control polling must never kill the run */
|
|
171
262
|
}
|
|
172
263
|
}, 750);
|
|
264
|
+
// Real-directory runs remember: prior missions in the same workspace feed
|
|
265
|
+
// the conductor so it builds on settled decisions instead of starting cold.
|
|
266
|
+
const memory = this.mode === "root" && !this.meta.sandbox ? (0, memory_1.memoryBlock)(this.meta.cwd) : "";
|
|
173
267
|
this.conductorMessages = [
|
|
174
|
-
{ role: "system", content: (0, prompts_1.conductorSystem)(this.meta) },
|
|
268
|
+
{ role: "system", content: (0, prompts_1.conductorSystem)(this.meta) + (memory ? `\n\n${memory}` : "") },
|
|
175
269
|
{
|
|
176
270
|
role: "user",
|
|
177
271
|
content: this.resumed
|
|
178
272
|
? (0, prompts_1.conductorUpdate)({
|
|
179
273
|
blackboard: this.blackboardDigest(),
|
|
274
|
+
phase: this.phaseLine(),
|
|
275
|
+
plan: this.planPin(),
|
|
180
276
|
nextId: this.nextId(),
|
|
181
277
|
taskTable: (0, prompts_1.taskTable)(this.taskList()),
|
|
182
278
|
budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
|
|
@@ -188,56 +284,23 @@ class Executor {
|
|
|
188
284
|
: (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
|
|
189
285
|
},
|
|
190
286
|
];
|
|
287
|
+
if (this.resumed) {
|
|
288
|
+
// The conductor's reasoning history died with the old process. Re-seed
|
|
289
|
+
// the durable facts into the same slot trimConductorHistory() maintains,
|
|
290
|
+
// so a resumed conductor knows what settled and what was decided.
|
|
291
|
+
this.conductorMessages.splice(1, 0, {
|
|
292
|
+
role: "user",
|
|
293
|
+
content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
|
|
294
|
+
});
|
|
295
|
+
}
|
|
191
296
|
try {
|
|
192
297
|
await this.conductorTurn();
|
|
193
298
|
this.setStatus("running");
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
this.finishing = true;
|
|
200
|
-
this.finishReason = "token budget reached";
|
|
201
|
-
break;
|
|
202
|
-
}
|
|
203
|
-
this.startReadyTasks();
|
|
204
|
-
if (this.inflight.size === 0) {
|
|
205
|
-
const runnable = this.runnableTasks();
|
|
206
|
-
if (runnable.length > 0)
|
|
207
|
-
continue; // loop starts them
|
|
208
|
-
// Nothing running, nothing runnable. Include any reports that
|
|
209
|
-
// settled while the conductor was mid-turn — they must not be lost.
|
|
210
|
-
this.blockStuckTasks();
|
|
211
|
-
const reports = this.drainSettled();
|
|
212
|
-
if (!this.hasOpenWork()) {
|
|
213
|
-
// Everything is terminal. Ask the conductor for a final decision.
|
|
214
|
-
this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
|
|
215
|
-
await this.conductorTurn();
|
|
216
|
-
if (this.lastConductorAction !== "spawn") {
|
|
217
|
-
this.finishing = true;
|
|
218
|
-
this.finishReason = this.finishReason || "all tasks settled";
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
else {
|
|
222
|
-
// Stuck: pending tasks exist but can't run (failed/blocked deps).
|
|
223
|
-
this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
|
|
224
|
-
await this.conductorTurn();
|
|
225
|
-
if (this.lastConductorAction === "wait") {
|
|
226
|
-
this.finishing = true;
|
|
227
|
-
this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
continue;
|
|
231
|
-
}
|
|
232
|
-
// Tasks are running — wait for at least one to settle.
|
|
233
|
-
await Promise.race([...this.inflight.values()]);
|
|
234
|
-
this.drainControl();
|
|
235
|
-
const reports = this.drainSettled();
|
|
236
|
-
if (reports.length && !this.finishing) {
|
|
237
|
-
this.appendConductorUpdate(undefined, reports);
|
|
238
|
-
await this.conductorTurn();
|
|
239
|
-
}
|
|
240
|
-
}
|
|
299
|
+
await this.mainLoop();
|
|
300
|
+
// Strict verification: one completeness review before synthesis; if it
|
|
301
|
+
// finds real gaps the conductor gets one chance to fill them.
|
|
302
|
+
if (await this.completenessPass())
|
|
303
|
+
await this.mainLoop();
|
|
241
304
|
}
|
|
242
305
|
catch (e) {
|
|
243
306
|
if (!this.ac.signal.aborted) {
|
|
@@ -251,12 +314,231 @@ class Executor {
|
|
|
251
314
|
await Promise.allSettled([...this.inflight.values()]);
|
|
252
315
|
}
|
|
253
316
|
this.drainSettled();
|
|
317
|
+
if (this.mode === "team") {
|
|
318
|
+
await this.consolidateTeam();
|
|
319
|
+
return; // the parent owns the sandbox, final flush, and run status
|
|
320
|
+
}
|
|
254
321
|
await this.synthesize();
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
322
|
+
// Teardown is best-effort AND bounded — a wedged container must not hang
|
|
323
|
+
// the engine after the report is already written.
|
|
324
|
+
await Promise.race([
|
|
325
|
+
this.sandbox.destroy().catch(() => { }),
|
|
326
|
+
new Promise((r) => setTimeout(r, 15_000).unref()),
|
|
327
|
+
]);
|
|
258
328
|
await this.journal.flush();
|
|
259
329
|
}
|
|
330
|
+
// ---------------------------------------------------------------- teams
|
|
331
|
+
/** All artifacts reported by this (team) executor's tasks. */
|
|
332
|
+
teamArtifacts() {
|
|
333
|
+
return [...new Set(this.taskList().flatMap((t) => t.artifacts))];
|
|
334
|
+
}
|
|
335
|
+
/** Whether any task here actually completed. */
|
|
336
|
+
anyTaskDone() {
|
|
337
|
+
return this.taskList().some((t) => t.status === "done");
|
|
338
|
+
}
|
|
339
|
+
/** Team-mode finale: one consolidated report instead of run synthesis. */
|
|
340
|
+
async consolidateTeam() {
|
|
341
|
+
const tasks = this.taskList();
|
|
342
|
+
const reports = tasks.length ? tasks.map(prompts_1.reportBlock).join("\n\n") : "(no tasks were completed)";
|
|
343
|
+
try {
|
|
344
|
+
const res = await (0, deepseek_1.chat)(this.cfg, {
|
|
345
|
+
model: this.meta.options.conductorModel,
|
|
346
|
+
priority: "high",
|
|
347
|
+
messages: [
|
|
348
|
+
{
|
|
349
|
+
role: "user",
|
|
350
|
+
content: `You led a sub-team inside a larger agent swarm. Consolidate your team's work into ONE report for the parent conductor: what was accomplished (with evidence and exact paths), what failed or remains open, and the key facts the rest of the mission needs.\n\nTEAM OBJECTIVE\n${this.meta.mission}\n\nOUTCOME: ${this.finishReason || "completed"}\nLead's closing notes: ${this.finishNotes || "(none)"}\n\nTASK REPORTS\n${(0, util_1.truncateMiddle)(reports, 60_000, "chars")}\n\nReply with the consolidated report only.`,
|
|
351
|
+
},
|
|
352
|
+
],
|
|
353
|
+
thinking: false,
|
|
354
|
+
maxTokens: 4096,
|
|
355
|
+
signal: new AbortController().signal, // consolidation runs even when cancelled
|
|
356
|
+
});
|
|
357
|
+
this.onUsage(this.meta.options.conductorModel, res.usage);
|
|
358
|
+
this.teamReport = (res.content || "").trim();
|
|
359
|
+
}
|
|
360
|
+
catch (e) {
|
|
361
|
+
this.journal.append("log", { level: "warn", msg: `team consolidation failed: ${(0, util_1.errMsg)(e)}` });
|
|
362
|
+
}
|
|
363
|
+
if (!this.teamReport) {
|
|
364
|
+
this.teamReport = tasks
|
|
365
|
+
.map((t) => `${t.id} [${t.status}] ${t.title}: ${(0, util_1.oneLine)(t.report ?? t.error ?? "(no output)", 200)}`)
|
|
366
|
+
.join("\n");
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
/** Run a team:true task as a sub-swarm sharing this run's everything. */
|
|
370
|
+
async runTeam(task) {
|
|
371
|
+
const remaining = Math.max(0, this.meta.options.maxTokens - this.spentTokens);
|
|
372
|
+
const childMeta = {
|
|
373
|
+
...this.meta,
|
|
374
|
+
mission: `${task.objective}${task.context ? `\n\nContext from the parent conductor:\n${task.context}` : ""}`,
|
|
375
|
+
options: {
|
|
376
|
+
...this.meta.options,
|
|
377
|
+
maxWorkers: task.teamMaxWorkers || Math.max(2, Math.min(16, Math.floor(this.meta.options.maxWorkers / 2))),
|
|
378
|
+
maxTokens: Math.min(remaining, task.teamBudgetTokens || Math.max(50_000, Math.floor(remaining / 4))),
|
|
379
|
+
maxTasks: Math.min(this.meta.options.maxTasks, 24),
|
|
380
|
+
},
|
|
381
|
+
};
|
|
382
|
+
this.journal.append("team.created", {
|
|
383
|
+
taskId: task.id,
|
|
384
|
+
maxWorkers: childMeta.options.maxWorkers,
|
|
385
|
+
budgetTokens: childMeta.options.maxTokens,
|
|
386
|
+
});
|
|
387
|
+
const child = new Executor(this.cfg, childMeta, new journal_1.TeamJournal(this.journal, task.id), {
|
|
388
|
+
mode: "team",
|
|
389
|
+
teamId: task.id,
|
|
390
|
+
sandbox: this.sandbox,
|
|
391
|
+
runDirPath: this.runDirPath,
|
|
392
|
+
onUsageForward: (model, usage) => {
|
|
393
|
+
// Absorb tokens/cost only — the child already journaled the usage event.
|
|
394
|
+
this.spentTokens += usage.promptTokens + usage.completionTokens;
|
|
395
|
+
this.cost += (0, types_1.usageCost)(usage, this.cfg.pricing[model]);
|
|
396
|
+
},
|
|
397
|
+
parentSignal: this.ac.signal,
|
|
398
|
+
sharedNotes: this.notes,
|
|
399
|
+
});
|
|
400
|
+
await child.run();
|
|
401
|
+
if (this.ac.signal.aborted) {
|
|
402
|
+
this.finalizeTask(task, "failed", "run cancelled");
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
const report = child.teamReport || "(team produced no consolidated report)";
|
|
406
|
+
for (const a of child.teamArtifacts())
|
|
407
|
+
if (!task.artifacts.includes(a))
|
|
408
|
+
task.artifacts.push(a);
|
|
409
|
+
task.report = report;
|
|
410
|
+
task.reportStatus = "done";
|
|
411
|
+
this.journal.append("team.report", { taskId: task.id, report, artifacts: task.artifacts });
|
|
412
|
+
this.journal.append("task.report", { taskId: task.id, status: "done", report, artifacts: task.artifacts });
|
|
413
|
+
this.finalizeTask(task, child.anyTaskDone() ? "done" : "failed", report);
|
|
414
|
+
}
|
|
415
|
+
async mainLoop() {
|
|
416
|
+
while (!this.finishing) {
|
|
417
|
+
this.drainControl();
|
|
418
|
+
if (this.finishing)
|
|
419
|
+
break;
|
|
420
|
+
if (this.budgetExceeded()) {
|
|
421
|
+
this.finishing = true;
|
|
422
|
+
this.finishReason = "token budget reached";
|
|
423
|
+
break;
|
|
424
|
+
}
|
|
425
|
+
if (this.journal.degraded) {
|
|
426
|
+
// The journal is the source of truth; if it can't be written, the
|
|
427
|
+
// run must stop loudly rather than burn tokens on unrecorded work.
|
|
428
|
+
this.finishing = true;
|
|
429
|
+
this.finishReason = "journal writes are failing — run state is no longer durable";
|
|
430
|
+
this.ac.abort();
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
this.startReadyTasks();
|
|
434
|
+
if (this.inflight.size === 0) {
|
|
435
|
+
const runnable = this.runnableTasks();
|
|
436
|
+
if (runnable.length > 0)
|
|
437
|
+
continue; // loop starts them
|
|
438
|
+
// Nothing running, nothing runnable. Include any reports that
|
|
439
|
+
// settled while the conductor was mid-turn — they must not be lost.
|
|
440
|
+
this.blockStuckTasks();
|
|
441
|
+
const reports = this.drainSettled();
|
|
442
|
+
if (!this.hasOpenWork()) {
|
|
443
|
+
// Everything is terminal. Ask the conductor for a final decision.
|
|
444
|
+
this.appendConductorUpdate("All tasks have settled and no tasks are runnable.", reports);
|
|
445
|
+
await this.conductorTurn();
|
|
446
|
+
// An errored turn is not a decision — keep looping so the breaker
|
|
447
|
+
// can retry (and eventually trip) instead of misreading the error
|
|
448
|
+
// as "the conductor chose to stop".
|
|
449
|
+
if (this.lastConductorAction !== "spawn" && !this.lastConductorErrored) {
|
|
450
|
+
this.finishing = true;
|
|
451
|
+
this.finishReason = this.finishReason || "all tasks settled";
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
else {
|
|
455
|
+
// Stuck: pending tasks exist but can't run (failed/blocked deps).
|
|
456
|
+
this.appendConductorUpdate("Some tasks cannot run because their dependencies failed or were blocked. Re-plan around them or finish.", reports);
|
|
457
|
+
await this.conductorTurn();
|
|
458
|
+
if (this.lastConductorAction === "wait" && !this.lastConductorErrored) {
|
|
459
|
+
this.finishing = true;
|
|
460
|
+
this.finishReason = "stalled: dependencies unmet and conductor chose to wait";
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
continue;
|
|
464
|
+
}
|
|
465
|
+
// Tasks are running — wait for at least one to settle, then debounce:
|
|
466
|
+
// at 100 agents, settles arrive constantly, and waking the conductor
|
|
467
|
+
// for every one of them serializes the whole swarm on its turns.
|
|
468
|
+
await Promise.race([...this.inflight.values()]);
|
|
469
|
+
const debounceMs = Number(process.env.SWARM_SETTLE_DEBOUNCE_MS ?? "2000");
|
|
470
|
+
const settleCap = Math.max(3, Math.ceil(this.activeWorkerCount() / 8));
|
|
471
|
+
while (debounceMs > 0 && this.inflight.size > 0 && this.settledSinceUpdate.length < settleCap) {
|
|
472
|
+
const before = this.settledSinceUpdate.length;
|
|
473
|
+
await Promise.race([...this.inflight.values(), (0, util_1.sleep)(debounceMs)]);
|
|
474
|
+
if (this.settledSinceUpdate.length === before)
|
|
475
|
+
break; // quiet period — flush to the conductor
|
|
476
|
+
this.drainControl();
|
|
477
|
+
if (this.finishing)
|
|
478
|
+
break;
|
|
479
|
+
this.startReadyTasks(); // settles free dep chains; don't idle workers during the debounce
|
|
480
|
+
}
|
|
481
|
+
this.drainControl();
|
|
482
|
+
const reports = this.drainSettled();
|
|
483
|
+
if (reports.length && !this.finishing) {
|
|
484
|
+
this.appendConductorUpdate(undefined, reports);
|
|
485
|
+
await this.conductorTurn();
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
/**
|
|
490
|
+
* Strict-mode gap review before synthesis. Returns true when the conductor
|
|
491
|
+
* accepted gap-filling work (the main loop must run again).
|
|
492
|
+
*/
|
|
493
|
+
gapPassDone = false;
|
|
494
|
+
async completenessPass() {
|
|
495
|
+
if (this.mode === "team")
|
|
496
|
+
return false; // the root run owns gap review
|
|
497
|
+
if (this.cfg.verification !== "strict" || this.gapPassDone)
|
|
498
|
+
return false;
|
|
499
|
+
if (this.fatal || this.ac.signal.aborted || this.budgetExceeded())
|
|
500
|
+
return false;
|
|
501
|
+
if (this.finishReason.includes("cancel") || this.finishReason.includes("conductor unavailable"))
|
|
502
|
+
return false;
|
|
503
|
+
if (!this.taskList().some((t) => t.status === "done"))
|
|
504
|
+
return false;
|
|
505
|
+
this.gapPassDone = true;
|
|
506
|
+
let verdict = "";
|
|
507
|
+
try {
|
|
508
|
+
const res = await (0, deepseek_1.chat)(this.cfg, {
|
|
509
|
+
model: this.meta.options.conductorModel,
|
|
510
|
+
messages: [
|
|
511
|
+
{
|
|
512
|
+
role: "user",
|
|
513
|
+
content: (0, prompts_1.completenessPrompt)(this.meta.mission, (0, prompts_1.taskTable)(this.taskList()), (0, util_1.truncateMiddle)(this.taskList().map(prompts_1.reportBlock).join("\n\n"), 80_000, "chars")),
|
|
514
|
+
},
|
|
515
|
+
],
|
|
516
|
+
thinking: false,
|
|
517
|
+
maxTokens: 2048,
|
|
518
|
+
signal: this.ac.signal,
|
|
519
|
+
});
|
|
520
|
+
this.onUsage(this.meta.options.conductorModel, res.usage);
|
|
521
|
+
verdict = (res.content || "").trim();
|
|
522
|
+
}
|
|
523
|
+
catch (e) {
|
|
524
|
+
this.journal.append("log", { level: "warn", msg: `completeness review failed: ${(0, util_1.errMsg)(e)}` });
|
|
525
|
+
return false;
|
|
526
|
+
}
|
|
527
|
+
if (!verdict || /^COMPLETE\b/i.test(verdict)) {
|
|
528
|
+
this.journal.append("log", { level: "info", msg: "completeness review: no gaps found" });
|
|
529
|
+
return false;
|
|
530
|
+
}
|
|
531
|
+
this.journal.append("log", { level: "info", msg: `completeness review found gaps:\n${(0, util_1.clip)(verdict, 1500)}` });
|
|
532
|
+
this.finishing = false;
|
|
533
|
+
this.appendConductorUpdate(`COMPLETENESS REVIEW found gaps before final synthesis:\n${(0, util_1.clip)(verdict, 2000)}\n` +
|
|
534
|
+
"Spawn focused tasks to close the REAL gaps (or finish if you judge them immaterial). This is the final round.");
|
|
535
|
+
await this.conductorTurn();
|
|
536
|
+
if (this.lastConductorAction === "spawn")
|
|
537
|
+
return true;
|
|
538
|
+
this.finishing = true;
|
|
539
|
+
this.finishReason = this.finishReason || "all tasks settled";
|
|
540
|
+
return false;
|
|
541
|
+
}
|
|
260
542
|
// ---------------------------------------------------------------- conductor
|
|
261
543
|
nextId() {
|
|
262
544
|
return this.taskCounter + 1;
|
|
@@ -267,7 +549,7 @@ class Executor {
|
|
|
267
549
|
// Re-bound the history every turn — the nudge loop and tool-result pushes
|
|
268
550
|
// below grow it outside appendConductorUpdate's trim.
|
|
269
551
|
this.trimConductorHistory();
|
|
270
|
-
const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
|
|
552
|
+
const tools = [tools_1.SPAWN_TASKS_TOOL, tools_1.SET_PHASE_TOOL, tools_1.UPDATE_PLAN_TOOL, tools_1.CONDUCTOR_READ_REPORT_TOOL, tools_1.WAIT_TOOL, tools_1.FINISH_TOOL];
|
|
271
553
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
272
554
|
let res;
|
|
273
555
|
try {
|
|
@@ -278,6 +560,9 @@ class Executor {
|
|
|
278
560
|
// "auto" rather than "required" for cross-provider safety; the prompt
|
|
279
561
|
// mandates a tool call and the no-tool nudge loop below enforces it.
|
|
280
562
|
toolChoice: "auto",
|
|
563
|
+
// The conductor is the swarm's brain: it must never queue behind a
|
|
564
|
+
// hundred worker streams.
|
|
565
|
+
priority: "high",
|
|
281
566
|
thinking: this.meta.options.thinking,
|
|
282
567
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
283
568
|
// Generous: with thinking enabled, reasoning + a large spawn_tasks
|
|
@@ -297,11 +582,26 @@ class Executor {
|
|
|
297
582
|
this.fatal = `Provider authentication failed — ${msg}. Set a valid key in Settings.`;
|
|
298
583
|
this.finishing = true;
|
|
299
584
|
this.finishReason = this.fatal;
|
|
585
|
+
return;
|
|
586
|
+
}
|
|
587
|
+
// Circuit breaker: a transient failure degrades to "wait" so the loop
|
|
588
|
+
// keeps draining tasks, but repeated consecutive failures must end the
|
|
589
|
+
// run with a clear reason rather than spin forever.
|
|
590
|
+
this.conductorFailures++;
|
|
591
|
+
if (this.conductorFailures >= 5) {
|
|
592
|
+
this.finishing = true;
|
|
593
|
+
this.finishReason = `conductor unavailable: ${this.conductorFailures} consecutive call failures (last: ${msg})`;
|
|
594
|
+
return;
|
|
300
595
|
}
|
|
301
|
-
|
|
596
|
+
const scale = Number(process.env.SWARM_BACKOFF_SCALE || "1") || 1;
|
|
597
|
+
const backoff = [2_000, 5_000, 15_000, 30_000][Math.min(this.conductorFailures - 1, 3)] * scale;
|
|
598
|
+
await new Promise((r) => setTimeout(r, backoff));
|
|
302
599
|
this.lastConductorAction = "wait";
|
|
600
|
+
this.lastConductorErrored = true;
|
|
303
601
|
return;
|
|
304
602
|
}
|
|
603
|
+
this.conductorFailures = 0;
|
|
604
|
+
this.lastConductorErrored = false;
|
|
305
605
|
this.onUsage(this.meta.options.conductorModel, res.usage);
|
|
306
606
|
if (res.content.trim())
|
|
307
607
|
this.journal.append("conductor.say", { text: (0, util_1.clip)(res.content, 4000) });
|
|
@@ -334,6 +634,41 @@ class Executor {
|
|
|
334
634
|
toolResult = "Acknowledged. Synthesizing the final deliverable.";
|
|
335
635
|
acted = "finish";
|
|
336
636
|
}
|
|
637
|
+
else if (call.function.name === "update_plan") {
|
|
638
|
+
const md = String(args.markdown ?? "");
|
|
639
|
+
if (md.trim()) {
|
|
640
|
+
this.planDoc = md;
|
|
641
|
+
try {
|
|
642
|
+
fs.writeFileSync(path.join(this.runDirPath, "artifacts", this.planFileName()), md, "utf8");
|
|
643
|
+
}
|
|
644
|
+
catch (e) {
|
|
645
|
+
this.journal.append("log", { level: "warn", msg: `plan write failed: ${(0, util_1.errMsg)(e)}` });
|
|
646
|
+
}
|
|
647
|
+
this.journal.append("plan.updated", { teamScoped: this.mode === "team" || undefined, excerpt: (0, util_1.clip)(md, 1200) });
|
|
648
|
+
toolResult = `Plan saved to artifacts/${this.planFileName()}.`;
|
|
649
|
+
}
|
|
650
|
+
else {
|
|
651
|
+
toolResult = "Plan was empty — not saved.";
|
|
652
|
+
}
|
|
653
|
+
// Bookkeeping, not a scheduling decision — falls through to the nudge.
|
|
654
|
+
}
|
|
655
|
+
else if (call.function.name === "read_report") {
|
|
656
|
+
toolResult = (0, util_1.truncateMiddle)(this.readReportText(String(args.task_id ?? "")), 8000, "chars");
|
|
657
|
+
// Information lookup, not a scheduling decision — falls through to
|
|
658
|
+
// the nudge loop if the conductor stopped here.
|
|
659
|
+
}
|
|
660
|
+
else if (call.function.name === "set_phase") {
|
|
661
|
+
const name = (0, util_1.clip)(String(args.name ?? ""), 80);
|
|
662
|
+
this.phase = {
|
|
663
|
+
name,
|
|
664
|
+
goal: args.goal ? String(args.goal) : undefined,
|
|
665
|
+
exitCriteria: args.exit_criteria ? String(args.exit_criteria) : undefined,
|
|
666
|
+
};
|
|
667
|
+
this.journal.append("phase.set", { name, goal: this.phase.goal, exit_criteria: this.phase.exitCriteria });
|
|
668
|
+
toolResult = `Phase set: ${name}. Now also call spawn_tasks, wait, or finish.`;
|
|
669
|
+
// Not a scheduling decision by itself — fall through to the nudge
|
|
670
|
+
// loop if the conductor stopped here.
|
|
671
|
+
}
|
|
337
672
|
else if (call.function.name === "wait") {
|
|
338
673
|
toolResult = "Waiting for running tasks to report.";
|
|
339
674
|
if (acted === "none")
|
|
@@ -344,6 +679,12 @@ class Executor {
|
|
|
344
679
|
}
|
|
345
680
|
this.conductorMessages.push({ role: "tool", tool_call_id: call.id, content: toolResult });
|
|
346
681
|
}
|
|
682
|
+
if (acted === "none") {
|
|
683
|
+
// set_phase (or an unknown tool) alone is not a scheduling decision —
|
|
684
|
+
// ask again rather than letting the run misread it as "wait"/"finish".
|
|
685
|
+
this.conductorMessages.push({ role: "user", content: "Now call spawn_tasks, wait, or finish." });
|
|
686
|
+
continue;
|
|
687
|
+
}
|
|
347
688
|
this.lastConductorAction = acted;
|
|
348
689
|
this.journal.append("conductor.action", { kind: acted });
|
|
349
690
|
return;
|
|
@@ -380,6 +721,7 @@ class Executor {
|
|
|
380
721
|
warnings.push(`${id}: dropped dep "${d}" (${idx >= i ? "same-batch later task — would deadlock" : "unknown task"})`);
|
|
381
722
|
return false;
|
|
382
723
|
});
|
|
724
|
+
const rawSpec = spec;
|
|
383
725
|
const task = {
|
|
384
726
|
id,
|
|
385
727
|
title: (0, util_1.clip)(String(spec.title ?? "task"), 120),
|
|
@@ -388,6 +730,10 @@ class Executor {
|
|
|
388
730
|
deps,
|
|
389
731
|
verify: Boolean(spec.verify) && this.cfg.verification !== "off",
|
|
390
732
|
context: spec.context ? String(spec.context) : undefined,
|
|
733
|
+
modelTier: ["cheap", "strong"].includes(String(spec.model)) ? spec.model : undefined,
|
|
734
|
+
team: Boolean(spec.team) && this.mode === "root",
|
|
735
|
+
teamMaxWorkers: Number(rawSpec.team_max_workers ?? rawSpec.teamMaxWorkers) || undefined,
|
|
736
|
+
teamBudgetTokens: Number(rawSpec.team_budget_tokens ?? rawSpec.teamBudgetTokens) || undefined,
|
|
391
737
|
status: "pending",
|
|
392
738
|
attempt: 1,
|
|
393
739
|
wave,
|
|
@@ -415,14 +761,46 @@ class Executor {
|
|
|
415
761
|
w = Math.max(w, t.wave);
|
|
416
762
|
return w + 1;
|
|
417
763
|
}
|
|
764
|
+
/** The conductor's living plan document (mission-plan.md). */
|
|
765
|
+
planDoc = "";
|
|
766
|
+
planFileName() {
|
|
767
|
+
return this.mode === "team" ? `mission-plan-${this.teamId}.md` : "mission-plan.md";
|
|
768
|
+
}
|
|
769
|
+
planPin() {
|
|
770
|
+
if (!this.planDoc)
|
|
771
|
+
return undefined;
|
|
772
|
+
return `MISSION PLAN (artifacts/${this.planFileName()}, maintained via update_plan):\n${(0, util_1.clip)(this.planDoc, 1500)}`;
|
|
773
|
+
}
|
|
774
|
+
phaseLine() {
|
|
775
|
+
if (!this.phase)
|
|
776
|
+
return undefined;
|
|
777
|
+
return `CURRENT PHASE: ${this.phase.name}${this.phase.goal ? ` — ${this.phase.goal}` : ""}${this.phase.exitCriteria ? ` (exit: ${this.phase.exitCriteria})` : ""}`;
|
|
778
|
+
}
|
|
779
|
+
/** Full text for the reports that matter, one-liners past the cap. */
|
|
780
|
+
digestReports(reports) {
|
|
781
|
+
const CAP = 12;
|
|
782
|
+
if (reports.length <= CAP)
|
|
783
|
+
return reports.map(prompts_1.reportBlock);
|
|
784
|
+
const important = reports.filter((t) => t.status !== "done");
|
|
785
|
+
const done = reports.filter((t) => t.status === "done");
|
|
786
|
+
const fullDone = done.slice(-Math.max(0, CAP - important.length));
|
|
787
|
+
const briefDone = done.slice(0, done.length - fullDone.length);
|
|
788
|
+
return [
|
|
789
|
+
...important.map(prompts_1.reportBlock),
|
|
790
|
+
...fullDone.map(prompts_1.reportBlock),
|
|
791
|
+
...briefDone.map((t) => `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → DONE — ${(0, util_1.oneLine)(t.report ?? "", 140)} (full text: read_report)`),
|
|
792
|
+
];
|
|
793
|
+
}
|
|
418
794
|
appendConductorUpdate(extra, reports) {
|
|
419
795
|
const ops = this.consumeOperatorNotes();
|
|
420
796
|
this.conductorMessages.push({
|
|
421
797
|
role: "user",
|
|
422
798
|
content: (0, prompts_1.conductorUpdate)({
|
|
423
|
-
reports: reports
|
|
799
|
+
reports: reports ? this.digestReports(reports) : undefined,
|
|
424
800
|
operatorNotes: ops,
|
|
425
801
|
blackboard: this.blackboardDigest(),
|
|
802
|
+
phase: this.phaseLine(),
|
|
803
|
+
plan: this.planPin(),
|
|
426
804
|
nextId: this.nextId(),
|
|
427
805
|
taskTable: (0, prompts_1.taskTable)(this.taskList()),
|
|
428
806
|
budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
|
|
@@ -432,26 +810,84 @@ class Executor {
|
|
|
432
810
|
// Keep the conductor's own history from growing without bound.
|
|
433
811
|
this.trimConductorHistory();
|
|
434
812
|
}
|
|
813
|
+
/**
|
|
814
|
+
* One-screen summary of everything durable about the run so far. Replaces
|
|
815
|
+
* trimmed history so the conductor never loses the plot on long missions —
|
|
816
|
+
* rebuilt fresh each trim from current state, so it also survives resume.
|
|
817
|
+
*/
|
|
818
|
+
missionLedger(intro = "Earlier orchestration history was trimmed.") {
|
|
819
|
+
const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
|
|
820
|
+
if (this.phase)
|
|
821
|
+
lines.push(this.phaseLine());
|
|
822
|
+
const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
|
|
823
|
+
if (settled.length) {
|
|
824
|
+
lines.push("Settled tasks:");
|
|
825
|
+
const failures = settled.filter((t) => t.status !== "done");
|
|
826
|
+
const done = settled.filter((t) => t.status === "done");
|
|
827
|
+
// Failures stay itemized forever; done tasks collapse by wave once the
|
|
828
|
+
// run gets big (a 500-task ledger must still fit on one screen).
|
|
829
|
+
if (done.length > 30) {
|
|
830
|
+
const waves = [...new Set(done.map((t) => t.wave))].sort((a, b) => a - b);
|
|
831
|
+
for (const w of waves) {
|
|
832
|
+
const ws = done.filter((t) => t.wave === w);
|
|
833
|
+
lines.push(`- wave ${w}: ${ws.length} done (${ws.map((t) => t.id).join(",")})`);
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
else {
|
|
837
|
+
for (const t of done)
|
|
838
|
+
lines.push(`- ${t.id} [done] ${(0, util_1.clip)(t.title, 60)}${t.report ? ` — ${(0, util_1.oneLine)(t.report, 120)}` : ""}`);
|
|
839
|
+
}
|
|
840
|
+
for (const t of failures) {
|
|
841
|
+
lines.push(`- ${t.id} [${t.status}] ${(0, util_1.clip)(t.title, 60)}${t.error ? ` — ${(0, util_1.oneLine)(t.error, 80)}` : ""}`);
|
|
842
|
+
}
|
|
843
|
+
}
|
|
844
|
+
const decisions = this.notes.filter((n) => n.kind === "decision");
|
|
845
|
+
if (decisions.length) {
|
|
846
|
+
lines.push("Decisions:");
|
|
847
|
+
for (const d of decisions.slice(-20))
|
|
848
|
+
lines.push(`- ${(0, util_1.oneLine)(d.text, 140)}`);
|
|
849
|
+
}
|
|
850
|
+
return (0, util_1.clip)(lines.join("\n"), 8000);
|
|
851
|
+
}
|
|
435
852
|
trimConductorHistory() {
|
|
436
853
|
const MAX = 60;
|
|
437
|
-
const
|
|
854
|
+
const LEDGER_MARK = "MISSION LEDGER";
|
|
855
|
+
const setLedger = () => {
|
|
856
|
+
const msg = { role: "user", content: this.missionLedger() };
|
|
857
|
+
if (this.conductorMessages[1]?.content?.includes(LEDGER_MARK))
|
|
858
|
+
this.conductorMessages[1] = msg;
|
|
859
|
+
else
|
|
860
|
+
this.conductorMessages.splice(1, 0, msg);
|
|
861
|
+
};
|
|
862
|
+
// Old conductor turns carry the bulk in thinking traces and verbose prose;
|
|
863
|
+
// the durable decisions live in the ledger and the plan pin. Compact them
|
|
864
|
+
// in place before resorting to dropping whole messages. (sanitizeMessages
|
|
865
|
+
// backfills reasoning_content with "" for DeepSeek tool-call turns.)
|
|
866
|
+
for (let i = 1; i < this.conductorMessages.length - 6; i++) {
|
|
867
|
+
const m = this.conductorMessages[i];
|
|
868
|
+
if (m.role !== "assistant")
|
|
869
|
+
continue;
|
|
870
|
+
if (m.reasoning_content)
|
|
871
|
+
m.reasoning_content = "";
|
|
872
|
+
if (m.content && m.content.length > 400)
|
|
873
|
+
m.content = (0, util_1.clip)(m.content, 400);
|
|
874
|
+
}
|
|
438
875
|
if (this.conductorMessages.length > MAX) {
|
|
439
876
|
const system = this.conductorMessages[0];
|
|
440
877
|
const tail = this.conductorMessages.slice(-(MAX - 2));
|
|
441
878
|
// Don't begin the tail on an orphaned tool result.
|
|
442
879
|
while (tail.length && tail[0].role === "tool")
|
|
443
880
|
tail.shift();
|
|
444
|
-
this.conductorMessages = [system,
|
|
881
|
+
this.conductorMessages = [system, ...tail];
|
|
882
|
+
setLedger();
|
|
445
883
|
}
|
|
446
884
|
// Count alone doesn't bound size: every update embeds the full task table,
|
|
447
885
|
// so a deep run can blow the model window long before 60 messages. The
|
|
448
886
|
// mission itself lives in the system message and always survives.
|
|
449
|
-
const budget = Math.floor(this.cfg.
|
|
887
|
+
const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
|
|
450
888
|
if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
|
|
451
889
|
return;
|
|
452
|
-
|
|
453
|
-
this.conductorMessages.splice(1, 0, { role: "user", content: TRIM_NOTICE });
|
|
454
|
-
}
|
|
890
|
+
setLedger();
|
|
455
891
|
while ((0, agent_1.estimateMessages)(this.conductorMessages) > budget && this.conductorMessages.length > 10) {
|
|
456
892
|
this.conductorMessages.splice(2, 1);
|
|
457
893
|
// Never leave tool results whose assistant turn was dropped.
|
|
@@ -469,25 +905,62 @@ class Executor {
|
|
|
469
905
|
hasOpenWork() {
|
|
470
906
|
return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
|
|
471
907
|
}
|
|
908
|
+
/** Walk a failed/blocked dep chain down to the task that actually failed. */
|
|
909
|
+
rootFailure(id) {
|
|
910
|
+
let cur = this.tasks.get(id);
|
|
911
|
+
const seen = new Set();
|
|
912
|
+
while (cur && !seen.has(cur.id)) {
|
|
913
|
+
seen.add(cur.id);
|
|
914
|
+
const next = cur.deps
|
|
915
|
+
.map((d) => this.tasks.get(d))
|
|
916
|
+
.find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
|
|
917
|
+
if (!next)
|
|
918
|
+
return cur;
|
|
919
|
+
cur = next;
|
|
920
|
+
}
|
|
921
|
+
return cur;
|
|
922
|
+
}
|
|
472
923
|
blockStuckTasks() {
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
924
|
+
// Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
|
|
925
|
+
// pass, not one level per conductor turn.
|
|
926
|
+
for (let changed = true; changed;) {
|
|
927
|
+
changed = false;
|
|
928
|
+
for (const t of this.taskList()) {
|
|
929
|
+
if (t.status !== "pending")
|
|
930
|
+
continue;
|
|
931
|
+
const bad = t.deps.find((d) => {
|
|
932
|
+
const s = this.tasks.get(d)?.status;
|
|
933
|
+
return s === "failed" || s === "blocked";
|
|
934
|
+
});
|
|
935
|
+
if (!bad)
|
|
936
|
+
continue;
|
|
937
|
+
// Carry the root cause so the conductor re-plans around the actual
|
|
938
|
+
// failure, not a chain of "dependency did not complete".
|
|
939
|
+
const root = this.rootFailure(bad);
|
|
940
|
+
const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
|
|
481
941
|
t.status = "blocked";
|
|
482
|
-
t.error =
|
|
942
|
+
t.error =
|
|
943
|
+
root && root.id !== bad
|
|
944
|
+
? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
|
|
945
|
+
: `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
|
|
483
946
|
t.endedAt = Date.now();
|
|
484
947
|
this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
|
|
485
948
|
this.settledSinceUpdate.push(t.id);
|
|
949
|
+
changed = true;
|
|
486
950
|
}
|
|
487
951
|
}
|
|
488
952
|
}
|
|
953
|
+
/** Tasks occupying a worker slot: running, not those awaiting verification. */
|
|
954
|
+
activeWorkerCount() {
|
|
955
|
+
let n = 0;
|
|
956
|
+
for (const id of this.inflight.keys()) {
|
|
957
|
+
if (this.tasks.get(id)?.status === "running")
|
|
958
|
+
n++;
|
|
959
|
+
}
|
|
960
|
+
return n;
|
|
961
|
+
}
|
|
489
962
|
startReadyTasks() {
|
|
490
|
-
while (this.
|
|
963
|
+
while (this.activeWorkerCount() < this.meta.options.maxWorkers && !this.finishing) {
|
|
491
964
|
const next = this.runnableTasks()[0];
|
|
492
965
|
if (!next)
|
|
493
966
|
break;
|
|
@@ -516,12 +989,14 @@ class Executor {
|
|
|
516
989
|
depReportsFor(task) {
|
|
517
990
|
if (!task.deps.length)
|
|
518
991
|
return "";
|
|
992
|
+
// Excerpts, not full reports: a fan-in task with many deps must not blow
|
|
993
|
+
// its context window on day one. Workers fetch full text with read_report.
|
|
519
994
|
return task.deps
|
|
520
995
|
.map((d) => {
|
|
521
996
|
const dep = this.tasks.get(d);
|
|
522
997
|
if (!dep)
|
|
523
998
|
return `(${d}: missing)`;
|
|
524
|
-
return (0, prompts_1.
|
|
999
|
+
return (0, prompts_1.depReportBlock)(dep);
|
|
525
1000
|
})
|
|
526
1001
|
.join("\n\n");
|
|
527
1002
|
}
|
|
@@ -535,13 +1010,39 @@ class Executor {
|
|
|
535
1010
|
agentId,
|
|
536
1011
|
taskId: task?.id,
|
|
537
1012
|
signal: this.ac.signal,
|
|
538
|
-
|
|
539
|
-
|
|
1013
|
+
addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
|
|
1014
|
+
addNote: (text, key, kind, url) => {
|
|
1015
|
+
this.notes.push({ taskId: task?.id, key, kind, text, url });
|
|
540
1016
|
// Only the recent tail ever feeds digests; without a cap a multi-day
|
|
541
|
-
// run accumulates every note in memory.
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
1017
|
+
// run accumulates every note in memory. Decisions and conflicts are
|
|
1018
|
+
// kept regardless. In-place splice: teams share this array by reference.
|
|
1019
|
+
if (this.notes.length > 4000) {
|
|
1020
|
+
const keep = (n) => n.kind === "decision" || n.kind === "conflict";
|
|
1021
|
+
const pinnedCount = this.notes.filter(keep).length;
|
|
1022
|
+
let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
|
|
1023
|
+
for (let i = 0; i < this.notes.length && toDrop > 0;) {
|
|
1024
|
+
if (!keep(this.notes[i])) {
|
|
1025
|
+
this.notes.splice(i, 1);
|
|
1026
|
+
toDrop--;
|
|
1027
|
+
}
|
|
1028
|
+
else
|
|
1029
|
+
i++;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
|
|
1033
|
+
},
|
|
1034
|
+
searchNotes: (q) => this.searchNotes(q),
|
|
1035
|
+
readReport: (taskId) => this.readReportText(taskId),
|
|
1036
|
+
checkClaim: (rel) => {
|
|
1037
|
+
const norm = rel.replace(/^\.\//, "");
|
|
1038
|
+
const claim = this.notes.find((n) => n.kind === "claim" &&
|
|
1039
|
+
n.key === norm &&
|
|
1040
|
+
n.taskId &&
|
|
1041
|
+
n.taskId !== task?.id &&
|
|
1042
|
+
["running", "verifying"].includes(this.tasks.get(n.taskId)?.status ?? ""));
|
|
1043
|
+
return claim
|
|
1044
|
+
? `⚠ ${claim.taskId} holds a claim on ${norm} ("${(0, util_1.oneLine)(claim.text, 80)}") — coordinate via the blackboard before further edits.`
|
|
1045
|
+
: null;
|
|
545
1046
|
},
|
|
546
1047
|
addArtifact: (rel) => {
|
|
547
1048
|
if (task && !task.artifacts.includes(rel))
|
|
@@ -553,7 +1054,33 @@ class Executor {
|
|
|
553
1054
|
},
|
|
554
1055
|
};
|
|
555
1056
|
}
|
|
1057
|
+
readReportText(taskId) {
|
|
1058
|
+
const t = this.tasks.get(taskId.trim().toUpperCase());
|
|
1059
|
+
if (!t)
|
|
1060
|
+
return `no such task: ${taskId}`;
|
|
1061
|
+
if (!t.report)
|
|
1062
|
+
return `${t.id} has not reported yet (status: ${t.status})`;
|
|
1063
|
+
return `${t.id} "${t.title}" → ${t.status}\n${t.report}${t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : ""}`;
|
|
1064
|
+
}
|
|
1065
|
+
recordCheckpoint(task, agentId, summary) {
|
|
1066
|
+
task.lastCheckpoint = (0, util_1.clip)(summary, 4000);
|
|
1067
|
+
this.journal.append("task.checkpoint", {
|
|
1068
|
+
taskId: task.id,
|
|
1069
|
+
agentId,
|
|
1070
|
+
attempt: task.attempt,
|
|
1071
|
+
summary: task.lastCheckpoint,
|
|
1072
|
+
});
|
|
1073
|
+
}
|
|
556
1074
|
async runTaskPipeline(task) {
|
|
1075
|
+
if (task.team) {
|
|
1076
|
+
try {
|
|
1077
|
+
await this.runTeam(task);
|
|
1078
|
+
}
|
|
1079
|
+
catch (e) {
|
|
1080
|
+
this.finalizeTask(task, "failed", `team error: ${(0, util_1.errMsg)(e)}`);
|
|
1081
|
+
}
|
|
1082
|
+
return;
|
|
1083
|
+
}
|
|
557
1084
|
for (;;) {
|
|
558
1085
|
try {
|
|
559
1086
|
const outcome = await this.runWorker(task);
|
|
@@ -566,10 +1093,15 @@ class Executor {
|
|
|
566
1093
|
this.finalizeTask(task, "failed", task.feedback || task.error || "not retried: run is winding down");
|
|
567
1094
|
return;
|
|
568
1095
|
}
|
|
569
|
-
if (task.attempt <
|
|
1096
|
+
if (task.attempt < this.cfg.verifyMaxAttempts) {
|
|
570
1097
|
task.attempt++;
|
|
571
1098
|
task.status = "running";
|
|
572
|
-
this.journal.append("task.status", {
|
|
1099
|
+
this.journal.append("task.status", {
|
|
1100
|
+
taskId: task.id,
|
|
1101
|
+
status: "running",
|
|
1102
|
+
attempt: task.attempt,
|
|
1103
|
+
reason: task.feedback || task.error,
|
|
1104
|
+
});
|
|
573
1105
|
continue;
|
|
574
1106
|
}
|
|
575
1107
|
this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
|
|
@@ -582,22 +1114,31 @@ class Executor {
|
|
|
582
1114
|
this.finalizeTask(task, "failed", "run cancelled");
|
|
583
1115
|
return;
|
|
584
1116
|
}
|
|
585
|
-
if (task.attempt <
|
|
1117
|
+
if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
|
|
586
1118
|
task.attempt++;
|
|
587
|
-
task.error = (0, util_1.errMsg)(e)
|
|
1119
|
+
task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
|
|
588
1120
|
task.status = "running";
|
|
589
1121
|
this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
|
|
590
1122
|
continue;
|
|
591
1123
|
}
|
|
592
|
-
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
|
|
1124
|
+
this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
|
|
593
1125
|
return;
|
|
594
1126
|
}
|
|
595
1127
|
}
|
|
596
1128
|
}
|
|
1129
|
+
resolveModel(tier) {
|
|
1130
|
+
if (tier === "cheap")
|
|
1131
|
+
return this.cfg.cheapModel || this.meta.options.model;
|
|
1132
|
+
if (tier === "strong")
|
|
1133
|
+
return this.cfg.strongModel || this.meta.options.model;
|
|
1134
|
+
return this.meta.options.model;
|
|
1135
|
+
}
|
|
597
1136
|
/** Returns "retry" to request another attempt, or "done" when finalized. */
|
|
598
1137
|
async runWorker(task) {
|
|
599
1138
|
const agentId = (0, util_1.rid)("w");
|
|
1139
|
+
const model = this.resolveModel(task.modelTier);
|
|
600
1140
|
task.agentIds.push(agentId);
|
|
1141
|
+
task.lastToolError = undefined; // diagnostics are per-attempt
|
|
601
1142
|
const dirListing = this.topListing();
|
|
602
1143
|
const system = (0, prompts_1.workerSystem)({
|
|
603
1144
|
agentId,
|
|
@@ -614,23 +1155,26 @@ class Executor {
|
|
|
614
1155
|
agentId,
|
|
615
1156
|
taskId: task.id,
|
|
616
1157
|
role: task.role,
|
|
617
|
-
model
|
|
1158
|
+
model,
|
|
618
1159
|
purpose: task.title,
|
|
619
1160
|
});
|
|
620
1161
|
const outcome = await (0, agent_1.runAgent)({
|
|
621
1162
|
cfg: this.cfg,
|
|
622
1163
|
agentId,
|
|
623
|
-
model
|
|
1164
|
+
model,
|
|
624
1165
|
thinking: this.meta.options.thinking,
|
|
625
1166
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
626
1167
|
system,
|
|
627
1168
|
kickoff: prompts_1.WORKER_KICKOFF,
|
|
628
|
-
tools: (0, tools_1.workerToolset)(),
|
|
1169
|
+
tools: (0, tools_1.workerToolset)(this.cfg),
|
|
629
1170
|
terminal: [tools_1.REPORT_TOOL],
|
|
630
1171
|
maxSteps: this.meta.options.maxStepsPerTask,
|
|
631
1172
|
signal: this.ac.signal,
|
|
632
1173
|
ctx: this.makeToolCtx(agentId, task),
|
|
633
|
-
hooks:
|
|
1174
|
+
hooks: {
|
|
1175
|
+
...this.agentHooks(agentId, task.id, task),
|
|
1176
|
+
onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
|
|
1177
|
+
},
|
|
634
1178
|
stop: this.agentStop,
|
|
635
1179
|
});
|
|
636
1180
|
this.flushDeltas(agentId);
|
|
@@ -638,7 +1182,11 @@ class Executor {
|
|
|
638
1182
|
if (this.ac.signal.aborted)
|
|
639
1183
|
return "done";
|
|
640
1184
|
if (!outcome.terminal) {
|
|
641
|
-
|
|
1185
|
+
const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
|
|
1186
|
+
task.error =
|
|
1187
|
+
"worker ended without reporting" +
|
|
1188
|
+
(task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
|
|
1189
|
+
(lastWords ? `; last words: ${lastWords}` : "");
|
|
642
1190
|
return "retry";
|
|
643
1191
|
}
|
|
644
1192
|
const a = outcome.terminal.args;
|
|
@@ -650,11 +1198,33 @@ class Executor {
|
|
|
650
1198
|
task.artifacts.push(art);
|
|
651
1199
|
task.report = report;
|
|
652
1200
|
task.reportStatus = reportStatus;
|
|
1201
|
+
const strList = (v, max) => Array.isArray(v) ? v.map((x) => (0, util_1.clip)(String(x), 300)).slice(0, max) : undefined;
|
|
1202
|
+
task.keyFacts = strList(a.key_facts, 8);
|
|
1203
|
+
task.openQuestions = strList(a.open_questions, 6);
|
|
1204
|
+
task.filesTouched = strList(a.files_touched, 40);
|
|
1205
|
+
// Structured sources: the citation pipeline's entry point. Only real
|
|
1206
|
+
// http(s) URLs survive; they flow into dep handoffs and the bibliography.
|
|
1207
|
+
const sources = Array.isArray(a.sources)
|
|
1208
|
+
? a.sources
|
|
1209
|
+
.filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
|
|
1210
|
+
.slice(0, 40)
|
|
1211
|
+
.map((s) => ({
|
|
1212
|
+
url: (0, util_1.clip)(String(s.url), 500),
|
|
1213
|
+
title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
|
|
1214
|
+
date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
|
|
1215
|
+
note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
|
|
1216
|
+
}))
|
|
1217
|
+
: [];
|
|
1218
|
+
task.sources = sources.length ? sources : undefined;
|
|
653
1219
|
this.journal.append("task.report", {
|
|
654
1220
|
taskId: task.id,
|
|
655
1221
|
status: reportStatus,
|
|
656
1222
|
report,
|
|
657
1223
|
artifacts: task.artifacts,
|
|
1224
|
+
keyFacts: task.keyFacts,
|
|
1225
|
+
openQuestions: task.openQuestions,
|
|
1226
|
+
filesTouched: task.filesTouched,
|
|
1227
|
+
sources: task.sources,
|
|
658
1228
|
});
|
|
659
1229
|
if (reportStatus === "blocked") {
|
|
660
1230
|
this.finalizeTask(task, "blocked", report);
|
|
@@ -663,6 +1233,14 @@ class Executor {
|
|
|
663
1233
|
if (task.verify && this.cfg.verification !== "off") {
|
|
664
1234
|
task.status = "verifying";
|
|
665
1235
|
this.journal.append("task.status", { taskId: task.id, status: "verifying", attempt: task.attempt });
|
|
1236
|
+
// Mechanical checks first: free, instant, and they catch the most common
|
|
1237
|
+
// fabrications (claimed artifacts that don't exist) without an LLM call.
|
|
1238
|
+
const mech = this.preVerify(task);
|
|
1239
|
+
if (mech) {
|
|
1240
|
+
task.feedback = mech;
|
|
1241
|
+
this.journal.append("verify.result", { taskId: task.id, pass: false, feedback: mech, mechanical: true });
|
|
1242
|
+
return "retry";
|
|
1243
|
+
}
|
|
666
1244
|
const pass = await this.runVerifier(task);
|
|
667
1245
|
if (!pass)
|
|
668
1246
|
return "retry";
|
|
@@ -670,43 +1248,147 @@ class Executor {
|
|
|
670
1248
|
this.finalizeTask(task, "done", report);
|
|
671
1249
|
return "done";
|
|
672
1250
|
}
|
|
673
|
-
|
|
1251
|
+
/** Zero-token sanity checks before the LLM verifier. Returns failure feedback or null. */
|
|
1252
|
+
preVerify(task) {
|
|
1253
|
+
const report = task.report ?? "";
|
|
1254
|
+
if (report.trim().length < 40) {
|
|
1255
|
+
return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
|
|
1256
|
+
}
|
|
1257
|
+
const missing = [];
|
|
1258
|
+
const malformed = [];
|
|
1259
|
+
// Remote sandboxes own their filesystem — only check host-visible paths.
|
|
1260
|
+
if (this.sandbox.localFs) {
|
|
1261
|
+
const okAt = (p) => {
|
|
1262
|
+
try {
|
|
1263
|
+
return fs.statSync(p).size > 0;
|
|
1264
|
+
}
|
|
1265
|
+
catch {
|
|
1266
|
+
return false;
|
|
1267
|
+
}
|
|
1268
|
+
};
|
|
1269
|
+
for (const rel of task.artifacts) {
|
|
1270
|
+
const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
|
|
1271
|
+
const inWorkdir = path.resolve(this.meta.cwd, rel);
|
|
1272
|
+
if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
|
|
1273
|
+
missing.push(rel);
|
|
1274
|
+
continue;
|
|
1275
|
+
}
|
|
1276
|
+
// Structural format check (json parses, csv is rectangular, html is
|
|
1277
|
+
// not a stub) — free, and catches what the LLM verifier wastes a whole
|
|
1278
|
+
// agent run discovering.
|
|
1279
|
+
const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
|
|
1280
|
+
if (problem)
|
|
1281
|
+
malformed.push(`${rel}: ${problem}`);
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
if (missing.length) {
|
|
1285
|
+
return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
|
|
1286
|
+
}
|
|
1287
|
+
if (malformed.length) {
|
|
1288
|
+
return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
|
|
1289
|
+
}
|
|
1290
|
+
return null;
|
|
1291
|
+
}
|
|
1292
|
+
/** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
|
|
1293
|
+
async verifierAgent(task, kickoff) {
|
|
674
1294
|
const agentId = (0, util_1.rid)("v");
|
|
1295
|
+
// Verification gets the strong tier when configured — a weak verifier
|
|
1296
|
+
// rubber-stamps exactly the tasks that most need scrutiny.
|
|
1297
|
+
const model = this.cfg.strongModel || this.meta.options.model;
|
|
675
1298
|
task.agentIds.push(agentId);
|
|
676
1299
|
this.journal.append("agent.spawned", {
|
|
677
1300
|
agentId,
|
|
678
1301
|
taskId: task.id,
|
|
679
1302
|
role: "verifier",
|
|
680
|
-
model
|
|
1303
|
+
model,
|
|
681
1304
|
purpose: `verify ${task.id}`,
|
|
682
1305
|
});
|
|
1306
|
+
let evidenceCalls = 0;
|
|
1307
|
+
const baseHooks = this.agentHooks(agentId, task.id);
|
|
683
1308
|
const outcome = await (0, agent_1.runAgent)({
|
|
684
1309
|
cfg: this.cfg,
|
|
685
1310
|
agentId,
|
|
686
|
-
model
|
|
1311
|
+
model,
|
|
687
1312
|
thinking: this.meta.options.thinking,
|
|
688
1313
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
689
|
-
system: (0, prompts_1.verifierSystem)(this.meta, task),
|
|
690
|
-
kickoff
|
|
1314
|
+
system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
|
|
1315
|
+
kickoff,
|
|
691
1316
|
tools: (0, tools_1.verifierToolset)(),
|
|
692
1317
|
terminal: [tools_1.VERDICT_TOOL],
|
|
693
1318
|
maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
|
|
694
1319
|
signal: this.ac.signal,
|
|
695
|
-
|
|
696
|
-
|
|
1320
|
+
// Blind verification: the verifier judges deliverables against the
|
|
1321
|
+
// objective with its own tools — it must not inherit the swarm's shared
|
|
1322
|
+
// beliefs (blackboard) or the worker's narrative beyond the claims.
|
|
1323
|
+
// (Dep reports are settled upstream outputs, not the worker's story.)
|
|
1324
|
+
ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
|
|
1325
|
+
hooks: {
|
|
1326
|
+
...baseHooks,
|
|
1327
|
+
onToolCall: (callId, name, args) => {
|
|
1328
|
+
if (name !== "verdict")
|
|
1329
|
+
evidenceCalls++;
|
|
1330
|
+
baseHooks.onToolCall(callId, name, args);
|
|
1331
|
+
},
|
|
1332
|
+
},
|
|
697
1333
|
stop: this.agentStop,
|
|
698
1334
|
});
|
|
699
1335
|
this.flushDeltas(agentId);
|
|
700
1336
|
this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
|
|
1337
|
+
return { outcome, evidenceCalls };
|
|
1338
|
+
}
|
|
1339
|
+
async runVerifier(task) {
|
|
1340
|
+
const strict = this.cfg.verification === "strict";
|
|
1341
|
+
let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
|
|
701
1342
|
if (this.ac.signal.aborted)
|
|
702
1343
|
return true;
|
|
1344
|
+
// Strict mode: a pass verdict backed by zero tool calls is an opinion,
|
|
1345
|
+
// not a verification. One re-run demanding evidence; if that also passes
|
|
1346
|
+
// tool-free, accept but say so in the journal.
|
|
1347
|
+
if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
|
|
1348
|
+
this.journal.append("log", {
|
|
1349
|
+
level: "info",
|
|
1350
|
+
msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
|
|
1351
|
+
});
|
|
1352
|
+
const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
|
|
1353
|
+
if (this.ac.signal.aborted)
|
|
1354
|
+
return true;
|
|
1355
|
+
if (second.outcome.terminal) {
|
|
1356
|
+
if (second.evidenceCalls === 0) {
|
|
1357
|
+
this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
|
|
1358
|
+
}
|
|
1359
|
+
outcome = second.outcome;
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
703
1362
|
const v = (outcome.terminal?.args ?? {});
|
|
704
|
-
const strict = this.cfg.verification === "strict";
|
|
705
1363
|
// No verdict returned: in strict mode fail closed, otherwise accept.
|
|
706
1364
|
const pass = outcome.terminal ? Boolean(v.pass) : !strict;
|
|
707
|
-
|
|
1365
|
+
let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
|
|
1366
|
+
// Structured issues become the retry's worklist — numbered, with evidence.
|
|
1367
|
+
const issues = Array.isArray(v.issues)
|
|
1368
|
+
? v.issues
|
|
1369
|
+
.filter((i) => i && typeof i === "object" && i.problem)
|
|
1370
|
+
.slice(0, 5)
|
|
1371
|
+
.map((i) => ({
|
|
1372
|
+
problem: (0, util_1.oneLine)(String(i.problem), 300),
|
|
1373
|
+
evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
|
|
1374
|
+
fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
|
|
1375
|
+
}))
|
|
1376
|
+
: [];
|
|
1377
|
+
if (!pass && issues.length) {
|
|
1378
|
+
feedback = [
|
|
1379
|
+
feedback,
|
|
1380
|
+
...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n evidence: ${i.evidence}` : ""}${i.fix ? `\n fix: ${i.fix}` : ""}`),
|
|
1381
|
+
]
|
|
1382
|
+
.filter(Boolean)
|
|
1383
|
+
.join("\n");
|
|
1384
|
+
}
|
|
708
1385
|
task.feedback = feedback;
|
|
709
|
-
this.journal.append("verify.result", {
|
|
1386
|
+
this.journal.append("verify.result", {
|
|
1387
|
+
taskId: task.id,
|
|
1388
|
+
pass,
|
|
1389
|
+
feedback,
|
|
1390
|
+
...(issues.length ? { issues } : {}),
|
|
1391
|
+
});
|
|
710
1392
|
return pass;
|
|
711
1393
|
}
|
|
712
1394
|
finalizeTask(task, status, reason) {
|
|
@@ -714,8 +1396,80 @@ class Executor {
|
|
|
714
1396
|
task.endedAt = Date.now();
|
|
715
1397
|
if (reason && status !== "done")
|
|
716
1398
|
task.error = reason;
|
|
1399
|
+
// A settled task holds no file claims — release them so the digest and
|
|
1400
|
+
// search_notes don't accumulate dead claims on long runs. In-place splice:
|
|
1401
|
+
// teams share this array by reference.
|
|
1402
|
+
for (let i = this.notes.length - 1; i >= 0; i--) {
|
|
1403
|
+
const n = this.notes[i];
|
|
1404
|
+
if (n.kind === "claim" && n.taskId === task.id)
|
|
1405
|
+
this.notes.splice(i, 1);
|
|
1406
|
+
}
|
|
717
1407
|
this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
|
|
718
1408
|
this.settledSinceUpdate.push(task.id);
|
|
1409
|
+
this.maybeSnapshot();
|
|
1410
|
+
}
|
|
1411
|
+
// ---------------------------------------------------------------- progress snapshots
|
|
1412
|
+
snapshotCounter = 0;
|
|
1413
|
+
settledSinceSnapshot = 0;
|
|
1414
|
+
snapshotInflight = false;
|
|
1415
|
+
/**
|
|
1416
|
+
* Periodic partial deliverable: every N settled tasks, write a cheap-tier
|
|
1417
|
+
* progress report to artifacts/. Fire-and-forget — a multi-day run always
|
|
1418
|
+
* has something readable, and a snapshot failure never blocks scheduling.
|
|
1419
|
+
*/
|
|
1420
|
+
maybeSnapshot() {
|
|
1421
|
+
if (this.mode !== "root" || this.finishing || this.snapshotInflight)
|
|
1422
|
+
return;
|
|
1423
|
+
const every = Number(process.env.SWARM_SNAPSHOT_EVERY ?? "25");
|
|
1424
|
+
if (!every || every < 1)
|
|
1425
|
+
return;
|
|
1426
|
+
if (++this.settledSinceSnapshot < every)
|
|
1427
|
+
return;
|
|
1428
|
+
this.settledSinceSnapshot = 0;
|
|
1429
|
+
this.snapshotInflight = true;
|
|
1430
|
+
const n = ++this.snapshotCounter;
|
|
1431
|
+
const model = this.cfg.cheapModel || this.meta.options.conductorModel;
|
|
1432
|
+
const tasks = this.taskList();
|
|
1433
|
+
const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
|
|
1434
|
+
(0, deepseek_1.chat)(this.cfg, {
|
|
1435
|
+
model,
|
|
1436
|
+
messages: [
|
|
1437
|
+
{
|
|
1438
|
+
role: "user",
|
|
1439
|
+
content: `Write a concise interim progress report (markdown) for an in-flight agent-swarm mission. Cover: what has been accomplished so far (with concrete results/paths from the reports), what failed, what is currently running, and what remains. This is a partial deliverable for the operator — informative, no filler.\n\nMISSION\n${this.meta.mission}\n\nTASKS\n${(0, prompts_1.taskTable)(tasks)}\n\nSETTLED REPORTS\n${(0, util_1.truncateMiddle)(settled.map(prompts_1.reportBlock).join("\n\n"), 50_000, "chars")}`,
|
|
1440
|
+
},
|
|
1441
|
+
],
|
|
1442
|
+
thinking: false,
|
|
1443
|
+
maxTokens: 4096,
|
|
1444
|
+
signal: this.ac.signal,
|
|
1445
|
+
})
|
|
1446
|
+
.then((res) => {
|
|
1447
|
+
this.onUsage(model, res.usage);
|
|
1448
|
+
if (!res.content.trim())
|
|
1449
|
+
return;
|
|
1450
|
+
const rel = `progress-report-${n}.md`;
|
|
1451
|
+
fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
|
|
1452
|
+
this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
|
|
1453
|
+
// Interim memory: a multi-day run that dies before synthesis still
|
|
1454
|
+
// leaves the next swarm in this workspace something to build on.
|
|
1455
|
+
if (!this.meta.sandbox) {
|
|
1456
|
+
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1457
|
+
runId: this.meta.id,
|
|
1458
|
+
mission: this.meta.mission,
|
|
1459
|
+
finishedAt: Date.now(),
|
|
1460
|
+
status: "in-progress",
|
|
1461
|
+
summary: (0, util_1.clip)(res.content, 600),
|
|
1462
|
+
keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
|
|
1463
|
+
});
|
|
1464
|
+
}
|
|
1465
|
+
})
|
|
1466
|
+
.catch((e) => {
|
|
1467
|
+
if (!this.ac.signal.aborted)
|
|
1468
|
+
this.journal.append("log", { level: "warn", msg: `progress snapshot failed: ${(0, util_1.errMsg)(e)}` });
|
|
1469
|
+
})
|
|
1470
|
+
.finally(() => {
|
|
1471
|
+
this.snapshotInflight = false;
|
|
1472
|
+
});
|
|
719
1473
|
}
|
|
720
1474
|
topListing() {
|
|
721
1475
|
// Remote sandboxes own their filesystem; a host listing would be a lie.
|
|
@@ -750,18 +1504,31 @@ class Executor {
|
|
|
750
1504
|
*/
|
|
751
1505
|
deltaBuf = new Map();
|
|
752
1506
|
deltaTimer = null;
|
|
1507
|
+
thinkDropLogged = false;
|
|
753
1508
|
queueDelta(agentId, taskId, channel, text) {
|
|
1509
|
+
// Deltas are UI sugar, never state — thin them under load so a 100-agent
|
|
1510
|
+
// swarm doesn't write gigabytes of streaming chatter into the journal.
|
|
1511
|
+
const load = this.activeWorkerCount();
|
|
1512
|
+
if (channel === "think" && load > 48) {
|
|
1513
|
+
if (!this.thinkDropLogged) {
|
|
1514
|
+
this.thinkDropLogged = true;
|
|
1515
|
+
this.journal.append("log", { level: "info", msg: `thinking streams muted above 48 active agents (currently ${load})` });
|
|
1516
|
+
}
|
|
1517
|
+
return;
|
|
1518
|
+
}
|
|
1519
|
+
const flushChars = load > 24 ? 2000 : 480;
|
|
1520
|
+
const flushMs = load > 24 ? 1000 : 200;
|
|
754
1521
|
const key = `${agentId}:${channel}`;
|
|
755
1522
|
const buf = this.deltaBuf.get(key);
|
|
756
1523
|
if (buf)
|
|
757
1524
|
buf.text += text;
|
|
758
1525
|
else
|
|
759
1526
|
this.deltaBuf.set(key, { agentId, taskId, channel, text });
|
|
760
|
-
if (this.deltaBuf.get(key).text.length >=
|
|
1527
|
+
if (this.deltaBuf.get(key).text.length >= flushChars) {
|
|
761
1528
|
this.flushDeltas(agentId);
|
|
762
1529
|
}
|
|
763
1530
|
else if (!this.deltaTimer) {
|
|
764
|
-
this.deltaTimer = setTimeout(() => this.flushDeltas(),
|
|
1531
|
+
this.deltaTimer = setTimeout(() => this.flushDeltas(), flushMs);
|
|
765
1532
|
}
|
|
766
1533
|
}
|
|
767
1534
|
flushDeltas(onlyAgent) {
|
|
@@ -781,7 +1548,7 @@ class Executor {
|
|
|
781
1548
|
});
|
|
782
1549
|
}
|
|
783
1550
|
}
|
|
784
|
-
agentHooks(agentId, taskId) {
|
|
1551
|
+
agentHooks(agentId, taskId, trackErrorsOn) {
|
|
785
1552
|
return {
|
|
786
1553
|
onDelta: (channel, text) => {
|
|
787
1554
|
this.queueDelta(agentId, taskId, channel, text);
|
|
@@ -791,6 +1558,8 @@ class Executor {
|
|
|
791
1558
|
this.journal.append("tool.call", { agentId, taskId, callId, name, args });
|
|
792
1559
|
},
|
|
793
1560
|
onToolResult: (callId, name, ok, summary) => {
|
|
1561
|
+
if (!ok && trackErrorsOn)
|
|
1562
|
+
trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
|
|
794
1563
|
this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
|
|
795
1564
|
},
|
|
796
1565
|
onUsage: this.onUsage,
|
|
@@ -802,6 +1571,10 @@ class Executor {
|
|
|
802
1571
|
// ---------------------------------------------------------------- operator control
|
|
803
1572
|
operatorQueue = [];
|
|
804
1573
|
drainControl() {
|
|
1574
|
+
// Only the root executor consumes operator control; teams are cancelled
|
|
1575
|
+
// via the parent's abort signal and would otherwise steal queued notes.
|
|
1576
|
+
if (this.mode === "team")
|
|
1577
|
+
return;
|
|
805
1578
|
for (const msg of this.control.poll()) {
|
|
806
1579
|
if (msg.kind === "cancel") {
|
|
807
1580
|
this.journal.append("operator.note", { text: "⛔ Cancel requested by operator." });
|
|
@@ -830,8 +1603,25 @@ class Executor {
|
|
|
830
1603
|
const reportPath = path.join(this.runDirPath, "artifacts", "final-report.md");
|
|
831
1604
|
(0, util_1.ensureDir)(path.dirname(reportPath));
|
|
832
1605
|
fs.writeFileSync(reportPath, reportMarkdown, "utf8");
|
|
1606
|
+
// Always ship a readable, shareable HTML rendering alongside the raw
|
|
1607
|
+
// markdown; a rendering bug must never block run finalization.
|
|
1608
|
+
let htmlPath;
|
|
1609
|
+
try {
|
|
1610
|
+
htmlPath = path.join(this.runDirPath, "artifacts", "final-report.html");
|
|
1611
|
+
fs.writeFileSync(htmlPath, (0, report_1.renderFinalHtml)({
|
|
1612
|
+
markdown: reportMarkdown,
|
|
1613
|
+
mission: this.meta.mission,
|
|
1614
|
+
runId: this.meta.id,
|
|
1615
|
+
status,
|
|
1616
|
+
finishedAt: Date.now(),
|
|
1617
|
+
}), "utf8");
|
|
1618
|
+
}
|
|
1619
|
+
catch (e) {
|
|
1620
|
+
htmlPath = undefined;
|
|
1621
|
+
this.journal.append("log", { level: "warn", msg: `final-report.html render failed: ${(0, util_1.errMsg)(e)}` });
|
|
1622
|
+
}
|
|
833
1623
|
this.setStatus(status, reason);
|
|
834
|
-
this.journal.append("run.final", { summary, reportPath, reason, status });
|
|
1624
|
+
this.journal.append("run.final", { summary, reportPath, htmlPath, reason, status });
|
|
835
1625
|
await this.journal.flush();
|
|
836
1626
|
}
|
|
837
1627
|
/** Terminate the run as failed without any further model calls. */
|
|
@@ -862,10 +1652,14 @@ class Executor {
|
|
|
862
1652
|
? tasks.map(prompts_1.reportBlock).join("\n\n")
|
|
863
1653
|
: "(no tasks were completed)";
|
|
864
1654
|
const artifactList = this.listArtifacts().join("\n") || "(none)";
|
|
1655
|
+
// The citation pipeline's last hop: every source any worker reported,
|
|
1656
|
+
// deduplicated and numbered, becomes the synthesizer's bibliography.
|
|
1657
|
+
const allSources = (0, report_1.aggregateSources)(tasks);
|
|
1658
|
+
const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
|
|
865
1659
|
const agentId = (0, util_1.rid)("synth");
|
|
866
1660
|
let summary = "";
|
|
867
1661
|
let reportMarkdown = "";
|
|
868
|
-
|
|
1662
|
+
const synthOnce = async (extraNote) => {
|
|
869
1663
|
const outcome = await (0, agent_1.runAgent)({
|
|
870
1664
|
cfg: this.cfg,
|
|
871
1665
|
agentId,
|
|
@@ -874,17 +1668,18 @@ class Executor {
|
|
|
874
1668
|
reasoningEffort: this.meta.options.reasoningEffort,
|
|
875
1669
|
system: (0, prompts_1.synthSystem)({
|
|
876
1670
|
meta: this.meta,
|
|
877
|
-
finishNotes: this.finishNotes,
|
|
878
|
-
reports: (0, util_1.truncateMiddle)(reports,
|
|
879
|
-
blackboard: this.blackboardDigest(
|
|
1671
|
+
finishNotes: [this.finishNotes, extraNote].filter(Boolean).join("\n\n"),
|
|
1672
|
+
reports: (0, util_1.truncateMiddle)(reports, 300_000, "chars"),
|
|
1673
|
+
blackboard: this.blackboardDigest(6000),
|
|
880
1674
|
artifactList,
|
|
881
1675
|
reason: this.finishReason || "completed",
|
|
1676
|
+
sources: sourcesText,
|
|
882
1677
|
}),
|
|
883
1678
|
kickoff: prompts_1.SYNTH_KICKOFF,
|
|
884
1679
|
tools: (0, tools_1.synthToolset)(),
|
|
885
1680
|
terminal: [tools_1.SUBMIT_FINAL_TOOL],
|
|
886
|
-
maxSteps:
|
|
887
|
-
maxTokensOut:
|
|
1681
|
+
maxSteps: 24,
|
|
1682
|
+
maxTokensOut: 32000,
|
|
888
1683
|
signal: new AbortController().signal, // synthesis should finish even if run was cancelled
|
|
889
1684
|
ctx: this.makeToolCtx(agentId, null),
|
|
890
1685
|
hooks: this.agentHooks(agentId, ""),
|
|
@@ -892,6 +1687,36 @@ class Executor {
|
|
|
892
1687
|
const a = (outcome.terminal?.args ?? {});
|
|
893
1688
|
reportMarkdown = String(a.report_markdown ?? outcome.finalText ?? "");
|
|
894
1689
|
summary = String(a.summary ?? "");
|
|
1690
|
+
};
|
|
1691
|
+
try {
|
|
1692
|
+
await synthOnce();
|
|
1693
|
+
// Strict mode: check the final report's claims against the task reports
|
|
1694
|
+
// (the ground truth) and re-synthesize once if it misrepresents them.
|
|
1695
|
+
if (this.cfg.verification === "strict" && reportMarkdown.trim() && tasks.length) {
|
|
1696
|
+
try {
|
|
1697
|
+
const res = await (0, deepseek_1.chat)(this.cfg, {
|
|
1698
|
+
model: this.meta.options.conductorModel,
|
|
1699
|
+
messages: [
|
|
1700
|
+
{
|
|
1701
|
+
role: "user",
|
|
1702
|
+
content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
|
|
1703
|
+
},
|
|
1704
|
+
],
|
|
1705
|
+
thinking: false,
|
|
1706
|
+
maxTokens: 2048,
|
|
1707
|
+
signal: new AbortController().signal,
|
|
1708
|
+
});
|
|
1709
|
+
this.onUsage(this.meta.options.conductorModel, res.usage);
|
|
1710
|
+
const check = (res.content || "").trim();
|
|
1711
|
+
if (check && !/^OK\b/i.test(check)) {
|
|
1712
|
+
this.journal.append("log", { level: "warn", msg: `synthesis check found discrepancies:\n${(0, util_1.clip)(check, 1500)}` });
|
|
1713
|
+
await synthOnce(`A faithfulness review of your previous draft found these discrepancies — fix them, claiming only what the task reports support:\n${(0, util_1.clip)(check, 2000)}`);
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
catch (e) {
|
|
1717
|
+
this.journal.append("log", { level: "warn", msg: `synthesis check failed: ${(0, util_1.errMsg)(e)}` });
|
|
1718
|
+
}
|
|
1719
|
+
}
|
|
895
1720
|
}
|
|
896
1721
|
catch (e) {
|
|
897
1722
|
this.journal.append("log", { level: "error", msg: `synthesis failed: ${(0, util_1.errMsg)(e)}` });
|
|
@@ -920,9 +1745,24 @@ class Executor {
|
|
|
920
1745
|
reason = `All ${tasks.length} task(s) failed or were blocked.`;
|
|
921
1746
|
}
|
|
922
1747
|
await this.writeFinal(status, reason, reportMarkdown, summary || (0, util_1.clip)(reportMarkdown, 600));
|
|
1748
|
+
// Cross-run memory: real-directory runs leave a trace for the next swarm.
|
|
1749
|
+
if (!this.meta.sandbox && status !== "cancelled") {
|
|
1750
|
+
(0, memory_1.appendMemory)(this.meta.cwd, {
|
|
1751
|
+
runId: this.meta.id,
|
|
1752
|
+
mission: this.meta.mission,
|
|
1753
|
+
finishedAt: Date.now(),
|
|
1754
|
+
status,
|
|
1755
|
+
summary: (0, util_1.clip)(summary || reportMarkdown, 600),
|
|
1756
|
+
keyDecisions: this.notes.filter((n) => n.kind === "decision").slice(-10).map((n) => n.text),
|
|
1757
|
+
});
|
|
1758
|
+
}
|
|
923
1759
|
}
|
|
924
1760
|
fallbackReport(tasks) {
|
|
925
1761
|
const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
|
|
1762
|
+
// Even without a synthesizer, surface the cross-task essentials first.
|
|
1763
|
+
const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
|
|
1764
|
+
if (facts.length)
|
|
1765
|
+
lines.push(`## Key facts`, ...facts.slice(0, 60), "");
|
|
926
1766
|
for (const t of tasks) {
|
|
927
1767
|
lines.push(`## ${t.id} ${t.title} (${t.status})`);
|
|
928
1768
|
lines.push(t.report || t.error || "(no output)");
|
|
@@ -930,6 +1770,15 @@ class Executor {
|
|
|
930
1770
|
lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
|
|
931
1771
|
lines.push("");
|
|
932
1772
|
}
|
|
1773
|
+
const sources = (0, report_1.aggregateSources)(tasks);
|
|
1774
|
+
if (sources.length) {
|
|
1775
|
+
lines.push(`## Sources`);
|
|
1776
|
+
for (const s of sources.slice(0, 100)) {
|
|
1777
|
+
lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
|
|
1778
|
+
}
|
|
1779
|
+
if (sources.length > 100)
|
|
1780
|
+
lines.push(`…and ${sources.length - 100} more in the task reports.`);
|
|
1781
|
+
}
|
|
933
1782
|
return lines.join("\n");
|
|
934
1783
|
}
|
|
935
1784
|
listArtifacts() {
|