@robzilla1738/agentswarm 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +36 -5
  2. package/dist/agent.js +64 -32
  3. package/dist/cli.js +18 -4
  4. package/dist/config.js +35 -5
  5. package/dist/crawltools.js +247 -0
  6. package/dist/deepseek.js +125 -10
  7. package/dist/executor.js +771 -122
  8. package/dist/hub.js +40 -3
  9. package/dist/journal.js +61 -11
  10. package/dist/memory.js +83 -0
  11. package/dist/prompts.js +109 -16
  12. package/dist/report.js +252 -0
  13. package/dist/run.js +7 -2
  14. package/dist/searchcore.js +191 -0
  15. package/dist/state.js +57 -3
  16. package/dist/tools.js +202 -12
  17. package/dist/webtools.js +191 -60
  18. package/package.json +3 -2
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
  22. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
  23. package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
  24. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
  25. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
  26. package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
  27. package/ui/out/fonts/PlanetKosmos.ttf +0 -0
  28. package/ui/out/index.html +1 -1
  29. package/ui/out/index.txt +3 -3
  30. package/ui/out/run/index.html +1 -1
  31. package/ui/out/run/index.txt +3 -3
  32. package/ui/out/settings/index.html +1 -1
  33. package/ui/out/settings/index.txt +3 -3
  34. package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
  35. package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
  36. package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +0 -1
  37. package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +0 -1
  38. package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +0 -1
  39. package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +0 -1
  40. package/ui/out/_next/static/css/82edaa7a5942f894.css +0 -3
  41. /package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
  42. /package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0
package/dist/hub.js CHANGED
@@ -34,12 +34,15 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.startHub = startHub;
37
+ exports.publicConfig = publicConfig;
37
38
  const fs = __importStar(require("fs"));
38
39
  const http = __importStar(require("http"));
40
+ const os = __importStar(require("os"));
39
41
  const path = __importStar(require("path"));
40
42
  const url_1 = require("url");
41
43
  const config_1 = require("./config");
42
44
  const control_1 = require("./control");
45
+ const crawltools_1 = require("./crawltools");
43
46
  const deepseek_1 = require("./deepseek");
44
47
  const providers_1 = require("./providers");
45
48
  const journal_1 = require("./journal");
@@ -165,6 +168,29 @@ async function api(req, res, url, opts) {
165
168
  return sendJson(res, 200, { models: Object.keys(cfg.pricing), error: (0, util_1.errMsg)(e) });
166
169
  }
167
170
  }
171
+ // Directory browser for the launch-folder picker. Localhost-only hub, same
172
+ // user permissions as the CLI — lists directory names, never file contents.
173
+ if (p === "/api/fs/dirs" && method === "GET") {
174
+ const raw = url.searchParams.get("path") || os.homedir();
175
+ const dir = path.resolve(raw);
176
+ try {
177
+ const entries = fs
178
+ .readdirSync(dir, { withFileTypes: true })
179
+ .filter((e) => e.isDirectory() && !e.name.startsWith("."))
180
+ .map((e) => ({ name: e.name, path: path.join(dir, e.name) }))
181
+ .sort((a, b) => a.name.localeCompare(b.name));
182
+ const parent = path.dirname(dir);
183
+ return sendJson(res, 200, {
184
+ path: dir,
185
+ parent: parent === dir ? null : parent,
186
+ home: os.homedir(),
187
+ dirs: entries,
188
+ });
189
+ }
190
+ catch (e) {
191
+ return sendJson(res, 400, { error: (0, util_1.errMsg)(e) });
192
+ }
193
+ }
168
194
  if (p === "/api/runs" && method === "GET") {
169
195
  return sendJson(res, 200, { runs: (0, run_1.listRuns)(cfg.pricing) });
170
196
  }
@@ -231,7 +257,7 @@ async function api(req, res, url, opts) {
231
257
  return sendJson(res, 200, { events, live: (0, run_1.isRunLive)(id) });
232
258
  }
233
259
  if (sub === "/stream" && method === "GET") {
234
- return streamEvents(res, id);
260
+ return streamEvents(res, id, url.searchParams.get("quiet") === "1");
235
261
  }
236
262
  if (sub === "/note" && method === "POST") {
237
263
  const body = await readBody(req);
@@ -292,7 +318,7 @@ async function api(req, res, url, opts) {
292
318
  }
293
319
  sendJson(res, 404, { error: "not found" });
294
320
  }
295
- function streamEvents(res, id) {
321
+ function streamEvents(res, id, quiet = false) {
296
322
  res.writeHead(200, {
297
323
  "content-type": "text/event-stream",
298
324
  "cache-control": "no-cache, no-transform",
@@ -311,6 +337,9 @@ function streamEvents(res, id) {
311
337
  return;
312
338
  }
313
339
  for (const ev of evs) {
340
+ // quiet mode: skip streaming chatter for clients rendering many agents.
341
+ if (quiet && ev.type === "agent.delta")
342
+ continue;
314
343
  res.write(`data: ${JSON.stringify(ev)}\n\n`);
315
344
  }
316
345
  };
@@ -368,7 +397,15 @@ function publicConfig(cfg) {
368
397
  tinyfishKeySet: Boolean(cfg.tinyfishApiKey),
369
398
  tinyfishKeyMasked: (0, config_1.maskKey)(cfg.tinyfishApiKey),
370
399
  searchBackend: cfg.searchBackend,
371
- searchkitCmd: cfg.searchkitCmd,
400
+ crawlBackend: cfg.crawlBackend,
401
+ crawlResolved: (0, crawltools_1.resolveCrawlBackend)(cfg),
402
+ firecrawlKeySet: Boolean(cfg.firecrawlApiKey),
403
+ firecrawlKeyMasked: (0, config_1.maskKey)(cfg.firecrawlApiKey),
404
+ contextdevKeySet: Boolean(cfg.contextdevApiKey),
405
+ contextdevKeyMasked: (0, config_1.maskKey)(cfg.contextdevApiKey),
406
+ deepcrawlKeySet: Boolean(cfg.deepcrawlApiKey),
407
+ deepcrawlKeyMasked: (0, config_1.maskKey)(cfg.deepcrawlApiKey),
408
+ deepcrawlBaseUrl: cfg.deepcrawlBaseUrl,
372
409
  sandboxRuntime: cfg.sandboxRuntime,
373
410
  sandboxResolved: (0, sandbox_1.resolveSandboxKind)(cfg),
374
411
  sandboxImage: cfg.sandboxImage,
package/dist/journal.js CHANGED
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.Journal = void 0;
36
+ exports.Journal = exports.TeamJournal = void 0;
37
37
  exports.eventsFile = eventsFile;
38
38
  exports.readEvents = readEvents;
39
39
  exports.lastSeq = lastSeq;
@@ -41,14 +41,36 @@ exports.readNewEvents = readNewEvents;
41
41
  const fs = __importStar(require("fs"));
42
42
  const path = __importStar(require("path"));
43
43
  /**
44
- * Append-only event journal. events.jsonl is the single source of truth for a
45
- * run: the executor writes it, the terminal renderer and the hub (web UI) read
46
- * and tail it. Tolerant of a torn final line after a crash.
44
+ * A child swarm's view of its parent's journal: same file, same sequence,
45
+ * every event stamped with the owning team's task id so the reducer can
46
+ * partition team activity away from the root run.
47
47
  */
48
class TeamJournal {
    // Wrapped journal that every call is forwarded to.
    inner;
    // Value stamped as `teamId` on each event appended through this view.
    teamId;
    /**
     * @param inner  journal-like object providing append/flush/degraded
     * @param teamId id added to every event appended through this view
     */
    constructor(inner, teamId) {
        this.inner = inner;
        this.teamId = teamId;
    }
    /**
     * Forward an event to the wrapped journal with `teamId` added.
     * The payload is spread after the stamp, so a payload that already
     * carries its own `teamId` keeps that value.
     * @returns whatever the wrapped journal's append returns
     */
    append(type, payload = {}) {
        return this.inner.append(type, { teamId: this.teamId, ...payload });
    }
    /** Delegate to the wrapped journal's flush and return its result. */
    flush() {
        return this.inner.flush();
    }
    /**
     * Synchronous flush passthrough, mirroring Journal.flushSync so code that
     * only holds the team view can still request a last-gasp write. A no-op
     * when the wrapped object does not implement flushSync.
     */
    flushSync() {
        this.inner.flushSync?.();
    }
    /** Mirrors the wrapped journal's `degraded` flag. */
    get degraded() {
        return this.inner.degraded;
    }
}
65
+ exports.TeamJournal = TeamJournal;
48
66
  class Journal {
49
67
  file;
50
68
  seq;
51
69
  chain = Promise.resolve();
70
+ buf = "";
71
+ failures = 0;
72
+ /** Set after repeated append failures: the source of truth is no longer being persisted. */
73
+ degraded = false;
52
74
  onEvent;
53
75
  constructor(runDirPath, startSeq) {
54
76
  this.file = path.join(runDirPath, "events.jsonl");
@@ -56,12 +78,8 @@ class Journal {
56
78
  }
57
79
  append(type, payload = {}) {
58
80
  const ev = { seq: this.seq++, t: Date.now(), type, ...payload };
59
- const line = JSON.stringify(ev) + "\n";
60
- this.chain = this.chain
61
- .then(() => fs.promises.appendFile(this.file, line, "utf8"))
62
- .catch(() => {
63
- /* never break the run on journal IO; next append retries the chain */
64
- });
81
+ this.buf += JSON.stringify(ev) + "\n";
82
+ this.chain = this.chain.then(() => this.drain());
65
83
  try {
66
84
  this.onEvent?.(ev);
67
85
  }
@@ -70,8 +88,40 @@ class Journal {
70
88
  }
71
89
  return ev;
72
90
  }
91
+ async drain() {
92
+ if (!this.buf)
93
+ return;
94
+ const chunk = this.buf;
95
+ this.buf = "";
96
+ try {
97
+ await fs.promises.appendFile(this.file, chunk, "utf8");
98
+ this.failures = 0;
99
+ }
100
+ catch (e) {
101
+ // Keep the unwritten events buffered so the next append/flush retries
102
+ // them in order; after repeated failures, stop pretending it's fine.
103
+ this.buf = chunk + this.buf;
104
+ this.failures++;
105
+ if (this.failures >= 5 && !this.degraded) {
106
+ this.degraded = true;
107
+ process.stderr.write(`agentswarm: journal writes are failing (${String(e)}); run state is no longer durable\n`);
108
+ }
109
+ }
110
+ }
73
111
  flush() {
74
- return this.chain;
112
+ return this.chain.then(() => this.drain());
113
+ }
114
+ /** Last-gasp synchronous flush for signal handlers and exit paths. */
115
+ flushSync() {
116
+ if (!this.buf)
117
+ return;
118
+ try {
119
+ fs.appendFileSync(this.file, this.buf, "utf8");
120
+ this.buf = "";
121
+ }
122
+ catch {
123
+ /* nothing left to do */
124
+ }
75
125
  }
76
126
  }
77
127
  exports.Journal = Journal;
package/dist/memory.js ADDED
@@ -0,0 +1,83 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.memoryFile = memoryFile;
37
+ exports.loadMemory = loadMemory;
38
+ exports.appendMemory = appendMemory;
39
+ exports.memoryBlock = memoryBlock;
40
+ const crypto = __importStar(require("crypto"));
41
+ const fs = __importStar(require("fs"));
42
+ const path = __importStar(require("path"));
43
+ const config_1 = require("./config");
44
+ const util_1 = require("./util");
45
+ const MAX_ENTRIES = 20;
46
/**
 * Path of the memory file for a workspace: `<home>/memory/<key>.json`, where
 * the key is the first 12 hex characters of the SHA-1 of the resolved cwd.
 */
function memoryFile(cwd) {
    const absolute = path.resolve(cwd);
    const digest = crypto.createHash("sha1").update(absolute).digest("hex");
    const key = digest.slice(0, 12);
    return path.join((0, config_1.home)(), "memory", `${key}.json`);
}
50
/**
 * Read the stored entries for a workspace.
 * @returns the `entries` array from the memory file, or [] when the file is
 *          missing, unreadable, not valid JSON, or has no entries array
 */
function loadMemory(cwd) {
    let parsed;
    try {
        const text = fs.readFileSync(memoryFile(cwd), "utf8");
        parsed = JSON.parse(text);
    }
    catch {
        return [];
    }
    const stored = parsed?.entries;
    if (!Array.isArray(stored)) {
        return [];
    }
    return stored;
}
59
/**
 * Append one entry to the workspace memory file, keeping only the newest
 * MAX_ENTRIES. Best-effort: any failure is swallowed so memory never breaks
 * the caller.
 *
 * The JSON is written to a sibling temp file and then renamed over the real
 * file, so an interrupted write cannot leave a truncated file behind (which
 * loadMemory would read back as an empty history).
 */
function appendMemory(cwd, entry) {
    try {
        const file = memoryFile(cwd);
        (0, util_1.ensureDir)(path.dirname(file));
        const entries = [...loadMemory(cwd), entry].slice(-MAX_ENTRIES);
        const json = JSON.stringify({ cwd: path.resolve(cwd), entries }, null, 2);
        // pid in the name keeps concurrent processes from sharing a temp file.
        const tmp = `${file}.${process.pid}.tmp`;
        try {
            fs.writeFileSync(tmp, json, "utf8");
            fs.renameSync(tmp, file);
        }
        catch (e) {
            // Don't leave a stray temp file when the write or rename failed.
            fs.rmSync(tmp, { force: true });
            throw e;
        }
    }
    catch {
        /* memory is best-effort */
    }
}
70
/**
 * Prompt block for the conductor, or "" when there's no history.
 *
 * Renders the last 8 stored entries, one line each. Entries come from a JSON
 * file on disk, so fields are read defensively: non-object entries are
 * skipped, an unparseable `finishedAt` is shown as "unknown date" (calling
 * toISOString on an invalid Date throws RangeError), and a missing
 * `keyDecisions` list is treated as empty.
 */
function memoryBlock(cwd) {
    const entries = loadMemory(cwd).filter((e) => e !== null && typeof e === "object");
    if (!entries.length)
        return "";
    const lines = entries.slice(-8).map((e) => {
        const finished = new Date(e.finishedAt);
        const when = Number.isNaN(finished.getTime())
            ? "unknown date"
            : finished.toISOString().slice(0, 10);
        const keyDecisions = Array.isArray(e.keyDecisions) ? e.keyDecisions : [];
        const decisions = keyDecisions.length
            ? ` Decisions: ${keyDecisions.map((d) => (0, util_1.oneLine)(String(d ?? ""), 100)).join("; ")}`
            : "";
        // Coerce to strings so a missing field cannot reach the text helper as undefined.
        const mission = (0, util_1.oneLine)(String(e.mission ?? ""), 100);
        const summary = (0, util_1.oneLine)(String(e.summary ?? ""), 200);
        return `- [${when}, ${e.status}] "${mission}" — ${summary}${decisions}`;
    });
    return (0, util_1.clip)(`PRIOR RUNS IN THIS WORKSPACE (build on them; don't redo settled decisions without reason):\n${lines.join("\n")}`, 4000);
}
package/dist/prompts.js CHANGED
@@ -39,10 +39,13 @@ exports.conductorInitialUpdate = conductorInitialUpdate;
39
39
  exports.conductorUpdate = conductorUpdate;
40
40
  exports.taskTable = taskTable;
41
41
  exports.reportBlock = reportBlock;
42
+ exports.depReportBlock = depReportBlock;
42
43
  exports.workerSystem = workerSystem;
43
44
  exports.forcedFinal = forcedFinal;
44
45
  exports.verifierSystem = verifierSystem;
45
46
  exports.synthSystem = synthSystem;
47
+ exports.completenessPrompt = completenessPrompt;
48
+ exports.synthCheckPrompt = synthCheckPrompt;
46
49
  exports.compactorPrompt = compactorPrompt;
47
50
  exports.budgetLine = budgetLine;
48
51
  const os = __importStar(require("os"));
@@ -72,15 +75,20 @@ DOCTRINE
72
75
  2. Make every task self-contained: crisp objective, explicit success criteria ("Done when …"), and every fact/path/URL the worker needs inlined in context. Workers know nothing you don't tell them.
73
76
  3. Invent the right specialist role per task (researcher, coder, analyst, data-wrangler, reviewer, writer, …). One concern per task, roughly 5–25 tool steps of work. Bigger → split it. Trivial → batch it.
74
77
  4. Software missions: scaffold first (one task), then parallel tasks on DISJOINT files/modules — never two writers on the same file — then an integration + test task that deps on all of them with verify:true.
75
- 5. Research missions: parallel scouts with distinct angles and sources, then a consolidation/analysis task that deps on the scouts.
78
+ 5. Research missions: go WIDE. Spawn many parallel scouts (10+ for a broad topic), each owning a distinct sub-question, angle, source type, time period, or entity — so collectively they pull hundreds of sources, not dozens. Tell each scout to use deep web_search (high count) and to record findings with exact URLs/quotes on the blackboard and in artifact files. Then spawn analysis/consolidation tasks that dep on the scouts, and a final synthesis. When one scout's area is itself broad, spawn it with team:true so it fans out further.
76
79
  6. Set verify:true on tasks whose failure would poison the mission (builds, integrations, data pipelines, final deliverables). A verification agent will adversarially check them and can fail them back for retry.
77
80
  7. React to evidence. Failed/blocked task → diagnose from its report and spawn a corrected or alternative approach (never re-run a failed approach verbatim). Surprising findings → adapt the plan.
78
81
  8. Watch the budget shown in every update. As it tightens, cut scope to what the mission truly needs — always deliver value before the cap, never run out mid-flight.
79
82
  9. Operator messages override everything. Adjust the plan immediately when one appears.
80
83
  10. finish only when the mission's success criteria are demonstrably met, or budget/feasibility forces it. Your finish notes steer the synthesizer that writes the final report.
84
+ 11. Model tiers: set model:"cheap" on scouts and bulk extraction, model:"strong" on leads, integration, and verified deliverables. Default tier for everything in between.
85
+ 12. Big subsystems: spawn with team:true to run the task as a sub-swarm — its own lead decomposes it into parallel sub-tasks and reports one consolidated result. Use for coherent multi-task chunks ("build the backend", "research all 12 competitors"), not for single jobs.
86
+ 13. Beyond ~20 tasks, maintain a living plan with update_plan (mission-plan.md): approach, what's done, what's next, open risks. Rewrite it at phase boundaries — it is pinned into your updates and survives restarts.
87
+ 14. Long missions: structure the work into phases with set_phase (e.g. discovery → build → integrate → polish). The current phase and its exit criteria are pinned into every update, so the plan survives even when old history is trimmed.
88
+ 15. DELIVERABLES SHIP IN THE FORMAT THE MISSION ACTUALLY NEEDS — a markdown report is the fallback, not the default. Software → running code with build/run instructions; data work → .csv/.json/.sqlite plus a summary; comparisons and datasets → tables in CSV as well as prose; polished documents → styled self-contained .html (the operator reads HTML, not raw markdown); scripts/configs → the runnable files themselves. Spell the expected format and exact filename(s) out in the deliverable task's objective and have it save them with save_artifact.
81
89
 
82
90
  RULES
83
- - Respond ONLY by calling your tools (spawn_tasks / wait / finish). Plain-text replies are ignored.
91
+ - Respond ONLY by calling your tools (spawn_tasks / set_phase / wait / finish). Plain-text replies are ignored. set_phase alone is not a decision — pair it with spawn_tasks, wait, or finish.
84
92
  - Never spawn a task whose deps are not yet all created.
85
93
  - Keep the total task count within budget (max ${o.maxTasks} per run); make every task earn its place.`;
86
94
  }
@@ -98,6 +106,10 @@ function conductorUpdate(p) {
98
106
  sections.push(`NEW REPORTS\n${p.reports.join("\n\n")}`);
99
107
  if (p.blackboard)
100
108
  sections.push(`BLACKBOARD (shared notes digest)\n${p.blackboard}`);
109
+ if (p.phase)
110
+ sections.push(p.phase);
111
+ if (p.plan)
112
+ sections.push(p.plan);
101
113
  sections.push(`SWARM STATE\n${p.taskTable}`);
102
114
  sections.push(p.budgetLine);
103
115
  if (p.extra)
@@ -108,27 +120,63 @@ function conductorUpdate(p) {
108
120
  function taskTable(tasks) {
109
121
  if (!tasks.length)
110
122
  return "(no tasks yet)";
111
- return tasks
112
- .map((t) => {
123
+ const line = (t) => {
113
124
  const deps = t.deps.length ? ` deps:[${t.deps.join(",")}]` : "";
114
125
  const extra = t.status === "failed" && t.error ? ` — ${(0, util_1.clip)(t.error, 80)}` : "";
115
126
  return `${t.id} [${t.status}${t.attempt > 1 ? ` a${t.attempt}` : ""}] (${t.role})${deps} ${(0, util_1.clip)(t.title, 70)}${extra}`;
116
- })
117
- .join("\n");
127
+ };
128
+ const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
129
+ if (settled.length <= 30)
130
+ return tasks.map(line).join("\n");
131
+ // Hundreds of tasks must not flood the conductor's prompt: collapse DONE
132
+ // tasks in older waves to one line per wave. Failures/blocks stay full-line
133
+ // forever (they're what the conductor plans around), as do active tasks and
134
+ // the two most recent waves.
135
+ const maxWave = Math.max(...tasks.map((t) => t.wave));
136
+ const out = [];
137
+ const waves = [...new Set(tasks.map((t) => t.wave))].sort((a, b) => a - b);
138
+ for (const w of waves) {
139
+ const ws = tasks.filter((t) => t.wave === w);
140
+ const collapsible = w < maxWave - 1 ? ws.filter((t) => t.status === "done") : [];
141
+ const fullLines = ws.filter((t) => !collapsible.includes(t));
142
+ if (collapsible.length) {
143
+ out.push(`wave ${w}: ${collapsible.length} done (${collapsible.map((t) => t.id).join(",")})`);
144
+ }
145
+ out.push(...fullLines.map(line));
146
+ }
147
+ return out.join("\n");
118
148
  }
119
149
  function reportBlock(t) {
120
150
  const head = `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}${t.attempt > 1 ? ` (attempt ${t.attempt})` : ""}`;
121
151
  const body = t.report ? (0, util_1.clip)(t.report, 1600) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
152
+ const facts = t.keyFacts?.length ? `\nkey facts:\n${t.keyFacts.map((f) => ` • ${(0, util_1.clip)(f, 200)}`).join("\n")}` : "";
153
+ const open = t.openQuestions?.length ? `\nopen questions: ${t.openQuestions.map((q) => (0, util_1.clip)(q, 150)).join(" | ")}` : "";
154
+ const files = t.filesTouched?.length ? `\nfiles touched: ${t.filesTouched.join(", ")}` : "";
122
155
  const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
123
156
  const fb = t.feedback ? `\nverifier: ${(0, util_1.clip)(t.feedback, 300)}` : "";
124
- return `${head}\n${body}${arts}${fb}`;
157
+ return `${head}\n${body}${facts}${open}${files}${arts}${fb}`;
158
+ }
159
/**
 * Dependency summary handed to a downstream worker. Key facts, touched files
 * and artifacts are listed in full; the prose report is passed through
 * clip(…, 1200), and when it is longer than 1200 characters a trailing note
 * points at read_report(taskId) for the full text.
 */
function depReportBlock(t) {
    const head = `── dep ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}`;
    let facts = "";
    if (t.keyFacts?.length) {
        const bullets = t.keyFacts.map((f) => ` • ${(0, util_1.clip)(f, 200)}`);
        facts = `\nkey facts:\n${bullets.join("\n")}`;
    }
    let files = "";
    if (t.filesTouched?.length) {
        files = `\nfiles touched: ${t.filesTouched.join(", ")}`;
    }
    let arts = "";
    if (t.artifacts.length) {
        arts = `\nartifacts: ${t.artifacts.join(", ")}`;
    }
    const reportLength = (t.report ?? "").length;
    const full = reportLength > 1200 ? `\n(excerpt — full text: read_report("${t.id}"))` : "";
    let body;
    if (t.report) {
        body = (0, util_1.clip)(t.report, 1200);
    }
    else if (t.error) {
        body = `error: ${(0, util_1.clip)(t.error, 400)}`;
    }
    else {
        body = "(no report)";
    }
    return [head, "\n", body, facts, files, arts, full].join("");
}
126
173
  // ============================================================ workers
127
174
  const ROLE_HINTS = {
128
- researcher: "Research craft: triangulate across independent sources; prefer primary docs over blog spam; capture exact figures, dates, URLs. Search several distinct phrasings before concluding something is unfindable.",
175
+ researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with the source URL) and save a structured markdown file of your sources+findings as an artifact so the synthesizer can build on it. " +
176
+ "If a crawl_site tool is available, use it to ingest whole documentation sites or multi-page sources into local markdown files, then read the saved files — far cheaper and broader than fetching pages one by one.",
129
177
  coder: "Engineering craft: read existing code before changing it; match its conventions; build/run/test after every meaningful change and include the command + result in your report. Leave the tree compiling.",
130
178
  analyst: "Analysis craft: quantify wherever possible; state assumptions explicitly; separate observation from interpretation; sanity-check numbers twice.",
131
- writer: "Writing craft: structure before prose; concrete over abstract; cut filler. Match the audience and purpose given in the objective.",
179
+ writer: "Writing craft: structure before prose; concrete over abstract; cut filler. Match the audience and purpose given in the objective. Deliver in the format the objective calls for — for polished documents prefer a styled, self-contained .html file (inline CSS, readable typography, real tables) over raw markdown; ship data tables as .csv alongside the prose.",
132
180
  reviewer: "Review craft: be adversarial; try to break it; check edge cases and the unhappy path; verify claims against the actual files, not the description.",
133
181
  "data-wrangler": "Data craft: validate schema and row counts at every step; spot-check samples; never silently drop rows — report anomalies.",
134
182
  };
@@ -140,13 +188,16 @@ function workerSystem(opts) {
140
188
  : task.attempt > 1 && task.error
141
189
  ? `\nPREVIOUS ATTEMPT FAILED: ${task.error}\nTake a different approach.\n`
142
190
  : "";
191
+ const checkpoint = task.lastCheckpoint
192
+ ? `\nPROGRESS CHECKPOINT FROM A PREVIOUS ATTEMPT (the run was interrupted or retried — do not redo completed work blindly):\n${task.lastCheckpoint}\nRe-verify the state it describes (files, commands) before re-creating anything, then continue from where it left off.\n`
193
+ : "";
143
194
  return `You are ${opts.agentId}, a ${opts.role} agent in a swarm pursuing this mission:
144
195
  ${meta.mission}
145
196
 
146
197
  YOUR TASK — ${task.id} (attempt ${task.attempt})
147
198
  ${task.title}
148
199
  Objective: ${task.objective}
149
- ${task.context ? `Context from the conductor:\n${task.context}\n` : ""}${retry}
200
+ ${task.context ? `Context from the conductor:\n${task.context}\n` : ""}${retry}${checkpoint}
150
201
  CONTEXT FROM THE SWARM
151
202
  ${opts.depReports || "(no dependency reports)"}
152
203
  ${opts.blackboard ? `Blackboard digest:\n${opts.blackboard}` : ""}
@@ -160,10 +211,13 @@ OPERATING PROTOCOL
160
211
  - Evidence over assumption: read before you edit; check outputs; cite concrete paths, commands and numbers.
161
212
  - Be token-lean: targeted reads (line ranges, grep via shell) over wholesale dumps; don't re-read unchanged files.
162
213
  - Post durable discoveries other agents will need to the blackboard with note(...) — facts only, used sparingly.
163
- - Save deliverable files with save_artifact so the operator sees them.
214
+ - Editing files other tasks might also touch? First search_notes for claims, then post note(kind:"claim", key:"<path>") before editing. Claims are advisory — coordinate, don't fight.
215
+ - Save deliverable files with save_artifact so the operator sees them. Pick the format that genuinely fits the deliverable — structured data as .csv/.json, polished documents as self-contained .html, code as runnable files — not everything is a markdown report.
216
+ - On long tasks, call checkpoint(...) after each major chunk so an interrupted run resumes warm instead of from scratch.
164
217
  - Genuinely impossible / missing prerequisite → report(status:"blocked", …) early instead of thrashing.
165
218
  - You have at most ${opts.maxSteps} tool steps. Budget them.
166
- - ALWAYS end by calling report(...). The conductor sees ONLY that report it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths.
219
+ - Dependency reports above are excerpts; use read_report(task_id) for full text, and search_notes(query) to find facts posted earlier in the run.
220
+ - ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks.
167
221
  ${roleHint ? "\n" + roleHint : ""}`;
168
222
  }
169
223
  exports.WORKER_KICKOFF = "Begin now. Work the task to completion, then call report(...).";
@@ -188,8 +242,12 @@ ${task.artifacts.length ? `Claimed artifacts: ${task.artifacts.join(", ")}` : ""
188
242
  Working directory: ${meta.cwd}
189
243
 
190
244
  PROTOCOL
191
- - Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers.
192
- - Check: objective met? success criteria satisfied? deliverables exist and are non-trivial (not stubs/placeholders)?
245
+ - Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers. You see only the worker's CLAIMS — gather your own evidence; do not assume shared context.
246
+ - RUBRIC fail unless all hold:
247
+ 1. Completeness: every part of the objective and its "Done when" criteria is addressed.
248
+ 2. Evidence: each substantive claim in the report is backed by something you verified yourself.
249
+ 3. Deliverables: claimed files/artifacts exist, are non-trivial (not stubs/placeholders), and match what the report says about them.
250
+ 4. Correctness: commands/builds/tests the task implies actually succeed when you run them.
193
251
  - Spot-check depth over exhaustive breadth; ~5-12 tool steps.
194
252
  - Then call verdict(pass, feedback). On fail, feedback must be actionable: exactly what is wrong and where. On pass, one line citing the evidence you checked.`;
195
253
  }
@@ -212,12 +270,42 @@ Working directory: ${opts.meta.cwd}
212
270
 
213
271
  PROTOCOL
214
272
  - You may read files (read_file / list_dir) to confirm specifics before writing — verify key claims you repeat.
273
+ - The mission's PRIMARY deliverable should exist in the format that serves it best, not only as prose. If the task reports produced data, comparisons, or rankings that the artifacts don't already capture in a structured form, save them now with save_artifact (e.g. data/results.csv, data/findings.json) before submitting. Don't duplicate artifacts that already exist — point to them.
215
274
  - Then call submit_final with:
216
- • report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler.
275
+ • report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler. Use real markdown tables for tabular findings. (A styled HTML rendering is generated automatically — do not hand-write one.)
217
276
  • summary — ≤8 sentences for the console.
218
277
  - The report stands alone: a reader who saw nothing else must understand what happened and where everything is.`;
219
278
  }
220
279
  exports.SYNTH_KICKOFF = "Compose and submit the final deliverable now via submit_final(...).";
280
+ // ============================================================ completeness / synthesis checks
281
/**
 * Build the prompt for the completeness critic that runs before a swarm
 * finishes. The three arguments are interpolated verbatim under the MISSION,
 * TASKS and TASK REPORTS headings.
 * @returns the full prompt text
 */
function completenessPrompt(mission, taskTableStr, reports) {
    const intro = `You are a completeness critic for an agent-swarm run that is about to finish. Given the mission and what was actually delivered, list any REAL gaps: parts of the mission not addressed, claims with no supporting task, or deliverables that were promised but never produced.`;
    const replyRule = `Reply with EXACTLY "COMPLETE" if the mission's requirements are genuinely covered. Otherwise reply with a short numbered list of concrete gaps (max 5), each one actionable enough to become a task. Do not invent nice-to-haves — only true gaps against the stated mission.`;
    // Template literals keep the same string coercion as direct interpolation.
    const sections = [
        intro,
        "",
        "MISSION",
        `${mission}`,
        "",
        "TASKS",
        `${taskTableStr}`,
        "",
        "TASK REPORTS",
        `${reports}`,
        "",
        replyRule,
    ];
    return sections.join("\n");
}
295
/**
 * Build the prompt that asks a model to check the final report against the
 * task reports. The three arguments are interpolated verbatim under the
 * MISSION, TASK REPORTS and FINAL REPORT headings.
 * @returns the full prompt text
 */
function synthCheckPrompt(mission, reports, finalReport) {
    const intro = `You are checking a final mission report for faithfulness before delivery. Compare it against the underlying task reports.`;
    const replyRule = `Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
    // Template literals keep the same string coercion as direct interpolation.
    const sections = [
        intro,
        "",
        "MISSION",
        `${mission}`,
        "",
        "TASK REPORTS (ground truth)",
        `${reports}`,
        "",
        "FINAL REPORT (to check)",
        `${finalReport}`,
        "",
        replyRule,
    ];
    return sections.join("\n");
}
221
309
  // ============================================================ compaction
222
310
  function compactorPrompt(serialized) {
223
311
  return `Compress this agent conversation segment into a dense progress summary the agent can rely on to continue working. Preserve: decisions made, files created/modified (exact paths), commands run and their outcomes, key findings/numbers/URLs, errors hit and how they were resolved, current state of the work, and anything still pending. Omit pleasantries and dead ends unless they prevent repeating a mistake. Output the summary only.
@@ -228,5 +316,10 @@ ${serialized}`;
228
316
  // ============================================================ misc
229
317
  function budgetLine(spent, cap) {
230
318
  const pct = cap > 0 ? Math.round((spent.total / cap) * 100) : 0;
231
- return `BUDGET: ${(0, util_1.fmtTokens)(spent.total)} of ${(0, util_1.fmtTokens)(cap)} tokens used (${pct}%) · est. cost so far $${spent.cost.toFixed(2)}`;
319
+ const urgency = pct >= 90
320
+ ? " ⚠ WIND DOWN NOW: stop spawning new work, consolidate what exists, and finish before the cap."
321
+ : pct >= 75
322
+ ? " Note: budget is tightening — prefer consolidation over new exploration."
323
+ : "";
324
+ return `BUDGET: ${(0, util_1.fmtTokens)(spent.total)} of ${(0, util_1.fmtTokens)(cap)} tokens used (${pct}%) · est. cost so far $${spent.cost.toFixed(2)}${urgency}`;
232
325
  }