npm - @robzilla1738/agentswarm - Versions diffs - 0.2.0 → 0.5.0 - Mend

@robzilla1738/agentswarm 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/README.md +36 -5
package/dist/agent.js +64 -32
package/dist/cli.js +18 -4
package/dist/config.js +35 -5
package/dist/crawltools.js +247 -0
package/dist/deepseek.js +125 -10
package/dist/executor.js +771 -122
package/dist/hub.js +40 -3
package/dist/journal.js +61 -11
package/dist/memory.js +83 -0
package/dist/prompts.js +109 -16
package/dist/report.js +252 -0
package/dist/run.js +7 -2
package/dist/searchcore.js +191 -0
package/dist/state.js +57 -3
package/dist/tools.js +202 -12
package/dist/webtools.js +191 -60
package/package.json +3 -2
package/ui/out/404/index.html +1 -1
package/ui/out/404.html +1 -1
package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
package/ui/out/fonts/PlanetKosmos.ttf +0 -0
package/ui/out/index.html +1 -1
package/ui/out/index.txt +3 -3
package/ui/out/run/index.html +1 -1
package/ui/out/run/index.txt +3 -3
package/ui/out/settings/index.html +1 -1
package/ui/out/settings/index.txt +3 -3
package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +0 -1
package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +0 -1
package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +0 -1
package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +0 -1
package/ui/out/_next/static/css/82edaa7a5942f894.css +0 -3
/package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
/package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0

package/dist/hub.js CHANGED

@@ -34,12 +34,15 @@ var __importStar = (this && this.__importStar) || (function () {
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.startHub = startHub;
+exports.publicConfig = publicConfig;
 const fs = __importStar(require("fs"));
 const http = __importStar(require("http"));
+const os = __importStar(require("os"));
 const path = __importStar(require("path"));
 const url_1 = require("url");
 const config_1 = require("./config");
 const control_1 = require("./control");
+const crawltools_1 = require("./crawltools");
 const deepseek_1 = require("./deepseek");
 const providers_1 = require("./providers");
 const journal_1 = require("./journal");
@@ -165,6 +168,29 @@ async function api(req, res, url, opts) {
             return sendJson(res, 200, { models: Object.keys(cfg.pricing), error: (0, util_1.errMsg)(e) });
         }
     }
+    // Directory browser for the launch-folder picker. Localhost-only hub, same
+    // user permissions as the CLI — lists directory names, never file contents.
+    if (p === "/api/fs/dirs" && method === "GET") {
+        const raw = url.searchParams.get("path") || os.homedir();
+        const dir = path.resolve(raw);
+        try {
+            const entries = fs
+                .readdirSync(dir, { withFileTypes: true })
+                .filter((e) => e.isDirectory() && !e.name.startsWith("."))
+                .map((e) => ({ name: e.name, path: path.join(dir, e.name) }))
+                .sort((a, b) => a.name.localeCompare(b.name));
+            const parent = path.dirname(dir);
+            return sendJson(res, 200, {
+                path: dir,
+                parent: parent === dir ? null : parent,
+                home: os.homedir(),
+                dirs: entries,
+            });
+        }
+        catch (e) {
+            return sendJson(res, 400, { error: (0, util_1.errMsg)(e) });
+        }
+    }
     if (p === "/api/runs" && method === "GET") {
         return sendJson(res, 200, { runs: (0, run_1.listRuns)(cfg.pricing) });
     }
@@ -231,7 +257,7 @@ async function api(req, res, url, opts) {
             return sendJson(res, 200, { events, live: (0, run_1.isRunLive)(id) });
         }
         if (sub === "/stream" && method === "GET") {
-            return streamEvents(res, id);
+            return streamEvents(res, id, url.searchParams.get("quiet") === "1");
         }
         if (sub === "/note" && method === "POST") {
             const body = await readBody(req);
@@ -292,7 +318,7 @@ async function api(req, res, url, opts) {
     }
     sendJson(res, 404, { error: "not found" });
 }
-function streamEvents(res, id) {
+function streamEvents(res, id, quiet = false) {
     res.writeHead(200, {
         "content-type": "text/event-stream",
         "cache-control": "no-cache, no-transform",
@@ -311,6 +337,9 @@ function streamEvents(res, id) {
             return;
         }
         for (const ev of evs) {
+            // quiet mode: skip streaming chatter for clients rendering many agents.
+            if (quiet && ev.type === "agent.delta")
+                continue;
             res.write(`data: ${JSON.stringify(ev)}\n\n`);
         }
     };
@@ -368,7 +397,15 @@ function publicConfig(cfg) {
         tinyfishKeySet: Boolean(cfg.tinyfishApiKey),
         tinyfishKeyMasked: (0, config_1.maskKey)(cfg.tinyfishApiKey),
         searchBackend: cfg.searchBackend,
-        searchkitCmd: cfg.searchkitCmd,
+        crawlBackend: cfg.crawlBackend,
+        crawlResolved: (0, crawltools_1.resolveCrawlBackend)(cfg),
+        firecrawlKeySet: Boolean(cfg.firecrawlApiKey),
+        firecrawlKeyMasked: (0, config_1.maskKey)(cfg.firecrawlApiKey),
+        contextdevKeySet: Boolean(cfg.contextdevApiKey),
+        contextdevKeyMasked: (0, config_1.maskKey)(cfg.contextdevApiKey),
+        deepcrawlKeySet: Boolean(cfg.deepcrawlApiKey),
+        deepcrawlKeyMasked: (0, config_1.maskKey)(cfg.deepcrawlApiKey),
+        deepcrawlBaseUrl: cfg.deepcrawlBaseUrl,
         sandboxRuntime: cfg.sandboxRuntime,
         sandboxResolved: (0, sandbox_1.resolveSandboxKind)(cfg),
         sandboxImage: cfg.sandboxImage,

package/dist/journal.js CHANGED

@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.Journal = void 0;
+exports.Journal = exports.TeamJournal = void 0;
 exports.eventsFile = eventsFile;
 exports.readEvents = readEvents;
 exports.lastSeq = lastSeq;
@@ -41,14 +41,36 @@ exports.readNewEvents = readNewEvents;
 const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
 /**
- * Append-only event journal. events.jsonl is the single source of truth for a
- * run: the executor writes it, the terminal renderer and the hub (web UI) read
- * and tail it. Tolerant of a torn final line after a crash.
+ * A child swarm's view of its parent's journal: same file, same sequence,
+ * every event stamped with the owning team's task id so the reducer can
+ * partition team activity away from the root run.
  */
+class TeamJournal {
+    inner;
+    teamId;
+    constructor(inner, teamId) {
+        this.inner = inner;
+        this.teamId = teamId;
+    }
+    append(type, payload = {}) {
+        return this.inner.append(type, { teamId: this.teamId, ...payload });
+    }
+    flush() {
+        return this.inner.flush();
+    }
+    get degraded() {
+        return this.inner.degraded;
+    }
+}
+exports.TeamJournal = TeamJournal;
 class Journal {
     file;
     seq;
     chain = Promise.resolve();
+    buf = "";
+    failures = 0;
+    /** Set after repeated append failures: the source of truth is no longer being persisted. */
+    degraded = false;
     onEvent;
     constructor(runDirPath, startSeq) {
         this.file = path.join(runDirPath, "events.jsonl");
@@ -56,12 +78,8 @@ class Journal {
     }
     append(type, payload = {}) {
         const ev = { seq: this.seq++, t: Date.now(), type, ...payload };
-        const line = JSON.stringify(ev) + "\n";
-        this.chain = this.chain
-            .then(() => fs.promises.appendFile(this.file, line, "utf8"))
-            .catch(() => {
-            /* never break the run on journal IO; next append retries the chain */
-        });
+        this.buf += JSON.stringify(ev) + "\n";
+        this.chain = this.chain.then(() => this.drain());
         try {
             this.onEvent?.(ev);
         }
@@ -70,8 +88,40 @@ class Journal {
         }
         return ev;
     }
+    async drain() {
+        if (!this.buf)
+            return;
+        const chunk = this.buf;
+        this.buf = "";
+        try {
+            await fs.promises.appendFile(this.file, chunk, "utf8");
+            this.failures = 0;
+        }
+        catch (e) {
+            // Keep the unwritten events buffered so the next append/flush retries
+            // them in order; after repeated failures, stop pretending it's fine.
+            this.buf = chunk + this.buf;
+            this.failures++;
+            if (this.failures >= 5 && !this.degraded) {
+                this.degraded = true;
+                process.stderr.write(`agentswarm: journal writes are failing (${String(e)}); run state is no longer durable\n`);
+            }
+        }
+    }
     flush() {
-        return this.chain;
+        return this.chain.then(() => this.drain());
+    }
+    /** Last-gasp synchronous flush for signal handlers and exit paths. */
+    flushSync() {
+        if (!this.buf)
+            return;
+        try {
+            fs.appendFileSync(this.file, this.buf, "utf8");
+            this.buf = "";
+        }
+        catch {
+            /* nothing left to do */
+        }
     }
 }
 exports.Journal = Journal;

package/dist/memory.js ADDED

@@ -0,0 +1,83 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || (function () {
+    var ownKeys = function(o) {
+        ownKeys = Object.getOwnPropertyNames || function (o) {
+            var ar = [];
+            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
+            return ar;
+        };
+        return ownKeys(o);
+    };
+    return function (mod) {
+        if (mod && mod.__esModule) return mod;
+        var result = {};
+        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
+        __setModuleDefault(result, mod);
+        return result;
+    };
+})();
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.memoryFile = memoryFile;
+exports.loadMemory = loadMemory;
+exports.appendMemory = appendMemory;
+exports.memoryBlock = memoryBlock;
+const crypto = __importStar(require("crypto"));
+const fs = __importStar(require("fs"));
+const path = __importStar(require("path"));
+const config_1 = require("./config");
+const util_1 = require("./util");
+const MAX_ENTRIES = 20;
+function memoryFile(cwd) {
+    const hash = crypto.createHash("sha1").update(path.resolve(cwd)).digest("hex").slice(0, 12);
+    return path.join((0, config_1.home)(), "memory", `${hash}.json`);
+}
+function loadMemory(cwd) {
+    try {
+        const raw = JSON.parse(fs.readFileSync(memoryFile(cwd), "utf8"));
+        return Array.isArray(raw?.entries) ? raw.entries : [];
+    }
+    catch {
+        return [];
+    }
+}
+function appendMemory(cwd, entry) {
+    try {
+        const file = memoryFile(cwd);
+        (0, util_1.ensureDir)(path.dirname(file));
+        const entries = [...loadMemory(cwd), entry].slice(-MAX_ENTRIES);
+        fs.writeFileSync(file, JSON.stringify({ cwd: path.resolve(cwd), entries }, null, 2), "utf8");
+    }
+    catch {
+        /* memory is best-effort */
+    }
+}
+/** Prompt block for the conductor, or "" when there's no history. */
+function memoryBlock(cwd) {
+    const entries = loadMemory(cwd);
+    if (!entries.length)
+        return "";
+    const lines = entries.slice(-8).map((e) => {
+        const when = new Date(e.finishedAt).toISOString().slice(0, 10);
+        const decisions = e.keyDecisions.length
+            ? ` Decisions: ${e.keyDecisions.map((d) => (0, util_1.oneLine)(d, 100)).join("; ")}`
+            : "";
+        return `- [${when}, ${e.status}] "${(0, util_1.oneLine)(e.mission, 100)}" — ${(0, util_1.oneLine)(e.summary, 200)}${decisions}`;
+    });
+    return (0, util_1.clip)(`PRIOR RUNS IN THIS WORKSPACE (build on them; don't redo settled decisions without reason):\n${lines.join("\n")}`, 4000);
+}

package/dist/prompts.js CHANGED

@@ -39,10 +39,13 @@ exports.conductorInitialUpdate = conductorInitialUpdate;
 exports.conductorUpdate = conductorUpdate;
 exports.taskTable = taskTable;
 exports.reportBlock = reportBlock;
+exports.depReportBlock = depReportBlock;
 exports.workerSystem = workerSystem;
 exports.forcedFinal = forcedFinal;
 exports.verifierSystem = verifierSystem;
 exports.synthSystem = synthSystem;
+exports.completenessPrompt = completenessPrompt;
+exports.synthCheckPrompt = synthCheckPrompt;
 exports.compactorPrompt = compactorPrompt;
 exports.budgetLine = budgetLine;
 const os = __importStar(require("os"));
@@ -72,15 +75,20 @@ DOCTRINE
 2. Make every task self-contained: crisp objective, explicit success criteria ("Done when …"), and every fact/path/URL the worker needs inlined in context. Workers know nothing you don't tell them.
 3. Invent the right specialist role per task (researcher, coder, analyst, data-wrangler, reviewer, writer, …). One concern per task, roughly 5–25 tool steps of work. Bigger → split it. Trivial → batch it.
 4. Software missions: scaffold first (one task), then parallel tasks on DISJOINT files/modules — never two writers on the same file — then an integration + test task that deps on all of them with verify:true.
-5. Research missions: parallel scouts with distinct angles and sources, then a consolidation/analysis task that deps on the scouts.
+5. Research missions: go WIDE. Spawn many parallel scouts (10+ for a broad topic), each owning a distinct sub-question, angle, source type, time period, or entity — so collectively they pull hundreds of sources, not dozens. Tell each scout to use deep web_search (high count) and to record findings with exact URLs/quotes on the blackboard and in artifact files. Then spawn analysis/consolidation tasks that dep on the scouts, and a final synthesis. When one scout's area is itself broad, spawn it with team:true so it fans out further.
 6. Set verify:true on tasks whose failure would poison the mission (builds, integrations, data pipelines, final deliverables). A verification agent will adversarially check them and can fail them back for retry.
 7. React to evidence. Failed/blocked task → diagnose from its report and spawn a corrected or alternative approach (never re-run a failed approach verbatim). Surprising findings → adapt the plan.
 8. Watch the budget shown in every update. As it tightens, cut scope to what the mission truly needs — always deliver value before the cap, never run out mid-flight.
 9. Operator messages override everything. Adjust the plan immediately when one appears.
 10. finish only when the mission's success criteria are demonstrably met, or budget/feasibility forces it. Your finish notes steer the synthesizer that writes the final report.
+11. Model tiers: set model:"cheap" on scouts and bulk extraction, model:"strong" on leads, integration, and verified deliverables. Default tier for everything in between.
+12. Big subsystems: spawn with team:true to run the task as a sub-swarm — its own lead decomposes it into parallel sub-tasks and reports one consolidated result. Use for coherent multi-task chunks ("build the backend", "research all 12 competitors"), not for single jobs.
+13. Beyond ~20 tasks, maintain a living plan with update_plan (mission-plan.md): approach, what's done, what's next, open risks. Rewrite it at phase boundaries — it is pinned into your updates and survives restarts.
+14. Long missions: structure the work into phases with set_phase (e.g. discovery → build → integrate → polish). The current phase and its exit criteria are pinned into every update, so the plan survives even when old history is trimmed.
+15. DELIVERABLES SHIP IN THE FORMAT THE MISSION ACTUALLY NEEDS — a markdown report is the fallback, not the default. Software → running code with build/run instructions; data work → .csv/.json/.sqlite plus a summary; comparisons and datasets → tables in CSV as well as prose; polished documents → styled self-contained .html (the operator reads HTML, not raw markdown); scripts/configs → the runnable files themselves. Spell the expected format and exact filename(s) out in the deliverable task's objective and have it save them with save_artifact.
 RULES
-- Respond ONLY by calling your tools (spawn_tasks / wait / finish). Plain-text replies are ignored.
+- Respond ONLY by calling your tools (spawn_tasks / set_phase / wait / finish). Plain-text replies are ignored. set_phase alone is not a decision — pair it with spawn_tasks, wait, or finish.
 - Never spawn a task whose deps are not yet all created.
 - Keep the total task count within budget (max ${o.maxTasks} per run); make every task earn its place.`;
 }
@@ -98,6 +106,10 @@ function conductorUpdate(p) {
         sections.push(`NEW REPORTS\n${p.reports.join("\n\n")}`);
     if (p.blackboard)
         sections.push(`BLACKBOARD (shared notes digest)\n${p.blackboard}`);
+    if (p.phase)
+        sections.push(p.phase);
+    if (p.plan)
+        sections.push(p.plan);
     sections.push(`SWARM STATE\n${p.taskTable}`);
     sections.push(p.budgetLine);
     if (p.extra)
@@ -108,27 +120,63 @@ function conductorUpdate(p) {
 function taskTable(tasks) {
     if (!tasks.length)
         return "(no tasks yet)";
-    return tasks
-        .map((t) => {
+    const line = (t) => {
         const deps = t.deps.length ? ` deps:[${t.deps.join(",")}]` : "";
         const extra = t.status === "failed" && t.error ? ` — ${(0, util_1.clip)(t.error, 80)}` : "";
         return `${t.id} [${t.status}${t.attempt > 1 ? ` a${t.attempt}` : ""}] (${t.role})${deps} ${(0, util_1.clip)(t.title, 70)}${extra}`;
-    })
-        .join("\n");
+    };
+    const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
+    if (settled.length <= 30)
+        return tasks.map(line).join("\n");
+    // Hundreds of tasks must not flood the conductor's prompt: collapse DONE
+    // tasks in older waves to one line per wave. Failures/blocks stay full-line
+    // forever (they're what the conductor plans around), as do active tasks and
+    // the two most recent waves.
+    const maxWave = Math.max(...tasks.map((t) => t.wave));
+    const out = [];
+    const waves = [...new Set(tasks.map((t) => t.wave))].sort((a, b) => a - b);
+    for (const w of waves) {
+        const ws = tasks.filter((t) => t.wave === w);
+        const collapsible = w < maxWave - 1 ? ws.filter((t) => t.status === "done") : [];
+        const fullLines = ws.filter((t) => !collapsible.includes(t));
+        if (collapsible.length) {
+            out.push(`wave ${w}: ${collapsible.length} done (${collapsible.map((t) => t.id).join(",")})`);
+        }
+        out.push(...fullLines.map(line));
+    }
+    return out.join("\n");
 }
 function reportBlock(t) {
     const head = `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}${t.attempt > 1 ? ` (attempt ${t.attempt})` : ""}`;
     const body = t.report ? (0, util_1.clip)(t.report, 1600) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
+    const facts = t.keyFacts?.length ? `\nkey facts:\n${t.keyFacts.map((f) => `  • ${(0, util_1.clip)(f, 200)}`).join("\n")}` : "";
+    const open = t.openQuestions?.length ? `\nopen questions: ${t.openQuestions.map((q) => (0, util_1.clip)(q, 150)).join(" | ")}` : "";
+    const files = t.filesTouched?.length ? `\nfiles touched: ${t.filesTouched.join(", ")}` : "";
     const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
     const fb = t.feedback ? `\nverifier: ${(0, util_1.clip)(t.feedback, 300)}` : "";
-    return `${head}\n${body}${arts}${fb}`;
+    return `${head}\n${body}${facts}${open}${files}${arts}${fb}`;
+}
+/**
+ * Compact dependency context for a downstream worker: structured handoff
+ * fields in full, prose report as an excerpt — read_report(taskId) has the
+ * rest. Keeps fan-in tasks from inheriting megabytes of ancestor prose.
+ */
+function depReportBlock(t) {
+    const head = `── dep ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}`;
+    const facts = t.keyFacts?.length ? `\nkey facts:\n${t.keyFacts.map((f) => `  • ${(0, util_1.clip)(f, 200)}`).join("\n")}` : "";
+    const files = t.filesTouched?.length ? `\nfiles touched: ${t.filesTouched.join(", ")}` : "";
+    const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
+    const full = (t.report ?? "").length > 1200 ? `\n(excerpt — full text: read_report("${t.id}"))` : "";
+    const body = t.report ? (0, util_1.clip)(t.report, 1200) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
+    return `${head}\n${body}${facts}${files}${arts}${full}`;
 }
 // ============================================================ workers
 const ROLE_HINTS = {
-    researcher: "Research craft: triangulate across independent sources; prefer primary docs over blog spam; capture exact figures, dates, URLs. Search several distinct phrasings before concluding something is unfindable.",
+    researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with the source URL) and save a structured markdown file of your sources+findings as an artifact so the synthesizer can build on it. " +
+        "If a crawl_site tool is available, use it to ingest whole documentation sites or multi-page sources into local markdown files, then read the saved files — far cheaper and broader than fetching pages one by one.",
     coder: "Engineering craft: read existing code before changing it; match its conventions; build/run/test after every meaningful change and include the command + result in your report. Leave the tree compiling.",
     analyst: "Analysis craft: quantify wherever possible; state assumptions explicitly; separate observation from interpretation; sanity-check numbers twice.",
-    writer: "Writing craft: structure before prose; concrete over abstract; cut filler. Match the audience and purpose given in the objective.",
+    writer: "Writing craft: structure before prose; concrete over abstract; cut filler. Match the audience and purpose given in the objective. Deliver in the format the objective calls for — for polished documents prefer a styled, self-contained .html file (inline CSS, readable typography, real tables) over raw markdown; ship data tables as .csv alongside the prose.",
     reviewer: "Review craft: be adversarial; try to break it; check edge cases and the unhappy path; verify claims against the actual files, not the description.",
     "data-wrangler": "Data craft: validate schema and row counts at every step; spot-check samples; never silently drop rows — report anomalies.",
 };
@@ -140,13 +188,16 @@ function workerSystem(opts) {
         : task.attempt > 1 && task.error
             ? `\nPREVIOUS ATTEMPT FAILED: ${task.error}\nTake a different approach.\n`
             : "";
+    const checkpoint = task.lastCheckpoint
+        ? `\nPROGRESS CHECKPOINT FROM A PREVIOUS ATTEMPT (the run was interrupted or retried — do not redo completed work blindly):\n${task.lastCheckpoint}\nRe-verify the state it describes (files, commands) before re-creating anything, then continue from where it left off.\n`
+        : "";
     return `You are ${opts.agentId}, a ${opts.role} agent in a swarm pursuing this mission:
 ${meta.mission}
 YOUR TASK — ${task.id} (attempt ${task.attempt})
 ${task.title}
 Objective: ${task.objective}
-${task.context ? `Context from the conductor:\n${task.context}\n` : ""}${retry}
+${task.context ? `Context from the conductor:\n${task.context}\n` : ""}${retry}${checkpoint}
 CONTEXT FROM THE SWARM
 ${opts.depReports || "(no dependency reports)"}
 ${opts.blackboard ? `Blackboard digest:\n${opts.blackboard}` : ""}
@@ -160,10 +211,13 @@ OPERATING PROTOCOL
 - Evidence over assumption: read before you edit; check outputs; cite concrete paths, commands and numbers.
 - Be token-lean: targeted reads (line ranges, grep via shell) over wholesale dumps; don't re-read unchanged files.
 - Post durable discoveries other agents will need to the blackboard with note(...) — facts only, used sparingly.
-- Save deliverable files with save_artifact so the operator sees them.
+- Editing files other tasks might also touch? First search_notes for claims, then post note(kind:"claim", key:"<path>") before editing. Claims are advisory — coordinate, don't fight.
+- Save deliverable files with save_artifact so the operator sees them. Pick the format that genuinely fits the deliverable — structured data as .csv/.json, polished documents as self-contained .html, code as runnable files — not everything is a markdown report.
+- On long tasks, call checkpoint(...) after each major chunk so an interrupted run resumes warm instead of from scratch.
 - Genuinely impossible / missing prerequisite → report(status:"blocked", …) early instead of thrashing.
 - You have at most ${opts.maxSteps} tool steps. Budget them.
-- ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths.
+- Dependency reports above are excerpts; use read_report(task_id) for full text, and search_notes(query) to find facts posted earlier in the run.
+- ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks.
 ${roleHint ? "\n" + roleHint : ""}`;
 }
 exports.WORKER_KICKOFF = "Begin now. Work the task to completion, then call report(...).";
@@ -188,8 +242,12 @@ ${task.artifacts.length ? `Claimed artifacts: ${task.artifacts.join(", ")}` : ""
 Working directory: ${meta.cwd}
 PROTOCOL
-- Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers.
-- Check: objective met? success criteria satisfied? deliverables exist and are non-trivial (not stubs/placeholders)?
+- Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers. You see only the worker's CLAIMS — gather your own evidence; do not assume shared context.
+- RUBRIC — fail unless all hold:
+  1. Completeness: every part of the objective and its "Done when" criteria is addressed.
+  2. Evidence: each substantive claim in the report is backed by something you verified yourself.
+  3. Deliverables: claimed files/artifacts exist, are non-trivial (not stubs/placeholders), and match what the report says about them.
+  4. Correctness: commands/builds/tests the task implies actually succeed when you run them.
 - Spot-check depth over exhaustive breadth; ~5-12 tool steps.
 - Then call verdict(pass, feedback). On fail, feedback must be actionable: exactly what is wrong and where. On pass, one line citing the evidence you checked.`;
 }
@@ -212,12 +270,42 @@ Working directory: ${opts.meta.cwd}
 PROTOCOL
 - You may read files (read_file / list_dir) to confirm specifics before writing — verify key claims you repeat.
+- The mission's PRIMARY deliverable should exist in the format that serves it best, not only as prose. If the task reports produced data, comparisons, or rankings that the artifacts don't already capture in a structured form, save them now with save_artifact (e.g. data/results.csv, data/findings.json) before submitting. Don't duplicate artifacts that already exist — point to them.
 - Then call submit_final with:
-  • report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler.
+  • report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler. Use real markdown tables for tabular findings. (A styled HTML rendering is generated automatically — do not hand-write one.)
   • summary — ≤8 sentences for the console.
 - The report stands alone: a reader who saw nothing else must understand what happened and where everything is.`;
 }
 exports.SYNTH_KICKOFF = "Compose and submit the final deliverable now via submit_final(...).";
+// ============================================================ completeness / synthesis checks
+function completenessPrompt(mission, taskTableStr, reports) {
+    return `You are a completeness critic for an agent-swarm run that is about to finish. Given the mission and what was actually delivered, list any REAL gaps: parts of the mission not addressed, claims with no supporting task, or deliverables that were promised but never produced.
+MISSION
+${mission}
+TASKS
+${taskTableStr}
+TASK REPORTS
+${reports}
+Reply with EXACTLY "COMPLETE" if the mission's requirements are genuinely covered. Otherwise reply with a short numbered list of concrete gaps (max 5), each one actionable enough to become a task. Do not invent nice-to-haves — only true gaps against the stated mission.`;
+}
+function synthCheckPrompt(mission, reports, finalReport) {
+    return `You are checking a final mission report for faithfulness before delivery. Compare it against the underlying task reports.
+MISSION
+${mission}
+TASK REPORTS (ground truth)
+${reports}
+FINAL REPORT (to check)
+${finalReport}
+Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
+}
 // ============================================================ compaction
 function compactorPrompt(serialized) {
     return `Compress this agent conversation segment into a dense progress summary the agent can rely on to continue working. Preserve: decisions made, files created/modified (exact paths), commands run and their outcomes, key findings/numbers/URLs, errors hit and how they were resolved, current state of the work, and anything still pending. Omit pleasantries and dead ends unless they prevent repeating a mistake. Output the summary only.
@@ -228,5 +316,10 @@ ${serialized}`;
 // ============================================================ misc
 function budgetLine(spent, cap) {
     const pct = cap > 0 ? Math.round((spent.total / cap) * 100) : 0;
-    return `BUDGET: ${(0, util_1.fmtTokens)(spent.total)} of ${(0, util_1.fmtTokens)(cap)} tokens used (${pct}%) · est. cost so far $${spent.cost.toFixed(2)}`;
+    const urgency = pct >= 90
+        ? " ⚠ WIND DOWN NOW: stop spawning new work, consolidate what exists, and finish before the cap."
+        : pct >= 75
+            ? " Note: budget is tightening — prefer consolidation over new exploration."
+            : "";
+    return `BUDGET: ${(0, util_1.fmtTokens)(spent.total)} of ${(0, util_1.fmtTokens)(cap)} tokens used (${pct}%) · est. cost so far $${spent.cost.toFixed(2)}${urgency}`;
 }