npm - @robzilla1738/agentswarm - Versions diffs - 0.5.0 → 0.6.0 - Mend

@robzilla1738/agentswarm 0.5.0 → 0.6.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +29 -12
package/dist/agent.js +2 -1
package/dist/cli.js +21 -4
package/dist/config.js +27 -1
package/dist/executor.js +243 -43
package/dist/hub.js +69 -3
package/dist/memory.js +5 -4
package/dist/pdftext.js +211 -0
package/dist/prompts.js +23 -15
package/dist/report.js +37 -0
package/dist/run.js +8 -0
package/dist/sandbox.js +11 -0
package/dist/searchcore.js +55 -2
package/dist/state.js +34 -6
package/dist/tools.js +196 -19
package/dist/util.js +85 -0
package/dist/webtools.js +145 -15
package/package.json +1 -1
package/ui/out/404/index.html +1 -1
package/ui/out/404.html +1 -1
package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
package/ui/out/index.html +1 -1
package/ui/out/index.txt +3 -3
package/ui/out/run/index.html +1 -1
package/ui/out/run/index.txt +3 -3
package/ui/out/settings/index.html +1 -1
package/ui/out/settings/index.txt +3 -3
package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
/package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
/package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0

package/dist/executor.js CHANGED Viewed

@@ -134,7 +134,12 @@ class Executor {
             const n = Number(/^T(\d+)$/.exec(copy.id)?.[1] ?? 0);
             this.taskCounter = Math.max(this.taskCounter, n);
         }
-        this.notes = state.notes.map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text }));
+        // Drop claims held by settled tasks — they were released on task end and
+        // must not resurrect across a restart.
+        const settled = new Set(state.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status) && !reset.has(t.id)).map((t) => t.id));
+        this.notes = state.notes
+            .map((n) => ({ taskId: n.taskId, key: n.key, kind: n.kind, text: n.text, url: n.url }))
+            .filter((n) => !(n.kind === "claim" && n.taskId && settled.has(n.taskId)));
         const lastPhase = state.phases[state.phases.length - 1];
         if (lastPhase)
             this.phase = { name: lastPhase.name, goal: lastPhase.goal, exitCriteria: lastPhase.exitCriteria };
@@ -182,16 +187,16 @@ class Executor {
     blackboardDigest(max = 1800) {
         if (!this.notes.length)
             return "";
-        const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.taskId ? ` (${n.taskId})` : ""}`;
-        // Decisions anchor mission-wide coherence and are never trimmed out of the
-        // digest; everything else shows only its recent tail.
-        const decisions = this.notes.filter((n) => n.kind === "decision").map(fmt);
-        const rest = this.notes.filter((n) => n.kind !== "decision").slice(-80).map(fmt);
+        const fmt = (n) => `• ${n.kind && n.kind !== "finding" ? `[${n.kind}] ` : ""}${n.key ? `[${n.key}] ` : ""}${(0, util_1.oneLine)(n.text, 160)}${n.url ? ` <${n.url}>` : ""}${n.taskId ? ` (${n.taskId})` : ""}`;
+        // Decisions and conflicts anchor mission-wide coherence and are never
+        // trimmed out of the digest; everything else shows only its recent tail.
+        const pinned = this.notes.filter((n) => n.kind === "decision" || n.kind === "conflict").map(fmt);
+        const rest = this.notes.filter((n) => n.kind !== "decision" && n.kind !== "conflict").slice(-80).map(fmt);
         let tail = rest.join("\n");
-        const budget = Math.max(400, max - decisions.join("\n").length);
+        const budget = Math.max(400, max - pinned.join("\n").length);
         if (tail.length > budget)
             tail = tail.slice(tail.length - budget);
-        return [decisions.join("\n"), tail].filter(Boolean).join("\n");
+        return [pinned.join("\n"), tail].filter(Boolean).join("\n");
     }
     searchNotes(query) {
         const terms = query.toLowerCase().split(/\s+/).filter(Boolean);
@@ -266,6 +271,8 @@ class Executor {
                 content: this.resumed
                     ? (0, prompts_1.conductorUpdate)({
                         blackboard: this.blackboardDigest(),
+                        phase: this.phaseLine(),
+                        plan: this.planPin(),
                         nextId: this.nextId(),
                         taskTable: (0, prompts_1.taskTable)(this.taskList()),
                         budgetLine: (0, prompts_1.budgetLine)({ total: this.spentTokens, cost: this.cost }, this.meta.options.maxTokens),
@@ -277,6 +284,15 @@ class Executor {
                     : (0, prompts_1.conductorInitialUpdate)(this.meta, this.nextId()),
             },
         ];
+        if (this.resumed) {
+            // The conductor's reasoning history died with the old process. Re-seed
+            // the durable facts into the same slot trimConductorHistory() maintains,
+            // so a resumed conductor knows what settled and what was decided.
+            this.conductorMessages.splice(1, 0, {
+                role: "user",
+                content: this.missionLedger("This run was resumed — prior orchestration history is gone."),
+            });
+        }
         try {
             await this.conductorTurn();
             this.setStatus("running");
@@ -799,8 +815,8 @@ class Executor {
      * trimmed history so the conductor never loses the plot on long missions —
      * rebuilt fresh each trim from current state, so it also survives resume.
      */
-    missionLedger() {
-        const lines = ["[Earlier orchestration history was trimmed. MISSION LEDGER — durable state so far:]"];
+    missionLedger(intro = "Earlier orchestration history was trimmed.") {
+        const lines = [`[${intro} MISSION LEDGER — durable state so far:]`];
         if (this.phase)
             lines.push(this.phaseLine());
         const settled = this.taskList().filter((t) => ["done", "failed", "blocked"].includes(t.status));
@@ -843,6 +859,19 @@ class Executor {
             else
                 this.conductorMessages.splice(1, 0, msg);
         };
+        // Old conductor turns carry the bulk in thinking traces and verbose prose;
+        // the durable decisions live in the ledger and the plan pin. Compact them
+        // in place before resorting to dropping whole messages. (sanitizeMessages
+        // backfills reasoning_content with "" for DeepSeek tool-call turns.)
+        for (let i = 1; i < this.conductorMessages.length - 6; i++) {
+            const m = this.conductorMessages[i];
+            if (m.role !== "assistant")
+                continue;
+            if (m.reasoning_content)
+                m.reasoning_content = "";
+            if (m.content && m.content.length > 400)
+                m.content = (0, util_1.clip)(m.content, 400);
+        }
         if (this.conductorMessages.length > MAX) {
             const system = this.conductorMessages[0];
             const tail = this.conductorMessages.slice(-(MAX - 2));
@@ -855,7 +884,7 @@ class Executor {
         // Count alone doesn't bound size: every update embeds the full task table,
         // so a deep run can blow the model window long before 60 messages. The
         // mission itself lives in the system message and always survives.
-        const budget = Math.floor(this.cfg.contextTokenLimit * 0.75);
+        const budget = Math.floor((0, config_1.contextLimitFor)(this.cfg, this.meta.options.conductorModel) * 0.75);
         if ((0, agent_1.estimateMessages)(this.conductorMessages) <= budget)
             return;
         setLedger();
@@ -876,20 +905,48 @@ class Executor {
     hasOpenWork() {
         return this.taskList().some((t) => ["pending", "running", "verifying"].includes(t.status));
     }
+    /** Walk a failed/blocked dep chain down to the task that actually failed. */
+    rootFailure(id) {
+        let cur = this.tasks.get(id);
+        const seen = new Set();
+        while (cur && !seen.has(cur.id)) {
+            seen.add(cur.id);
+            const next = cur.deps
+                .map((d) => this.tasks.get(d))
+                .find((t) => !!t && (t.status === "failed" || t.status === "blocked"));
+            if (!next)
+                return cur;
+            cur = next;
+        }
+        return cur;
+    }
     blockStuckTasks() {
-        for (const t of this.taskList()) {
-            if (t.status !== "pending")
-                continue;
-            const bad = t.deps.find((d) => {
-                const s = this.tasks.get(d)?.status;
-                return s === "failed" || s === "blocked";
-            });
-            if (bad) {
+        // Fixpoint: a failed dep chain T1→T2→T5 must block the whole chain in one
+        // pass, not one level per conductor turn.
+        for (let changed = true; changed;) {
+            changed = false;
+            for (const t of this.taskList()) {
+                if (t.status !== "pending")
+                    continue;
+                const bad = t.deps.find((d) => {
+                    const s = this.tasks.get(d)?.status;
+                    return s === "failed" || s === "blocked";
+                });
+                if (!bad)
+                    continue;
+                // Carry the root cause so the conductor re-plans around the actual
+                // failure, not a chain of "dependency did not complete".
+                const root = this.rootFailure(bad);
+                const cause = root ? (0, util_1.oneLine)(root.feedback ?? root.error ?? "unknown failure", 160) : "";
                 t.status = "blocked";
-                t.error = `dependency ${bad} did not complete`;
+                t.error =
+                    root && root.id !== bad
+                        ? `dependency ${bad} did not complete (root cause ${root.id}: ${cause})`
+                        : `dependency ${bad} did not complete${cause ? ` (${cause})` : ""}`;
                 t.endedAt = Date.now();
                 this.journal.append("task.status", { taskId: t.id, status: "blocked", attempt: t.attempt, reason: t.error });
                 this.settledSinceUpdate.push(t.id);
+                changed = true;
             }
         }
     }
@@ -954,17 +1011,25 @@ class Executor {
             taskId: task?.id,
             signal: this.ac.signal,
             addCheckpoint: task ? (summary) => this.recordCheckpoint(task, agentId, summary) : undefined,
-            addNote: (text, key, kind) => {
-                this.notes.push({ taskId: task?.id, key, kind, text });
+            addNote: (text, key, kind, url) => {
+                this.notes.push({ taskId: task?.id, key, kind, text, url });
                 // Only the recent tail ever feeds digests; without a cap a multi-day
-                // run accumulates every note in memory. Decisions are kept regardless.
+                // run accumulates every note in memory. Decisions and conflicts are
+                // kept regardless. In-place splice: teams share this array by reference.
                 if (this.notes.length > 4000) {
-                    const decisions = this.notes.filter((n) => n.kind === "decision");
-                    const rest = this.notes.filter((n) => n.kind !== "decision");
-                    rest.splice(0, rest.length - Math.max(0, 4000 - decisions.length));
-                    this.notes = [...decisions, ...rest];
+                    const keep = (n) => n.kind === "decision" || n.kind === "conflict";
+                    const pinnedCount = this.notes.filter(keep).length;
+                    let toDrop = this.notes.length - Math.max(pinnedCount, 4000);
+                    for (let i = 0; i < this.notes.length && toDrop > 0;) {
+                        if (!keep(this.notes[i])) {
+                            this.notes.splice(i, 1);
+                            toDrop--;
+                        }
+                        else
+                            i++;
+                    }
                 }
-                this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, text: (0, util_1.clip)(text, 1200) });
+                this.journal.append("note.added", { taskId: task?.id, agentId, key, kind, url, text: (0, util_1.clip)(text, 1200) });
             },
             searchNotes: (q) => this.searchNotes(q),
             readReport: (taskId) => this.readReportText(taskId),
@@ -1031,7 +1096,12 @@ class Executor {
                     if (task.attempt < this.cfg.verifyMaxAttempts) {
                         task.attempt++;
                         task.status = "running";
-                        this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt });
+                        this.journal.append("task.status", {
+                            taskId: task.id,
+                            status: "running",
+                            attempt: task.attempt,
+                            reason: task.feedback || task.error,
+                        });
                         continue;
                     }
                     this.finalizeTask(task, "failed", task.feedback || task.error || "verification failed after retries");
@@ -1046,12 +1116,12 @@ class Executor {
                 }
                 if (task.attempt < this.cfg.verifyMaxAttempts && !this.finishing && !this.budgetExceeded()) {
                     task.attempt++;
-                    task.error = (0, util_1.errMsg)(e);
+                    task.error = `${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`;
                     task.status = "running";
                     this.journal.append("task.status", { taskId: task.id, status: "running", attempt: task.attempt, reason: task.error });
                     continue;
                 }
-                this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}`);
+                this.finalizeTask(task, "failed", `worker error: ${(0, util_1.errMsg)(e)}${task.lastToolError ? ` (last tool failure: ${task.lastToolError})` : ""}`);
                 return;
             }
         }
@@ -1068,6 +1138,7 @@ class Executor {
         const agentId = (0, util_1.rid)("w");
         const model = this.resolveModel(task.modelTier);
         task.agentIds.push(agentId);
+        task.lastToolError = undefined; // diagnostics are per-attempt
         const dirListing = this.topListing();
         const system = (0, prompts_1.workerSystem)({
             agentId,
@@ -1101,7 +1172,7 @@ class Executor {
             signal: this.ac.signal,
             ctx: this.makeToolCtx(agentId, task),
             hooks: {
-                ...this.agentHooks(agentId, task.id),
+                ...this.agentHooks(agentId, task.id, task),
                 onCheckpoint: (summary) => this.recordCheckpoint(task, agentId, summary),
             },
             stop: this.agentStop,
@@ -1111,7 +1182,11 @@ class Executor {
         if (this.ac.signal.aborted)
             return "done";
         if (!outcome.terminal) {
-            task.error = "worker ended without reporting";
+            const lastWords = (0, util_1.oneLine)(outcome.finalText ?? "", 200);
+            task.error =
+                "worker ended without reporting" +
+                    (task.lastToolError ? ` — last tool failure: ${task.lastToolError}` : "") +
+                    (lastWords ? `; last words: ${lastWords}` : "");
             return "retry";
         }
         const a = outcome.terminal.args;
@@ -1127,6 +1202,20 @@ class Executor {
         task.keyFacts = strList(a.key_facts, 8);
         task.openQuestions = strList(a.open_questions, 6);
         task.filesTouched = strList(a.files_touched, 40);
+        // Structured sources: the citation pipeline's entry point. Only real
+        // http(s) URLs survive; they flow into dep handoffs and the bibliography.
+        const sources = Array.isArray(a.sources)
+            ? a.sources
+                .filter((s) => s && typeof s === "object" && /^https?:\/\//.test(String(s.url ?? "")))
+                .slice(0, 40)
+                .map((s) => ({
+                url: (0, util_1.clip)(String(s.url), 500),
+                title: s.title ? (0, util_1.clip)(String(s.title), 200) : undefined,
+                date: s.date ? (0, util_1.clip)(String(s.date), 40) : undefined,
+                note: s.note ? (0, util_1.clip)(String(s.note), 300) : undefined,
+            }))
+            : [];
+        task.sources = sources.length ? sources : undefined;
         this.journal.append("task.report", {
             taskId: task.id,
             status: reportStatus,
@@ -1135,6 +1224,7 @@ class Executor {
             keyFacts: task.keyFacts,
             openQuestions: task.openQuestions,
             filesTouched: task.filesTouched,
+            sources: task.sources,
         });
         if (reportStatus === "blocked") {
             this.finalizeTask(task, "blocked", report);
@@ -1165,6 +1255,7 @@ class Executor {
             return "Report is too thin to verify. Re-do the task and report concretely: what was done, what was verified, exact paths.";
         }
         const missing = [];
+        const malformed = [];
         // Remote sandboxes own their filesystem — only check host-visible paths.
         if (this.sandbox.localFs) {
             const okAt = (p) => {
@@ -1178,16 +1269,28 @@ class Executor {
             for (const rel of task.artifacts) {
                 const inArtifacts = path.join(this.runDirPath, "artifacts", rel);
                 const inWorkdir = path.resolve(this.meta.cwd, rel);
-                if (!okAt(inArtifacts) && !okAt(inWorkdir))
+                if (!okAt(inArtifacts) && !okAt(inWorkdir)) {
                     missing.push(rel);
+                    continue;
+                }
+                // Structural format check (json parses, csv is rectangular, html is
+                // not a stub) — free, and catches what the LLM verifier wastes a whole
+                // agent run discovering.
+                const problem = (0, util_1.validateArtifactFormat)(okAt(inArtifacts) ? inArtifacts : inWorkdir);
+                if (problem)
+                    malformed.push(`${rel}: ${problem}`);
             }
         }
         if (missing.length) {
             return `Claimed artifact(s) do not exist or are empty: ${missing.join(", ")}. Actually create them (use save_artifact), then report again.`;
         }
+        if (malformed.length) {
+            return `Claimed artifact(s) are malformed — fix them and report again: ${malformed.join("; ")}`;
+        }
         return null;
     }
-    async runVerifier(task) {
+    /** One verifier agent pass; returns the outcome plus how many evidence-gathering tool calls it made. */
+    async verifierAgent(task, kickoff) {
         const agentId = (0, util_1.rid)("v");
         // Verification gets the strong tier when configured — a weak verifier
         // rubber-stamps exactly the tasks that most need scrutiny.
@@ -1200,14 +1303,16 @@ class Executor {
             model,
             purpose: `verify ${task.id}`,
         });
+        let evidenceCalls = 0;
+        const baseHooks = this.agentHooks(agentId, task.id);
         const outcome = await (0, agent_1.runAgent)({
             cfg: this.cfg,
             agentId,
             model,
             thinking: this.meta.options.thinking,
             reasoningEffort: this.meta.options.reasoningEffort,
-            system: (0, prompts_1.verifierSystem)(this.meta, task),
-            kickoff: prompts_1.VERIFIER_KICKOFF,
+            system: (0, prompts_1.verifierSystem)(this.meta, task, this.depReportsFor(task)),
+            kickoff,
             tools: (0, tools_1.verifierToolset)(),
             terminal: [tools_1.VERDICT_TOOL],
             maxSteps: Math.min(14, this.meta.options.maxStepsPerTask),
@@ -1215,21 +1320,75 @@ class Executor {
             // Blind verification: the verifier judges deliverables against the
             // objective with its own tools — it must not inherit the swarm's shared
             // beliefs (blackboard) or the worker's narrative beyond the claims.
+            // (Dep reports are settled upstream outputs, not the worker's story.)
             ctx: { ...this.makeToolCtx(agentId, task), readBlackboard: () => "", searchNotes: undefined },
-            hooks: this.agentHooks(agentId, task.id),
+            hooks: {
+                ...baseHooks,
+                onToolCall: (callId, name, args) => {
+                    if (name !== "verdict")
+                        evidenceCalls++;
+                    baseHooks.onToolCall(callId, name, args);
+                },
+            },
             stop: this.agentStop,
         });
         this.flushDeltas(agentId);
         this.journal.append("agent.done", { agentId, taskId: task.id, steps: outcome.steps });
+        return { outcome, evidenceCalls };
+    }
+    async runVerifier(task) {
+        const strict = this.cfg.verification === "strict";
+        let { outcome, evidenceCalls } = await this.verifierAgent(task, prompts_1.VERIFIER_KICKOFF);
         if (this.ac.signal.aborted)
             return true;
+        // Strict mode: a pass verdict backed by zero tool calls is an opinion,
+        // not a verification. One re-run demanding evidence; if that also passes
+        // tool-free, accept but say so in the journal.
+        if (strict && outcome.terminal && Boolean(outcome.terminal.args.pass) && evidenceCalls === 0) {
+            this.journal.append("log", {
+                level: "info",
+                msg: `verifier passed ${task.id} without evidence — re-running with a tools-required kickoff`,
+            });
+            const second = await this.verifierAgent(task, "A previous verdict on this task cited no tool-gathered evidence. Verify concretely NOW — read the claimed files, run the commands — then call verdict(...).");
+            if (this.ac.signal.aborted)
+                return true;
+            if (second.outcome.terminal) {
+                if (second.evidenceCalls === 0) {
+                    this.journal.append("log", { level: "warn", msg: `verifier passed ${task.id} without gathering evidence` });
+                }
+                outcome = second.outcome;
+            }
+        }
         const v = (outcome.terminal?.args ?? {});
-        const strict = this.cfg.verification === "strict";
         // No verdict returned: in strict mode fail closed, otherwise accept.
         const pass = outcome.terminal ? Boolean(v.pass) : !strict;
-        const feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
+        let feedback = String(v.feedback ?? (outcome.terminal ? "" : "verifier produced no verdict"));
+        // Structured issues become the retry's worklist — numbered, with evidence.
+        const issues = Array.isArray(v.issues)
+            ? v.issues
+                .filter((i) => i && typeof i === "object" && i.problem)
+                .slice(0, 5)
+                .map((i) => ({
+                problem: (0, util_1.oneLine)(String(i.problem), 300),
+                evidence: i.evidence ? (0, util_1.oneLine)(String(i.evidence), 300) : undefined,
+                fix: i.fix ? (0, util_1.oneLine)(String(i.fix), 300) : undefined,
+            }))
+            : [];
+        if (!pass && issues.length) {
+            feedback = [
+                feedback,
+                ...issues.map((i, n) => `${n + 1}. ${i.problem}${i.evidence ? `\n   evidence: ${i.evidence}` : ""}${i.fix ? `\n   fix: ${i.fix}` : ""}`),
+            ]
+                .filter(Boolean)
+                .join("\n");
+        }
         task.feedback = feedback;
-        this.journal.append("verify.result", { taskId: task.id, pass, feedback });
+        this.journal.append("verify.result", {
+            taskId: task.id,
+            pass,
+            feedback,
+            ...(issues.length ? { issues } : {}),
+        });
         return pass;
     }
     finalizeTask(task, status, reason) {
@@ -1237,6 +1396,14 @@ class Executor {
         task.endedAt = Date.now();
         if (reason && status !== "done")
             task.error = reason;
+        // A settled task holds no file claims — release them so the digest and
+        // search_notes don't accumulate dead claims on long runs. In-place splice:
+        // teams share this array by reference.
+        for (let i = this.notes.length - 1; i >= 0; i--) {
+            const n = this.notes[i];
+            if (n.kind === "claim" && n.taskId === task.id)
+                this.notes.splice(i, 1);
+        }
         this.journal.append("task.status", { taskId: task.id, status, attempt: task.attempt, reason });
         this.settledSinceUpdate.push(task.id);
         this.maybeSnapshot();
@@ -1283,6 +1450,18 @@ class Executor {
             const rel = `progress-report-${n}.md`;
             fs.writeFileSync(path.join(this.runDirPath, "artifacts", rel), res.content, "utf8");
             this.journal.append("log", { level: "info", msg: `progress snapshot written: artifacts/${rel}` });
+            // Interim memory: a multi-day run that dies before synthesis still
+            // leaves the next swarm in this workspace something to build on.
+            if (!this.meta.sandbox) {
+                (0, memory_1.appendMemory)(this.meta.cwd, {
+                    runId: this.meta.id,
+                    mission: this.meta.mission,
+                    finishedAt: Date.now(),
+                    status: "in-progress",
+                    summary: (0, util_1.clip)(res.content, 600),
+                    keyDecisions: this.notes.filter((nt) => nt.kind === "decision").slice(-10).map((nt) => nt.text),
+                });
+            }
         })
             .catch((e) => {
             if (!this.ac.signal.aborted)
@@ -1369,7 +1548,7 @@ class Executor {
             });
         }
     }
-    agentHooks(agentId, taskId) {
+    agentHooks(agentId, taskId, trackErrorsOn) {
         return {
             onDelta: (channel, text) => {
                 this.queueDelta(agentId, taskId, channel, text);
@@ -1379,6 +1558,8 @@ class Executor {
                 this.journal.append("tool.call", { agentId, taskId, callId, name, args });
             },
             onToolResult: (callId, name, ok, summary) => {
+                if (!ok && trackErrorsOn)
+                    trackErrorsOn.lastToolError = `${name}: ${(0, util_1.oneLine)(summary, 200)}`;
                 this.journal.append("tool.result", { agentId, taskId, callId, name, ok, summary });
             },
             onUsage: this.onUsage,
@@ -1471,6 +1652,10 @@ class Executor {
             ? tasks.map(prompts_1.reportBlock).join("\n\n")
             : "(no tasks were completed)";
         const artifactList = this.listArtifacts().join("\n") || "(none)";
+        // The citation pipeline's last hop: every source any worker reported,
+        // deduplicated and numbered, becomes the synthesizer's bibliography.
+        const allSources = (0, report_1.aggregateSources)(tasks);
+        const sourcesText = allSources.length ? (0, util_1.truncateMiddle)((0, report_1.sourcesBlock)(allSources), 40_000, "chars") : "";
         const agentId = (0, util_1.rid)("synth");
         let summary = "";
         let reportMarkdown = "";
@@ -1488,6 +1673,7 @@ class Executor {
                     blackboard: this.blackboardDigest(6000),
                     artifactList,
                     reason: this.finishReason || "completed",
+                    sources: sourcesText,
                 }),
                 kickoff: prompts_1.SYNTH_KICKOFF,
                 tools: (0, tools_1.synthToolset)(),
@@ -1513,7 +1699,7 @@ class Executor {
                         messages: [
                             {
                                 role: "user",
-                                content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars")),
+                                content: (0, prompts_1.synthCheckPrompt)(this.meta.mission, (0, util_1.truncateMiddle)(reports, 60_000, "chars"), (0, util_1.truncateMiddle)(reportMarkdown, 60_000, "chars"), sourcesText ? (0, util_1.truncateMiddle)(sourcesText, 20_000, "chars") : undefined),
                             },
                         ],
                         thinking: false,
@@ -1562,6 +1748,7 @@ class Executor {
         // Cross-run memory: real-directory runs leave a trace for the next swarm.
         if (!this.meta.sandbox && status !== "cancelled") {
             (0, memory_1.appendMemory)(this.meta.cwd, {
+                runId: this.meta.id,
                 mission: this.meta.mission,
                 finishedAt: Date.now(),
                 status,
@@ -1572,6 +1759,10 @@ class Executor {
     }
     fallbackReport(tasks) {
         const lines = [`# ${this.meta.mission}`, ``, `_Run ${this.meta.id} — ${this.finishReason}_`, ``];
+        // Even without a synthesizer, surface the cross-task essentials first.
+        const facts = tasks.flatMap((t) => (t.keyFacts ?? []).map((f) => `- ${f} _(${t.id})_`));
+        if (facts.length)
+            lines.push(`## Key facts`, ...facts.slice(0, 60), "");
         for (const t of tasks) {
             lines.push(`## ${t.id} ${t.title} (${t.status})`);
             lines.push(t.report || t.error || "(no output)");
@@ -1579,6 +1770,15 @@ class Executor {
                 lines.push(`Artifacts: ${t.artifacts.join(", ")}`);
             lines.push("");
         }
+        const sources = (0, report_1.aggregateSources)(tasks);
+        if (sources.length) {
+            lines.push(`## Sources`);
+            for (const s of sources.slice(0, 100)) {
+                lines.push(`${s.n}. [${s.title || s.url}](${s.url})${s.date ? ` (${s.date})` : ""}`);
+            }
+            if (sources.length > 100)
+                lines.push(`…and ${sources.length - 100} more in the task reports.`);
+        }
         return lines.join("\n");
     }
     listArtifacts() {

package/dist/hub.js CHANGED Viewed

@@ -43,6 +43,7 @@ const url_1 = require("url");
 const config_1 = require("./config");
 const control_1 = require("./control");
 const crawltools_1 = require("./crawltools");
+const webtools_1 = require("./webtools");
 const deepseek_1 = require("./deepseek");
 const providers_1 = require("./providers");
 const journal_1 = require("./journal");
@@ -83,9 +84,16 @@ function startHub(opts) {
 async function handle(req, res, opts) {
     const url = new url_1.URL(req.url || "/", `http://localhost:${opts.port}`);
     const p = url.pathname;
-    res.setHeader("access-control-allow-origin", "*");
-    res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
-    res.setHeader("access-control-allow-headers", "content-type");
+    // Localhost-only CORS. The hub launches runs and reads reports with the
+    // operator's keys — a random website's JS must never get a readable
+    // response. The dev UI on another localhost port is the one legitimate
+    // cross-origin client; everyone else gets no CORS headers at all.
+    const origin = String(req.headers.origin || "");
+    if (/^https?:\/\/(localhost|127\.0\.0\.1|\[::1\])(:\d+)?$/.test(origin)) {
+        res.setHeader("access-control-allow-origin", origin);
+        res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
+        res.setHeader("access-control-allow-headers", "content-type");
+    }
     if (req.method === "OPTIONS") {
         res.writeHead(204);
         res.end();
@@ -159,6 +167,51 @@ async function api(req, res, url, opts) {
         const r = await (0, sandbox_1.testSandbox)(cfg, kind);
         return sendJson(res, 200, { kind, ...r });
     }
+    // Settings diagnostics: prove the search engines / crawl backend actually
+    // work with the saved keys before a mission depends on them.
+    if (p === "/api/search/test" && method === "POST") {
+        const q = "open source vector database";
+        const probe = async (engine, fn) => {
+            try {
+                const hits = await fn();
+                return { engine, ok: hits.length > 0, detail: `${hits.length} result(s)` };
+            }
+            catch (e) {
+                return { engine, ok: false, detail: (0, util_1.errMsg)(e) };
+            }
+        };
+        const checks = [probe("duckduckgo", () => (0, webtools_1.ddgSearch)(q, 3)), probe("bing", () => (0, webtools_1.bingSearch)(q, 3))];
+        if (cfg.tinyfishApiKey)
+            checks.push(probe("tinyfish", () => (0, webtools_1.tinyfishSearch)(cfg, q, 3)));
+        const engines = await Promise.all(checks);
+        return sendJson(res, 200, { ok: engines.some((e) => e.ok), engines });
+    }
+    if (p === "/api/crawl/test" && method === "POST") {
+        const backend = (0, crawltools_1.resolveCrawlBackend)(cfg);
+        if (!backend) {
+            return sendJson(res, 200, { ok: false, backend: null, detail: "no crawl backend configured — add a key first" });
+        }
+        try {
+            if ((0, crawltools_1.hasScrapeBackend)(cfg)) {
+                const text = await (0, crawltools_1.scrapeUrl)(cfg, "https://example.com/");
+                return sendJson(res, 200, {
+                    ok: Boolean(text && text.length > 50),
+                    backend,
+                    detail: text ? `scraped ${text.length} chars` : "empty scrape result",
+                });
+            }
+            // deepcrawl has no single-page scrape — smoke a 1-page crawl instead.
+            const out = await (0, crawltools_1.crawlSite)(cfg, { url: "https://example.com/", maxPages: 1 });
+            return sendJson(res, 200, {
+                ok: out.pages.length > 0,
+                backend,
+                detail: out.pages.length ? `crawled ${out.pages.length} page(s)` : out.warnings.join("; ") || "no pages",
+            });
+        }
+        catch (e) {
+            return sendJson(res, 200, { ok: false, backend, detail: (0, util_1.errMsg)(e) });
+        }
+    }
     if (p === "/api/models" && method === "GET") {
         try {
             const models = await (0, deepseek_1.listModels)(cfg);
@@ -297,6 +350,14 @@ async function api(req, res, url, opts) {
             res.end(fs.readFileSync(file));
             return;
         }
+        if (sub === "/plan" && method === "GET") {
+            const file = path.join((0, config_1.runDir)(id), "artifacts", "mission-plan.md");
+            if (!fs.existsSync(file))
+                return sendJson(res, 404, { error: "no plan yet" });
+            res.writeHead(200, { "content-type": "text/markdown; charset=utf-8" });
+            res.end(fs.readFileSync(file));
+            return;
+        }
         if (sub === "/artifacts" && method === "GET") {
             return sendJson(res, 200, { artifacts: listArtifactFiles(id) });
         }
@@ -429,6 +490,9 @@ function publicConfig(cfg) {
         reasoningEffort: cfg.reasoningEffort,
         safeMode: cfg.safeMode,
         contextTokenLimit: cfg.contextTokenLimit,
+        contextWindows: cfg.contextWindows,
+        cheapModel: cfg.cheapModel,
+        strongModel: cfg.strongModel,
         knownModels,
         pricing: cfg.pricing,
     };
@@ -482,6 +546,8 @@ function snapshot(state, id) {
         operatorNotes: state.operatorNotes,
         usageByModel: Object.fromEntries(state.usageByModel),
         cost: state.cost,
+        budgetSeries: state.budgetSeries,
+        planExcerpt: state.planExcerpt,
         finalSummary: state.finalSummary,
         finalReportPath: state.finalReportPath,
         live: (0, run_1.isRunLive)(id),

package/dist/memory.js CHANGED Viewed

@@ -58,10 +58,11 @@ function loadMemory(cwd) {
 }
 function appendMemory(cwd, entry) {
     try {
-        const file = memoryFile(cwd);
-        (0, util_1.ensureDir)(path.dirname(file));
-        const entries = [...loadMemory(cwd), entry].slice(-MAX_ENTRIES);
-        fs.writeFileSync(file, JSON.stringify({ cwd: path.resolve(cwd), entries }, null, 2), "utf8");
+        // Same-run entries replace (interim → final); writeJson is temp+rename so
+        // a crash mid-write never loses the prior history.
+        const prior = loadMemory(cwd).filter((e) => !(entry.runId && e.runId === entry.runId));
+        const entries = [...prior, entry].slice(-MAX_ENTRIES);
+        (0, util_1.writeJson)(memoryFile(cwd), { cwd: path.resolve(cwd), entries });
     }
     catch {
         /* memory is best-effort */