npm - @robzilla1738/agentswarm - Versions diffs - 0.2.0 → 0.5.0 - Mend

@robzilla1738/agentswarm 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/README.md +36 -5
package/dist/agent.js +64 -32
package/dist/cli.js +18 -4
package/dist/config.js +35 -5
package/dist/crawltools.js +247 -0
package/dist/deepseek.js +125 -10
package/dist/executor.js +771 -122
package/dist/hub.js +40 -3
package/dist/journal.js +61 -11
package/dist/memory.js +83 -0
package/dist/prompts.js +109 -16
package/dist/report.js +252 -0
package/dist/run.js +7 -2
package/dist/searchcore.js +191 -0
package/dist/state.js +57 -3
package/dist/tools.js +202 -12
package/dist/webtools.js +191 -60
package/package.json +3 -2
package/ui/out/404/index.html +1 -1
package/ui/out/404.html +1 -1
package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
package/ui/out/fonts/PlanetKosmos.ttf +0 -0
package/ui/out/index.html +1 -1
package/ui/out/index.txt +3 -3
package/ui/out/run/index.html +1 -1
package/ui/out/run/index.txt +3 -3
package/ui/out/settings/index.html +1 -1
package/ui/out/settings/index.txt +3 -3
package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
package/ui/out/_next/static/chunks/677-b37981ba0eca75b2.js +0 -1
package/ui/out/_next/static/chunks/app/page-0c9f35bd4aa8e370.js +0 -1
package/ui/out/_next/static/chunks/app/run/page-13dc41a57e34da71.js +0 -1
package/ui/out/_next/static/chunks/app/settings/page-a1763be7f6de888c.js +0 -1
package/ui/out/_next/static/css/82edaa7a5942f894.css +0 -3
/package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
/package/ui/out/_next/static/{eiQeDU9uBHNsBj0CFkp8M → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0

package/README.md CHANGED

@@ -1,5 +1,17 @@
+<p align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset=".github/assets/swarm-mark-light.png">
+    <img src=".github/assets/swarm-mark-dark.png" alt="agentswarm" width="120">
+  </picture>
+</p>
 # agentswarm
+[![npm](https://img.shields.io/npm/v/@robzilla1738/agentswarm)](https://www.npmjs.com/package/@robzilla1738/agentswarm)
+[![license](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
+[![node](https://img.shields.io/badge/node-%E2%89%A520.10-brightgreen)](package.json)
+[![support](https://img.shields.io/badge/support-buy%20me%20a%20coffee-yellow)](https://buymeacoffee.com/robcourson)
 A local agent-swarm orchestrator with a terminal dashboard and a localhost web UI. Works with DeepSeek, OpenAI, Anthropic, xAI, MiniMax, OpenRouter, Ollama, LM Studio, or any OpenAI-compatible endpoint.
 You give it a mission. A conductor model breaks the mission into tasks and hands them to worker agents that run in parallel, share findings on a blackboard, and get checked by an adversarial verifier. The run ends with a synthesized report plus whatever files the agents produced. Everything runs on your machine with your own API key, or fully offline against a local model.
@@ -17,7 +29,7 @@ You give it a mission. A conductor model breaks the mission into tasks and hands
         │  T4 dep  │◀─────│ verify  │   adversarial verification
         └────┬─────┘      └─────────┘
         ┌────▼─────┐
-        │Synthesize│  → final-report.md + artifacts
+        │Synthesize│  → final report (.md + .html) + artifacts
         └──────────┘
 ```
@@ -89,17 +101,32 @@ Run options (also on the UI launch form under Options): `--workers N` (paralleli
 ## How it works
-The conductor is a model with three tools: `spawn_tasks`, `wait`, and `finish`. It reads the mission, spawns self-contained tasks (each with an objective, success criteria, a role, optional dependencies, and an optional `verify` flag), then reacts as reports come back.
+The conductor is a model with six tools: `spawn_tasks`, `set_phase`, `update_plan`, `read_report`, `wait`, and `finish`. It reads the mission, spawns self-contained tasks (each with an objective, success criteria, a role, optional dependencies, and an optional `verify` flag), then reacts as reports come back. On long missions it declares phases (`set_phase`) whose goals and exit criteria are pinned into every update — so the plan survives even when old history is trimmed and replaced by a mission ledger (settled tasks, decisions, current phase).
+Each task becomes an autonomous agent with a tool budget. It works in small steps, posts durable findings to the blackboard (decisions are never trimmed from digests; `search_notes` searches the full history), journals progress checkpoints on long tasks, saves artifacts, and ends by reporting back with structured handoff fields (`key_facts`, `open_questions`, `files_touched`). Dependent tasks receive report excerpts plus those fields, and can pull full text with `read_report`.
+**Scale.** A global AIMD limiter (`maxConcurrentCalls`) bounds concurrent model calls per endpoint — a 429 halves the ceiling, successes recover it, and conductor calls always jump the queue, so a 100-agent swarm degrades gracefully instead of melting down. Settles are debounced before waking the conductor; on big runs the task table collapses settled waves (failures stay itemized) and excess reports become one-liners the conductor can expand with `read_report`. Spawn specs take a `model` tier (`cheap` for scouts, `strong` for leads/verifiers via `cheapModel`/`strongModel` config) and `team:true` to run a task as a full sub-swarm — its own conductor decomposes it in parallel and reports one consolidated result, with all activity journaled under its `teamId`.
+**Long horizon.** The conductor maintains a living `mission-plan.md` (`update_plan`) pinned into every update and restored on resume; every 25 settled tasks a progress snapshot lands in `artifacts/` so multi-day runs always have a partial deliverable; and real-directory runs leave a memory (`~/.agentswarm/memory/`) of missions, outcomes, and decisions that seeds the next swarm in the same workspace.
-Each task becomes an autonomous agent with a tool budget. It works in small steps, posts durable findings to the blackboard, saves artifacts, and ends by reporting back. The report is the only thing the conductor sees, which keeps reports specific.
+Verified tasks pass two gates: a free mechanical check (claimed artifacts must exist and be non-empty), then a blind LLM verifier that judges the deliverables against the objective with its own tools — it never sees the worker's blackboard. In `--verify strict` mode, a completeness critic reviews the whole run for gaps before synthesis (the conductor gets one round to fill them), and the final report is checked for faithfulness against the task reports.
 The scheduler starts a task as soon as its dependencies are done, up to the parallelism cap. Tasks whose dependencies failed are blocked and surfaced to the conductor for re-planning.
-When the conductor finishes (or the budget forces it), a synthesizer composes `final-report.md` from every task report.
+When the conductor finishes (or the budget forces it), a synthesizer composes the final deliverable from every task report. Deliverables ship in the format the mission calls for — code, `.csv`/`.json` data, styled documents — alongside `final-report.md` and a self-contained `final-report.html` rendering (open it with `swarm report <id> --open`).
 The journal is the source of truth. Every run is an append-only `events.jsonl`; the terminal dashboard, the web UI, and `swarm ls` all reduce the same file. That's why runs survive crashes and can be resumed or replayed. Runs live under `~/.agentswarm/runs/<id>/`.
-If the engine process dies without writing a terminal status (kill -9, reboot), the hub notices the missing process and shows the run as interrupted instead of leaving it "running" forever.
+If the engine process dies without writing a terminal status (kill -9, reboot), the hub notices the missing process and shows the run as interrupted instead of leaving it "running" forever. `swarm resume <id>` continues it: settled tasks keep their results, and tasks that were mid-flight restart *warm* from their last journaled checkpoint instead of from scratch. SIGTERM flushes the journal synchronously and leaves the run resumable.
+## Troubleshooting
+- **"interrupted — the engine process is no longer running"** — the engine died without a terminal status (kill -9, reboot, crash). Check `~/.agentswarm/runs/<id>/exec.log` for the crash output, then `swarm resume <id>`.
+- **Run ended with "conductor unavailable"** — five consecutive conductor API calls failed (after backoff). Usually a provider outage or a bad model name; check the run's activity log for the underlying error, fix, and resume.
+- **"journal writes are failing"** — the engine could not append to `events.jsonl` (disk full, permissions). The run aborts deliberately rather than doing unrecorded work.
+- **A verified task keeps failing with "Claimed artifact(s) do not exist"** — the worker reported files it never wrote. That's the mechanical pre-verifier doing its job; the retry prompt tells the worker to actually create them.
+- **Docker sandbox fails to start** — confirm `docker info` works as your user, and that the configured `sandboxImage` can be pulled. `swarm sandbox test` checks the configured runtime end-to-end.
+- **Hung or wedged run** — `swarm cancel <id>` aborts in-flight agents within ~1s; sandbox teardown is bounded by a 15s timeout so it can't hang shutdown.
 ## Architecture
@@ -137,6 +164,10 @@ Boots a mock model server and drives real missions through the engine, offline,
 - Costs are estimates based on list prices and the token counts the API reports. Models without pricing data show $0. Set a `--budget` either way.
 - Keys are stored in `~/.agentswarm/config.json` (chmod 600) and are only sent to the APIs you configured.
+## Author
+Built by [Robert Courson](https://robertcourson.com). If agentswarm saves you time, you can [buy me a coffee](https://buymeacoffee.com/robcourson).
 ## License
 MIT

package/dist/agent.js CHANGED

@@ -26,31 +26,52 @@ async function runAgent(p) {
     let lastText = "";
     let steps = 0;
     hooks.onTranscript?.(messages);
-    const callModel = (opts) => (0, deepseek_1.chat)(cfg, {
-        model: p.model,
-        messages,
-        tools: opts?.only
-            ? allSchemas.filter((s) => s.name === opts.only)
-            : allSchemas,
-        toolChoice: opts?.only,
-        thinking: p.thinking,
-        reasoningEffort: p.thinking ? p.reasoningEffort : undefined,
-        maxTokens: p.maxTokensOut,
-        signal: p.signal,
-        onDelta: (d) => {
-            if (d.think)
-                hooks.onDelta?.("think", d.think);
-            if (d.text)
-                hooks.onDelta?.("text", d.text);
-        },
-    });
+    // Wrap-up calls (forced or terminal-only) run with thinking disabled:
+    // DeepSeek's thinking mode hard-400s on a forced function tool_choice, and
+    // the final "summarize into the terminal tool" turn needs no deep reasoning.
+    const callModel = (opts) => {
+        const wrapUp = Boolean(opts?.only || opts?.terminalOnly);
+        return (0, deepseek_1.chat)(cfg, {
+            model: p.model,
+            messages,
+            tools: opts?.only
+                ? allSchemas.filter((s) => s.name === opts.only)
+                : opts?.terminalOnly
+                    ? p.terminal
+                    : allSchemas,
+            toolChoice: opts?.only,
+            thinking: wrapUp ? false : p.thinking,
+            reasoningEffort: !wrapUp && p.thinking ? p.reasoningEffort : undefined,
+            maxTokens: p.maxTokensOut,
+            signal: p.signal,
+            onDelta: (d) => {
+                if (d.think)
+                    hooks.onDelta?.("think", d.think);
+                if (d.text)
+                    hooks.onDelta?.("text", d.text);
+            },
+        });
+    };
     let stopReason = null;
     while (steps < p.maxSteps) {
         stopReason = p.stop?.() ?? null;
         if (stopReason)
             break;
         steps++;
-        const res = await callModel();
+        let res;
+        try {
+            res = await callModel();
+        }
+        catch (e) {
+            // The chat client already retries 429/5xx; this catches the rest of the
+            // transient class (connection resets, DNS blips) once per step so a
+            // single network hiccup doesn't burn a whole task attempt.
+            if (p.signal.aborted)
+                throw e;
+            hooks.onLog?.("warn", `${p.agentId}: model call failed (${(0, util_1.errMsg)(e)}); retrying once`);
+            await new Promise((r) => setTimeout(r, 1500));
+            res = await callModel();
+        }
         hooks.onUsage?.(p.model, res.usage);
         usage = (0, types_1.addUsage)(usage, res.usage);
         if (res.toolCalls.length === 0) {
@@ -128,21 +149,30 @@ async function runAgent(p) {
         }
     }
     // Step budget exhausted (or stopped early) — force one final terminal call.
+    // Two attempts: a forced tool_choice first, then terminal-only tools with
+    // free choice, since some providers reject or ignore forced choices. The
+    // agent's work must never be discarded because the wrap-up call failed.
     messages.push({ role: "user", content: stopReason ? (0, prompts_1.forcedFinal)(stopReason) : prompts_1.STEP_LIMIT_FINAL });
-    try {
-        const res = await callModel({ only: p.terminal[0].name });
-        hooks.onUsage?.(p.model, res.usage);
-        usage = (0, types_1.addUsage)(usage, res.usage);
-        const call = res.toolCalls.find((c) => terminalNames.has(c.function.name));
-        if (call) {
-            const args = (0, util_1.safeJson)(call.function.arguments) ?? {};
-            return { terminal: { name: call.function.name, args }, finalText: lastText, steps, usage };
+    for (const opts of [{ only: p.terminal[0].name }, { terminalOnly: true }]) {
+        try {
+            const res = await callModel(opts);
+            hooks.onUsage?.(p.model, res.usage);
+            usage = (0, types_1.addUsage)(usage, res.usage);
+            const call = res.toolCalls.find((c) => terminalNames.has(c.function.name));
+            if (call) {
+                const args = (0, util_1.safeJson)(call.function.arguments) ?? {};
+                return { terminal: { name: call.function.name, args }, finalText: lastText, steps, usage };
+            }
+            if (res.content) {
+                lastText = res.content;
+                // The model answered in prose; keep it and demand the tool call.
+                messages.push({ role: "assistant", content: res.content });
+                messages.push({ role: "user", content: `Call the ${p.terminal[0].name} tool now. Do not reply with text.` });
+            }
+        }
+        catch (e) {
+            hooks.onLog?.("warn", `${p.agentId}: final terminal call failed: ${(0, util_1.errMsg)(e)}`);
         }
-        if (res.content)
-            lastText = res.content;
-    }
-    catch (e) {
-        hooks.onLog?.("warn", `${p.agentId}: forced final call failed: ${(0, util_1.errMsg)(e)}`);
     }
     return { terminal: null, finalText: lastText, steps, usage };
 }
@@ -194,6 +224,8 @@ async function compact(p, messages) {
         });
         p.hooks.onUsage?.(p.model, res.usage);
         summary = res.content || "(compaction produced no summary)";
+        if (res.content)
+            p.hooks.onCheckpoint?.(res.content);
     }
     catch (e) {
         // Compaction is best-effort; fall back to hard truncation.

package/dist/cli.js CHANGED

@@ -342,11 +342,24 @@ async function execForeground(cfg, meta, render, resume = false) {
     };
     process.on("uncaughtException", onFatal);
     process.on("unhandledRejection", onFatal);
+    // SIGTERM (kill, system shutdown): flush buffered journal lines synchronously
+    // and exit WITHOUT a terminal status — the run stays resumable, and viewers
+    // show it as interrupted once the pid disappears.
+    const onTerm = () => {
+        journal.append("log", { level: "warn", msg: "engine received SIGTERM — exiting; resume with: swarm resume " + meta.id });
+        journal.flushSync();
+        (0, run_1.clearPid)(meta.id);
+        if (renderer)
+            renderer.stop();
+        process.exit(143);
+    };
+    process.on("SIGTERM", onTerm);
     try {
         await executor.run();
     }
     finally {
         process.off("SIGINT", onSig);
+        process.off("SIGTERM", onTerm);
         process.off("uncaughtException", onFatal);
         process.off("unhandledRejection", onFatal);
         (0, run_1.clearPid)(meta.id);
@@ -476,8 +489,10 @@ function cmdReport(id, flags) {
         process.exit(1);
     }
     if (flags.open) {
-        openBrowser("file://" + file);
-        console.log(file);
+        const html = path.join((0, config_1.runDir)(id), "artifacts", "final-report.html");
+        const target = fs.existsSync(html) ? html : file;
+        openBrowser("file://" + target);
+        console.log(target);
         return;
     }
     process.stdout.write(fs.readFileSync(file, "utf8") + "\n");
@@ -614,7 +629,7 @@ function printFinalLine(id) {
     console.log("");
     if (fs.existsSync(reportFile)) {
         console.log(util_1.ansi.green("✓ final report: ") + reportFile);
-        console.log(util_1.ansi.gray("  view: ") + `swarm report ${id}`);
+        console.log(util_1.ansi.gray("  view: ") + `swarm report ${id}` + util_1.ansi.gray("  ·  open in browser: ") + `swarm report ${id} --open`);
     }
     else {
         console.log(util_1.ansi.gray(`run ${id} ended without a final report (see: swarm watch ${id})`));
@@ -661,7 +676,6 @@ ${b("RUN OPTIONS")}
 ${b("FIRST RUN")}
   swarm config set apiKey <key>             # key for the active provider (default: DeepSeek)
   swarm config set provider <id>            # deepseek | openai | anthropic | xai | minimax | openrouter | ollama | lmstudio | custom
-  pip install searchkit                     # optional: local, citable web search for agents
   swarm serve --open                        # open the web UI
 `);
 }

package/dist/config.js CHANGED

@@ -70,17 +70,24 @@ exports.DEFAULTS = {
     baseUrl: providers_1.PROVIDERS.deepseek.baseUrl,
     model: "deepseek-v4-flash",
     conductorModel: "deepseek-v4-flash",
+    cheapModel: "",
+    strongModel: "",
     maxWorkers: 6,
     maxStepsPerTask: 30,
-    maxTasks: 48,
+    maxTasks: 200,
     maxTokensPerRun: 12_000_000,
     verification: "normal",
+    verifyMaxAttempts: 2,
     thinking: true,
     reasoningEffort: "high",
     safeMode: true,
     tinyfishApiKey: "",
     searchBackend: "auto",
-    searchkitCmd: "searchkit",
+    firecrawlApiKey: "",
+    contextdevApiKey: "",
+    deepcrawlApiKey: "",
+    deepcrawlBaseUrl: "",
+    crawlBackend: "auto",
     sandboxRuntime: "host",
     sandboxImage: "node:22-bookworm",
     e2bApiKey: "",
@@ -90,10 +97,11 @@ exports.DEFAULTS = {
     vercelToken: "",
     vercelTeamId: "",
     vercelProjectId: "",
+    maxConcurrentCalls: 16,
     requestTimeoutMs: 900_000,
     idleTimeoutMs: 180_000,
     contextTokenLimit: 120_000,
-    maxToolResultChars: 12_000,
+    maxToolResultChars: 20_000,
     hubPort: 7777,
     uiPort: 7780,
     pricing: exports.DEFAULT_PRICING,
@@ -109,6 +117,9 @@ exports.SECRET_ENV_KEYS = [
             .map((p) => p.keyEnv)
             .filter((k) => Boolean(k)),
         "TINYFISH_API_KEY",
+        "FIRECRAWL_API_KEY",
+        "CONTEXT_DEV_API_KEY",
+        "DEEPCRAWL_API_KEY",
         "E2B_API_KEY",
         "MODAL_TOKEN_ID",
         "MODAL_TOKEN_SECRET",
@@ -160,6 +171,14 @@ function loadConfig() {
         cfg.apiKey = process.env[info.keyEnv];
     if (process.env.TINYFISH_API_KEY)
         cfg.tinyfishApiKey = process.env.TINYFISH_API_KEY;
+    if (process.env.FIRECRAWL_API_KEY)
+        cfg.firecrawlApiKey = process.env.FIRECRAWL_API_KEY;
+    if (process.env.CONTEXT_DEV_API_KEY)
+        cfg.contextdevApiKey = process.env.CONTEXT_DEV_API_KEY;
+    if (process.env.DEEPCRAWL_API_KEY)
+        cfg.deepcrawlApiKey = process.env.DEEPCRAWL_API_KEY;
+    if (process.env.DEEPCRAWL_BASE_URL)
+        cfg.deepcrawlBaseUrl = process.env.DEEPCRAWL_BASE_URL;
     if (process.env.E2B_API_KEY)
         cfg.e2bApiKey = process.env.E2B_API_KEY;
     if (process.env.MODAL_TOKEN_ID)
@@ -218,17 +237,24 @@ exports.SETTABLE_KEYS = [
     "baseUrl",
     "model",
     "conductorModel",
+    "cheapModel",
+    "strongModel",
     "maxWorkers",
     "maxStepsPerTask",
     "maxTasks",
     "maxTokensPerRun",
     "verification",
+    "verifyMaxAttempts",
     "thinking",
     "reasoningEffort",
     "safeMode",
     "tinyfishApiKey",
     "searchBackend",
-    "searchkitCmd",
+    "firecrawlApiKey",
+    "contextdevApiKey",
+    "deepcrawlApiKey",
+    "deepcrawlBaseUrl",
+    "crawlBackend",
     "sandboxRuntime",
     "sandboxImage",
     "e2bApiKey",
@@ -238,15 +264,18 @@ exports.SETTABLE_KEYS = [
     "vercelToken",
     "vercelTeamId",
     "vercelProjectId",
+    "maxConcurrentCalls",
     "contextTokenLimit",
     "hubPort",
     "uiPort",
 ];
 /** Allowed ranges for numeric settings (values are clamped, not rejected). */
 const NUM_RANGES = {
-    maxWorkers: [1, 32],
+    maxWorkers: [1, 128],
+    maxConcurrentCalls: [1, 256],
     maxStepsPerTask: [3, 200],
     maxTasks: [1, 1000],
+    verifyMaxAttempts: [1, 5],
     maxTokensPerRun: [50_000, 2_000_000_000],
     contextTokenLimit: [8_000, 900_000],
     hubPort: [0, 65535],
@@ -256,6 +285,7 @@ const ENUMS = {
     verification: ["off", "normal", "strict"],
     reasoningEffort: ["low", "medium", "high", "max"],
     searchBackend: ["auto", "tinyfish", "ddg"],
+    crawlBackend: ["auto", "firecrawl", "contextdev", "deepcrawl", "off"],
     sandboxRuntime: ["auto", "host", "docker", "e2b", "modal", "vercel"],
     provider: Object.keys(providers_1.PROVIDERS),
 };

package/dist/crawltools.js ADDED

@@ -0,0 +1,247 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.resolveCrawlBackend = resolveCrawlBackend;
+exports.hasScrapeBackend = hasScrapeBackend;
+exports.crawlSite = crawlSite;
+exports.scrapeUrl = scrapeUrl;
+exports.slugForUrl = slugForUrl;
+const util_1 = require("./util");
+const PER_PAGE_CHAR_CAP = 200_000; // per-page character cap that crawlSite passes to truncateMiddle
+const TOTAL_CHAR_BUDGET = 8_000_000; // crawlSite stops adding pages once the summed page lengths would exceed this
+const CRAWL_DEADLINE_MS = 120_000; // 120s: firecrawl polling deadline, and the request timeout for context.dev / deepcrawl crawls
+/** Resolve which crawl backend to use. auto = first configured: Firecrawl → context.dev → deepcrawl. "off" or nothing configured → null. A backend named explicitly is returned only if its own credentials are set; it never falls back to another. */
+function resolveCrawlBackend(cfg) {
+    if (cfg.crawlBackend === "off")
+        return null;
+    const configured = {
+        firecrawl: Boolean(cfg.firecrawlApiKey),
+        contextdev: Boolean(cfg.contextdevApiKey),
+        deepcrawl: Boolean(cfg.deepcrawlApiKey && cfg.deepcrawlBaseUrl), // deepcrawl needs both a key and a base URL
+    };
+    if (cfg.crawlBackend !== "auto") // explicit choice: usable only when that backend is configured
+        return configured[cfg.crawlBackend] ? cfg.crawlBackend : null;
+    for (const id of ["firecrawl", "contextdev", "deepcrawl"]) { // array order is the auto-selection priority
+        if (configured[id])
+            return id;
+    }
+    return null;
+}
+/** True when the resolved backend supports the single-page scrape used by fetch_url: firecrawl or context.dev (the custom deepcrawl contract has no scrape endpoint). */
+function hasScrapeBackend(cfg) {
+    const b = resolveCrawlBackend(cfg); // the resolved backend, so crawlBackend "off" or missing keys give false
+    return b === "firecrawl" || b === "contextdev";
+}
+async function crawlSite(cfg, opts) { // Crawl opts.url with the resolved backend; resolves to { backend, pages, warnings } after normalization. Throws when no backend is configured.
+    const backend = resolveCrawlBackend(cfg);
+    if (!backend)
+        throw new Error("no crawl backend configured — add a Firecrawl/context.dev/deepcrawl key in Settings");
+    const warnings = []; // non-fatal notes, returned alongside the pages
+    let pages;
+    if (backend === "firecrawl")
+        pages = await firecrawlCrawl(cfg, opts, warnings);
+    else if (backend === "contextdev")
+        pages = await contextdevCrawl(cfg, opts);
+    else
+        pages = await deepcrawlCrawl(cfg, opts);
+    // Normalize: drop empty/binary pages, cap per-page and total size.
+    const clean = [];
+    let skipped = 0;
+    let total = 0;
+    for (const p of pages) {
+        if (clean.length >= opts.maxPages) // page cap is enforced here regardless of how many pages the backend returned
+            break;
+        const md = (p.markdown || "").trim();
+        if (!md || md.includes("\u0000")) { // blank after trimming, or contains a NUL character
+            skipped++;
+            continue;
+        }
+        const body = (0, util_1.truncateMiddle)(md, PER_PAGE_CHAR_CAP, "chars");
+        if (total + body.length > TOTAL_CHAR_BUDGET) { // this page would exceed the budget: stop, later pages are not tried
+            warnings.push(`stopped at ${clean.length} pages: total content budget reached`);
+            break;
+        }
+        total += body.length;
+        clean.push({ url: p.url, title: p.title, markdown: body });
+    }
+    if (skipped) // NOTE(review): the count includes NUL-containing pages too, although the message only says "empty"
+        warnings.push(`${skipped} empty page${skipped > 1 ? "s" : ""} skipped`);
+    return { backend, pages: clean, warnings };
+}
+/** Single-page scrape via the configured backend; resolves to the page markdown, prefixed with a "# title" heading when the backend reports a title. Throws on failure (including an empty result) — callers fall through to their own fetch path. */
+async function scrapeUrl(cfg, url, signal) {
+    const backend = resolveCrawlBackend(cfg);
+    if (backend === "firecrawl") {
+        const data = await callJson("firecrawl", "https://api.firecrawl.dev/v1/scrape", cfg.firecrawlApiKey, { url, formats: ["markdown"] }, 30_000, signal); // 30_000 = request timeout in ms
+        const md = String(data?.data?.markdown ?? "");
+        if (!md.trim())
+            throw new Error("firecrawl: empty scrape result");
+        const title = data?.data?.metadata?.title;
+        return title ? `# ${title}\n\n${md}` : md;
+    }
+    if (backend === "contextdev") {
+        const data = await callJson("context.dev", "https://api.context.dev/v1/web/scrape", cfg.contextdevApiKey, { url }, 30_000, signal); // 30_000 = request timeout in ms
+        const md = String(data?.markdown ?? data?.results?.[0]?.markdown ?? ""); // markdown is read from the top level, else from the first results[] entry
+        if (!md.trim())
+            throw new Error("context.dev: empty scrape result");
+        const title = data?.metadata?.title ?? data?.results?.[0]?.metadata?.title;
+        return title ? `# ${title}\n\n${md}` : md;
+    }
+    throw new Error("no scrape-capable crawl backend configured"); // reached when the resolved backend is deepcrawl or null
+}
+/** "https://docs.foo.com/a/b?x=1" → filesystem-safe { host, slug } with no separators or traversal. Only hostname and pathname are used, so the query string does not reach the slug. */
+function slugForUrl(url) {
+    let u;
+    try {
+        u = new URL(url);
+    }
+    catch {
+        return { host: "site", slug: sanitize(url) || "page" }; // not a parseable URL: slug comes from the raw string, host is the fixed "site"
+    }
+    const host = sanitize(u.hostname) || "site";
+    const slug = sanitize(u.pathname.replace(/\/+$/, "")) || "index"; // trailing slashes dropped; a path that sanitizes to nothing becomes "index"
+    return { host, slug };
+}
+function sanitize(s) { // Lowercase s and reduce it to the characters [a-z0-9._-], at most 120 long.
+    return s
+        .toLowerCase()
+        .replace(/[^a-z0-9._-]+/g, "-") // every run of other characters (including "/") becomes one "-"
+        .replace(/\.{2,}/g, ".") // collapse runs of dots so no ".." sequence survives
+        .replace(/-{2,}/g, "-") // collapse runs of dashes
+        .replace(/^[-.]+|[-.]+$/g, "") // trim leading and trailing "-" / "."
+        .slice(0, 120); // NOTE(review): the cut happens after trimming, so the result can still end in "-" or "."
+}
+// ---------------------------------------------------------------- backends
+async function firecrawlCrawl(cfg, opts, warnings) { // Start a Firecrawl crawl job, poll it until it completes, fails or hits the deadline, and return the mapped pages. Non-fatal notes are pushed onto warnings.
+    const start = await callJson("firecrawl", "https://api.firecrawl.dev/v1/crawl", cfg.firecrawlApiKey, {
+        url: opts.url,
+        limit: opts.maxPages,
+        ...(opts.includePaths?.length ? { includePaths: opts.includePaths } : {}), // sent only when the caller gave a non-empty list
+        scrapeOptions: { formats: ["markdown"] },
+    }, 30_000, opts.signal); // 30s timeout for the job-start request
+    const jobId = start?.id;
+    if (!jobId)
+        throw new Error(`firecrawl: crawl did not start (${start?.error || "no job id"})`);
+    const pollMs = opts.pollMs ?? 3000; // poll interval: 3s unless the caller overrides it
+    const deadline = Date.now() + CRAWL_DEADLINE_MS;
+    let last = null;
+    for (;;) {
+        opts.signal?.throwIfAborted();
+        last = await getJson("firecrawl", `https://api.firecrawl.dev/v1/crawl/${jobId}`, cfg.firecrawlApiKey, opts.signal);
+        if (last?.status === "completed")
+            break;
+        if (last?.status === "failed")
+            throw new Error(`firecrawl: crawl failed (${last?.error || "unknown error"})`);
+        if (Date.now() > deadline) { // deadline passed: return whatever pages the last poll carried instead of failing outright
+            const partial = mapFirecrawlPages(last);
+            if (!partial.length)
+                throw new Error("firecrawl: crawl still running after 120s with no pages yet — try fewer pages");
+            warnings.push(`crawl still running after 120s; returning ${partial.length} partial pages`);
+            return partial;
+        }
+        await sleep(pollMs, opts.signal);
+    }
+    // Completed: collect pages, following `next` pagination until maxPages.
+    const pages = mapFirecrawlPages(last);
+    let next = last?.next;
+    while (next && pages.length < opts.maxPages) { // a whole batch is appended before the check, so the result may exceed maxPages
+        const more = await getJson("firecrawl", String(next), cfg.firecrawlApiKey, opts.signal); // NOTE(review): `next` is a URL taken from the API response and is requested with the API key attached
+        pages.push(...mapFirecrawlPages(more));
+        next = more?.next;
+    }
+    return pages;
+}
+function mapFirecrawlPages(res) { // Map a Firecrawl crawl response to [{ url, title, markdown }]; a response without a data[] array yields [].
+    const data = Array.isArray(res?.data) ? res.data : [];
+    return data.map((d) => ({
+        url: String(d?.metadata?.sourceURL ?? d?.metadata?.url ?? ""), // prefers metadata.sourceURL, then metadata.url
+        title: String(d?.metadata?.title ?? ""),
+        markdown: String(d?.markdown ?? ""), // missing fields become "" rather than undefined
+    }));
+}
+async function contextdevCrawl(cfg, opts) { // Single POST to the context.dev crawl endpoint; maps results[] to [{ url, title, markdown }].
+    const data = await callJson("context.dev", "https://api.context.dev/v1/web/crawl", cfg.contextdevApiKey, {
+        url: opts.url,
+        max_pages: opts.maxPages,
+        ...(opts.includePaths?.length ? { include_paths: opts.includePaths } : {}), // sent only when the caller gave a non-empty list
+    }, CRAWL_DEADLINE_MS, opts.signal); // the whole crawl must answer within CRAWL_DEADLINE_MS; there is no polling here
+    const results = Array.isArray(data?.results) ? data.results : []; // a response without results[] yields no pages rather than an error
+    return results.map((r) => ({
+        url: String(r?.metadata?.url ?? r?.url ?? ""),
+        title: String(r?.metadata?.title ?? r?.title ?? ""),
+        markdown: String(r?.markdown ?? ""),
+    }));
+}
+async function deepcrawlCrawl(cfg, opts) { // POST to `${cfg.deepcrawlBaseUrl}/crawl` and normalize the response to [{ url, title, markdown }]; throws on an unrecognized shape.
+    const base = cfg.deepcrawlBaseUrl.replace(/\/+$/, ""); // strip trailing slashes so the joined URL has exactly one "/" before "crawl"
+    const data = await callJson("deepcrawl", `${base}/crawl`, cfg.deepcrawlApiKey, {
+        url: opts.url,
+        max_pages: opts.maxPages,
+        ...(opts.includePaths?.length ? { include_paths: opts.includePaths } : {}),
+    }, CRAWL_DEADLINE_MS, opts.signal);
+    // Accept either the context.dev-compatible shape or a flat pages[] list.
+    if (Array.isArray(data?.results)) { // results[] is checked first, so it wins when both keys are present
+        return data.results.map((r) => ({
+            url: String(r?.metadata?.url ?? r?.url ?? ""),
+            title: String(r?.metadata?.title ?? r?.title ?? ""),
+            markdown: String(r?.markdown ?? ""),
+        }));
+    }
+    if (Array.isArray(data?.pages)) {
+        return data.pages.map((p) => ({
+            url: String(p?.url ?? ""),
+            title: String(p?.title ?? ""),
+            markdown: String(p?.markdown ?? p?.content ?? ""), // the flat shape may carry the body as markdown or as content
+        }));
+    }
+    throw new Error("deepcrawl: unrecognized response shape (expected results[] or pages[])");
+}
+// ---------------------------------------------------------------- plumbing
+function friendlyHttpError(service, status, body) { // Build (not throw) an Error for a failed HTTP response, with specific text for auth, quota and rate-limit statuses.
+    if (status === 401 || status === 403) {
+        return new Error(`${service} API key invalid or unauthorized (HTTP ${status}) — check Settings → Crawl integrations`);
+    }
+    if (status === 402)
+        return new Error(`${service}: quota or credits exhausted (HTTP 402)`);
+    if (status === 429)
+        return new Error(`${service}: rate limited (HTTP 429) — retry later`);
+    return new Error(`${service}: HTTP ${status} ${(0, util_1.truncateMiddle)(body, 300, "chars")}`); // any other status: the response body is included, passed through truncateMiddle with a limit of 300
+}
+function mergeSignal(timeoutMs, signal) { // Combine a fresh timeout signal with the caller's optional abort signal.
+    const t = AbortSignal.timeout(timeoutMs);
+    if (!signal)
+        return t; // no caller signal: the timeout alone governs the request
+    return typeof AbortSignal.any === "function" ? AbortSignal.any([t, signal]) : signal; // NOTE(review): where AbortSignal.any is unavailable the timeout is dropped and only the caller's signal applies
+}
+async function callJson(service, url, key, body, timeoutMs, signal) { // POST body as JSON with a Bearer key; resolves to the parsed JSON response, throws a friendlyHttpError when the response is not ok.
+    const res = await fetch(url, {
+        method: "POST",
+        headers: { authorization: `Bearer ${key}`, "content-type": "application/json" },
+        body: JSON.stringify(body),
+        signal: mergeSignal(timeoutMs, signal),
+    });
+    if (!res.ok)
+        throw friendlyHttpError(service, res.status, await res.text().catch(() => "")); // a failed body read falls back to "" so it cannot mask the HTTP error
+    return res.json();
+}
+async function getJson(service, url, key, signal) { // GET with a Bearer key; resolves to the parsed JSON response, throws a friendlyHttpError when the response is not ok.
+    const res = await fetch(url, {
+        headers: { authorization: `Bearer ${key}` },
+        signal: mergeSignal(30_000, signal), // fixed 30s timeout, unlike callJson where the caller chooses it
+    });
+    if (!res.ok)
+        throw friendlyHttpError(service, res.status, await res.text().catch(() => "")); // a failed body read falls back to "" so it cannot mask the HTTP error
+    return res.json();
+}
+function sleep(ms, signal) { // Resolve after ms milliseconds; reject with Error("aborted") if the optional signal fires "abort" first.
+    return new Promise((resolve, reject) => {
+        const t = setTimeout(() => {
+            signal?.removeEventListener("abort", onAbort); // timer won: detach the listener (onAbort is declared below and initialized before this callback can run)
+            resolve();
+        }, ms);
+        const onAbort = () => {
+            clearTimeout(t); // abort won: cancel the pending timer
+            reject(new Error("aborted"));
+        };
+        signal?.addEventListener("abort", onAbort, { once: true }); // NOTE(review): signal.aborted is not checked, so a signal that is already aborted does not cut the delay short
+    });
+}