npm - @robzilla1738/agentswarm - Versions diffs - 0.3.0 → 0.5.0 - Mend

@robzilla1738/agentswarm 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/README.md +28 -5
package/dist/agent.js +16 -1
package/dist/cli.js +18 -4
package/dist/config.js +35 -5
package/dist/crawltools.js +247 -0
package/dist/deepseek.js +125 -10
package/dist/executor.js +771 -122
package/dist/hub.js +16 -3
package/dist/journal.js +61 -11
package/dist/memory.js +83 -0
package/dist/prompts.js +109 -16
package/dist/report.js +252 -0
package/dist/run.js +7 -2
package/dist/searchcore.js +191 -0
package/dist/state.js +57 -3
package/dist/tools.js +202 -12
package/dist/webtools.js +191 -60
package/package.json +3 -2
package/ui/out/404/index.html +1 -1
package/ui/out/404.html +1 -1
package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
package/ui/out/fonts/PlanetKosmos.ttf +0 -0
package/ui/out/index.html +1 -1
package/ui/out/index.txt +3 -3
package/ui/out/run/index.html +1 -1
package/ui/out/run/index.txt +3 -3
package/ui/out/settings/index.html +1 -1
package/ui/out/settings/index.txt +3 -3
package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
/package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
/package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0

package/dist/tools.js CHANGED Viewed

@@ -33,12 +33,13 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
+exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SET_PHASE_TOOL = exports.UPDATE_PLAN_TOOL = exports.CONDUCTOR_READ_REPORT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
 exports.workerToolset = workerToolset;
 exports.verifierToolset = verifierToolset;
 exports.synthToolset = synthToolset;
 const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
+const crawltools_1 = require("./crawltools");
 const util_1 = require("./util");
 const webtools_1 = require("./webtools");
 // ---------- safety ----------
@@ -84,7 +85,7 @@ async function writeFileVia(ctx, abs, content) {
     }
 }
 // ---------- tool definitions ----------
-function workerToolset() {
+function workerToolset(cfg) {
     const tools = {};
     tools.shell = {
         schema: {
@@ -163,7 +164,8 @@ function workerToolset() {
             if (content.length > 5_000_000)
                 throw new Error("content too large (>5MB)");
             await writeFileVia(ctx, abs, content);
-            return `wrote ${abs} (${content.length} chars)`;
+            const warn = ctx.checkClaim?.(String(args.path));
+            return `wrote ${abs} (${content.length} chars)${warn ? `\n${warn}` : ""}`;
         },
     };
     tools.replace_in_file = {
@@ -194,7 +196,8 @@ function workerToolset() {
             }
             const next = args.all ? raw.split(find).join(replace) : raw.replace(find, replace);
             await writeFileVia(ctx, abs, next);
-            return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}`;
+            const warn = ctx.checkClaim?.(String(args.path));
+            return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}${warn ? `\n${warn}` : ""}`;
         },
     };
     tools.list_dir = {
@@ -266,20 +269,20 @@ function workerToolset() {
     tools.web_search = {
         schema: {
             name: "web_search",
-            description: "Search the web. Returns ranked results with title, URL and snippet. " +
-                "Set deep=true to also fetch top pages and return quotable passages (slower; use for claims that need grounding).",
+            description: "Search the web. Fans out across multiple engines (DuckDuckGo, Bing, +TinyFish if configured), merges and quality-ranks results, and dedupes by canonical URL. Returns ranked results with title, URL and snippet. " +
+                "Set deep=true to widen the query into complementary phrasings, fetch the top pages, and return quotable passages with publication dates — use for thorough research and any claim that needs grounding. Raise count (up to 25) to pull more sources per call.",
             parameters: {
                 type: "object",
                 properties: {
                     query: { type: "string" },
-                    count: { type: "number", description: "Max results, default 6, max 10" },
-                    deep: { type: "boolean", description: "Fetch page content for quotable passages" },
+                    count: { type: "number", description: "Max results, default 8, max 25" },
+                    deep: { type: "boolean", description: "Multi-phrasing sweep + fetch pages for quotable passages" },
                 },
                 required: ["query"],
             },
         },
         run: async (args, ctx) => {
-            const count = Math.min(Math.max(Number(args.count) || 6, 1), 10);
+            const count = Math.min(Math.max(Number(args.count) || 8, 1), 25);
             const hits = await (0, webtools_1.webSearch)(ctx.cfg, String(args.query), count, ctx.signal, Boolean(args.deep), (msg) => ctx.log?.("warn", msg));
             if (!hits.length)
                 return "no results";
@@ -315,25 +318,92 @@ function workerToolset() {
     tools.note = {
         schema: {
             name: "note",
-            description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter.",
+            description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter. Mark kind='decision' for choices the rest of the mission must respect (these are never trimmed from digests).",
             parameters: {
                 type: "object",
                 properties: {
                     text: { type: "string" },
                     key: { type: "string", description: "Optional short label" },
+                    kind: {
+                        type: "string",
+                        enum: ["finding", "decision", "open-question", "handoff", "claim"],
+                        description: "Category (default finding). kind='claim' with key=<file path> advertises you are editing that file",
+                    },
                 },
                 required: ["text"],
             },
         },
         run: async (args, ctx) => {
-            ctx.addNote(String(args.text), args.key ? String(args.key) : undefined);
+            const kind = ["finding", "decision", "open-question", "handoff", "claim"].includes(String(args.kind))
+                ? String(args.kind)
+                : undefined;
+            ctx.addNote(String(args.text), args.key ? String(args.key) : undefined, kind);
             return "noted on the blackboard";
         },
     };
+    tools.search_notes = {
+        schema: {
+            name: "search_notes",
+            description: "Keyword-search the ENTIRE blackboard history (the digest in your prompt only shows the recent tail). Use when you need a fact another agent may have posted earlier in the run.",
+            parameters: {
+                type: "object",
+                properties: {
+                    query: { type: "string", description: "Keywords to match against note text/labels" },
+                },
+                required: ["query"],
+            },
+        },
+        run: async (args, ctx) => {
+            if (!ctx.searchNotes)
+                return "note search is unavailable in this context";
+            return ctx.searchNotes(String(args.query ?? ""));
+        },
+    };
+    tools.read_report = {
+        schema: {
+            name: "read_report",
+            description: "Read the FULL report of a settled task (dependency reports in your prompt are excerpts). Use when an excerpt cuts off details you need.",
+            parameters: {
+                type: "object",
+                properties: {
+                    task_id: { type: "string", description: "e.g. T3" },
+                },
+                required: ["task_id"],
+            },
+        },
+        run: async (args, ctx) => {
+            if (!ctx.readReport)
+                return "report lookup is unavailable in this context";
+            return ctx.readReport(String(args.task_id ?? ""));
+        },
+    };
+    tools.checkpoint = {
+        schema: {
+            name: "checkpoint",
+            description: "Journal a durable progress checkpoint: a dense summary of what you've completed, key findings, and what remains. If the run is interrupted, the next attempt resumes from your latest checkpoint instead of starting over. Use after completing each major chunk of a long task.",
+            parameters: {
+                type: "object",
+                properties: {
+                    summary: {
+                        type: "string",
+                        description: "Completed work (exact paths/commands), key findings, and remaining steps",
+                    },
+                },
+                required: ["summary"],
+            },
+        },
+        run: async (args, ctx) => {
+            const summary = String(args.summary ?? "").trim();
+            if (!summary)
+                throw new Error("summary is required");
+            ctx.addCheckpoint?.(summary);
+            return "checkpoint saved";
+        },
+    };
     tools.save_artifact = {
         schema: {
             name: "save_artifact",
-            description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file.",
+            description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file. Any file type works — save deliverables in the format that fits them (.csv/.json for data, .html for documents, runnable code files), not just markdown.",
             parameters: {
                 type: "object",
                 properties: {
@@ -366,6 +436,64 @@ function workerToolset() {
             return `saved artifacts/${name}`;
         },
     };
+    // Only offered when a crawl backend (Firecrawl / context.dev / deepcrawl)
+    // is configured — there is no free fallback for whole-site crawls.
+    if (cfg && (0, crawltools_1.resolveCrawlBackend)(cfg)) {
+        tools.crawl_site = {
+            schema: {
+                name: "crawl_site",
+                description: "Crawl a website (JS-rendered, clean markdown) and save every discovered page as a markdown file under crawl/<host>/ in the working directory. Returns an index of the saved files — read individual pages afterwards with read_file. Use for ingesting documentation sites or multi-page content; use fetch_url for single pages.",
+                parameters: {
+                    type: "object",
+                    properties: {
+                        url: { type: "string", description: "Starting URL to crawl" },
+                        max_pages: { type: "number", description: "Page limit (default 15, max 50)" },
+                        include_paths: {
+                            type: "array",
+                            items: { type: "string" },
+                            description: "Limit the crawl to URL path prefixes/globs, e.g. /docs/*",
+                        },
+                    },
+                    required: ["url"],
+                },
+            },
+            run: async (args, ctx) => {
+                const url = String(args.url ?? "");
+                if (!/^https?:\/\//.test(url))
+                    throw new Error("only http(s) URLs are supported");
+                const maxPages = Math.min(Math.max(Number(args.max_pages) || 15, 1), 50);
+                const includePaths = Array.isArray(args.include_paths)
+                    ? args.include_paths.map(String).filter(Boolean)
+                    : undefined;
+                const out = await (0, crawltools_1.crawlSite)(ctx.cfg, { url, maxPages, includePaths, signal: ctx.signal });
+                if (!out.pages.length) {
+                    return `crawled ${url} via ${out.backend}: no pages with content${out.warnings.length ? `\nwarnings: ${out.warnings.join("; ")}` : ""}`;
+                }
+                const used = new Set();
+                const lines = [];
+                for (const page of out.pages) {
+                    const { host, slug } = (0, crawltools_1.slugForUrl)(page.url || url);
+                    let rel = `crawl/${host}/${slug}.md`;
+                    for (let n = 2; used.has(rel); n++)
+                        rel = `crawl/${host}/${slug}-${n}.md`;
+                    used.add(rel);
+                    const abs = resolveWrite(rel, ctx);
+                    const header = `# ${page.title || page.url || "untitled"}\n\nSource: ${page.url || url}\n\n`;
+                    await writeFileVia(ctx, abs, header + page.markdown);
+                    if (lines.length < 50) {
+                        lines.push(`  ${rel} — "${page.title || "untitled"}" (${page.markdown.length.toLocaleString()} chars)`);
+                    }
+                }
+                const hidden = out.pages.length - lines.length;
+                return [
+                    `crawled ${url} via ${out.backend}: ${out.pages.length} page${out.pages.length > 1 ? "s" : ""} saved`,
+                    ...lines,
+                    ...(hidden > 0 ? [`  …and ${hidden} more (list crawl/ to see all)`] : []),
+                    ...(out.warnings.length ? [`warnings: ${out.warnings.join("; ")}`] : []),
+                ].join("\n");
+            },
+        };
+    }
     return tools;
 }
 function verifierToolset() {
@@ -383,6 +511,7 @@ function synthToolset() {
     return {
         read_file: all.read_file,
         list_dir: all.list_dir,
+        save_artifact: all.save_artifact,
     };
 }
 // ---------- terminal tool schemas (handled by the agent loop, not executed) ----------
@@ -402,6 +531,21 @@ exports.REPORT_TOOL = {
                 items: { type: "string" },
                 description: "Paths of files you created/changed that matter",
             },
+            key_facts: {
+                type: "array",
+                items: { type: "string" },
+                description: "3-8 standalone facts downstream tasks need (figures, paths, URLs, decisions)",
+            },
+            open_questions: {
+                type: "array",
+                items: { type: "string" },
+                description: "Unresolved questions or risks the conductor should know about",
+            },
+            files_touched: {
+                type: "array",
+                items: { type: "string" },
+                description: "Every file you created or modified (exact paths)",
+            },
         },
         required: ["status", "report"],
     },
@@ -460,6 +604,17 @@ exports.SPAWN_TASKS_TOOL = {
                         },
                         verify: { type: "boolean", description: "Adversarially verify this task's result before accepting it" },
                         context: { type: "string", description: "Facts, paths, URLs, constraints the worker needs inlined" },
+                        model: {
+                            type: "string",
+                            enum: ["cheap", "default", "strong"],
+                            description: "Model tier: cheap for scouts/bulk extraction, strong for leads, integration, and verified deliverables",
+                        },
+                        team: {
+                            type: "boolean",
+                            description: "Run as a sub-swarm: this task gets its own conductor that decomposes it into parallel sub-tasks and reports one consolidated result. Use for coherent multi-task subsystems (e.g. 'build the backend'). Teams cannot spawn teams.",
+                        },
+                        team_max_workers: { type: "number", description: "Parallelism inside the team (default: half the run's)" },
+                        team_budget_tokens: { type: "number", description: "Token slice for the team (default: a quarter of what remains)" },
                     },
                     required: ["title", "objective"],
                 },
@@ -468,6 +623,41 @@ exports.SPAWN_TASKS_TOOL = {
         required: ["tasks"],
     },
 };
+exports.CONDUCTOR_READ_REPORT_TOOL = {
+    name: "read_report",
+    description: "Read the full report of any settled task. Updates show one-line summaries once many tasks settle — use this when a summary isn't enough to plan from.",
+    parameters: {
+        type: "object",
+        properties: {
+            task_id: { type: "string", description: "e.g. T17" },
+        },
+        required: ["task_id"],
+    },
+};
+exports.UPDATE_PLAN_TOOL = {
+    name: "update_plan",
+    description: "Maintain the mission's living plan document (artifacts/mission-plan.md, full overwrite). On missions beyond ~20 tasks, keep it current: approach, phases, what's done, what's next, open risks. Its head is pinned into every update you receive, surviving history trimming and restarts.",
+    parameters: {
+        type: "object",
+        properties: {
+            markdown: { type: "string", description: "The complete plan document (markdown)" },
+        },
+        required: ["markdown"],
+    },
+};
+exports.SET_PHASE_TOOL = {
+    name: "set_phase",
+    description: "Declare the mission's current phase/milestone. Use on long missions to structure the work (e.g. 'discovery' → 'build' → 'integrate' → 'polish'). The phase and its exit criteria are pinned into every update you receive, surviving history trimming.",
+    parameters: {
+        type: "object",
+        properties: {
+            name: { type: "string", description: "Short phase name" },
+            goal: { type: "string", description: "What this phase accomplishes" },
+            exit_criteria: { type: "string", description: "Concrete conditions that end this phase" },
+        },
+        required: ["name"],
+    },
+};
 exports.WAIT_TOOL = {
     name: "wait",
     description: "Do nothing for now; wake again when running tasks report.",

package/dist/webtools.js CHANGED Viewed

@@ -1,95 +1,162 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.webSearch = webSearch;
+exports.parseBingHtml = parseBingHtml;
 exports.fetchUrl = fetchUrl;
-const child_process_1 = require("child_process");
+const crawltools_1 = require("./crawltools");
+const searchcore_1 = require("./searchcore");
 const util_1 = require("./util");
 const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36 agentswarm/0.1";
+/** How many of the merged pool get fetched for passage extraction in deep mode. */
+const DEEP_FETCH = 12;
+/** Quotable passages kept per fetched page. */
+const DEEP_PASSAGES = 3;
 /**
- * Web search backends, best first:
- *  1. SearchKit CLI (local, multi-engine, ranked + citable; `deep` fetches
- *     pages and returns quotable passages) — when installed.
- *  2. TinyFish Search (fast, structured) — when a key is configured.
- *  3. DuckDuckGo HTML scraping — always available, last resort.
+ * Web search: fan out across every available engine in parallel (DuckDuckGo +
+ * Bing scraping, plus TinyFish when keyed). In `deep` mode it also fans the
+ * query into a few complementary phrasings — so one call sweeps queries ×
+ * engines into a much larger pool — then quality-ranks and dedupes by
+ * canonical URL, fetches the top pages concurrently for quotable passages,
+ * and re-ranks by content quality. Ranking/passage algorithms live in
+ * searchcore.ts.
  */
 async function webSearch(cfg, query, count, signal, deep = false, warn) {
-    if (cfg.searchBackend === "auto" && searchkitOk !== false) {
-        try {
-            const hits = await searchkitSearch(cfg, query, count, deep, signal);
-            searchkitOk = true;
-            if (hits.length)
-                return hits;
+    // Deep searches widen recall by issuing complementary phrasings; the fast
+    // path stays a single query so an agent's tool loop isn't slowed.
+    const queries = deep ? (0, searchcore_1.expandQueries)(query) : [query];
+    const perEngine = Math.min(count, 15);
+    const engineCalls = [];
+    for (const q of queries) {
+        if (cfg.searchBackend === "tinyfish" && cfg.tinyfishApiKey) {
+            engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
         }
-        catch (e) {
-            // Not installed → stop probing for the rest of this process.
-            if (e?.code === "ENOENT")
-                searchkitOk = false;
-            else if (!searchkitWarned) {
-                // Installed but failing — say so once instead of silently degrading.
-                searchkitWarned = true;
-                warn?.(`searchkit failed (${(0, util_1.errMsg)(e)}); falling back to ${cfg.tinyfishApiKey ? "TinyFish" : "DuckDuckGo"}. ` +
-                    `Set searchBackend=ddg to skip searchkit.`);
+        else {
+            engineCalls.push(ddgSearch(q, perEngine, signal), bingSearch(q, perEngine, signal));
+            if (cfg.searchBackend === "auto" && cfg.tinyfishApiKey) {
+                engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
             }
-            /* fall through */
         }
     }
-    if (cfg.searchBackend !== "ddg" && cfg.tinyfishApiKey) {
+    const settled = await Promise.allSettled(engineCalls);
+    const candidates = settled.flatMap((s) => (s.status === "fulfilled" ? s.value : []));
+    if (!candidates.length) {
+        const firstErr = settled.find((s) => s.status === "rejected");
+        if (firstErr)
+            throw firstErr.reason;
+        return [];
+    }
+    const failures = settled.filter((s) => s.status === "rejected").length;
+    if (failures && failures === settled.length) {
+        throw (settled.find((s) => s.status === "rejected")).reason;
+    }
+    if (failures) {
+        warn?.(`${failures}/${settled.length} search engine calls failed; results come from the rest`);
+    }
+    const merged = (0, searchcore_1.mergeCandidates)(candidates, count);
+    if (!deep || !merged.length) {
+        return merged.map((c) => ({ title: c.title, url: c.url, snippet: c.snippet, date: c.date }));
+    }
+    return deepEnrich(merged, query, signal);
+}
+/**
+ * Deep mode: fetch the top pages concurrently, extract readable text and
+ * quotable passages, and re-rank by composite content quality. Pages that
+ * fail to fetch keep their snippet-level hit.
+ */
+async function deepEnrich(merged, query, signal) {
+    const terms = (0, searchcore_1.queryTerms)(query);
+    const toFetch = merged.slice(0, Math.min(merged.length, DEEP_FETCH));
+    const pages = await Promise.allSettled(toFetch.map((c) => fetchReadable(c.url, signal)));
+    const scoredHits = merged.map((c, i) => {
+        const base = { title: c.title, url: c.url, snippet: c.snippet, date: c.date };
+        const page = i < pages.length && pages[i].status === "fulfilled" ? pages[i].value : "";
+        if (!page)
+            return { hit: base, score: (0, searchcore_1.rankBonus)(i + 1, 20) };
+        const passages = (0, searchcore_1.selectPassages)(page, query);
+        const date = (0, searchcore_1.detectDate)(page.slice(0, 4000)) || c.date;
+        let domain = "";
         try {
-            return await tinyfishSearch(cfg, query, count, signal);
+            domain = new URL(c.url).hostname.replace(/^www\./, "");
         }
         catch {
-            /* fall through to DDG */
+            /* keep empty */
         }
-    }
-    return ddgSearch(query, count, signal);
+        const score = (0, searchcore_1.scorePage)({ url: c.url, domain, title: c.title, text: page, date }, terms) +
+            (0, searchcore_1.passageBonus)(passages) +
+            (0, searchcore_1.rankBonus)(i + 1, 10);
+        return {
+            hit: { ...base, date, passages: passages.slice(0, DEEP_PASSAGES).map((p) => p.text) },
+            score,
+        };
+    });
+    return scoredHits.sort((a, b) => b.score - a.score).map((s) => s.hit);
 }
-// ---------------------------------------------------------------- searchkit
-let searchkitOk = null;
-let searchkitWarned = false;
-function runCli(cmd, args, timeoutMs, signal) {
-    return new Promise((resolve, reject) => {
-        (0, child_process_1.execFile)(cmd, args, { timeout: timeoutMs, maxBuffer: 8 * 1024 * 1024, signal }, (err, stdout) => (err ? reject(err) : resolve(stdout)));
+/** Fetch one page as cleaned readable text for passage extraction (~3000 words max). */
+async function fetchReadable(url, signal) {
+    // GitHub repo pages bury the README in app markup — the raw file is cleaner.
+    const gh = /^https?:\/\/github\.com\/([^/]+)\/([^/#?]+)\/?$/.exec(url);
+    if (gh) {
+        for (const branch of ["main", "master"]) {
+            try {
+                const res = await fetch(`https://raw.githubusercontent.com/${gh[1]}/${gh[2]}/${branch}/README.md`, {
+                    headers: { "user-agent": UA },
+                    signal: mergeSignal(20_000, signal),
+                });
+                if (res.ok)
+                    return clip(await res.text());
+            }
+            catch {
+                /* fall through */
+            }
+        }
+    }
+    const res = await fetch(url, {
+        headers: { "user-agent": UA, accept: "text/html,text/*;q=0.9,*/*;q=0.5" },
+        signal: mergeSignal(20_000, signal),
+        redirect: "follow",
     });
+    if (!res.ok)
+        throw new Error(`HTTP ${res.status}`);
+    const ctype = res.headers.get("content-type") || "";
+    if (!/text\/|html|xml|json/i.test(ctype))
+        throw new Error(`not textual: ${ctype}`);
+    const body = await res.text();
+    const text = /html/i.test(ctype) ? (0, util_1.htmlToText)(body) : body;
+    return clip(text);
 }
-async function searchkitSearch(cfg, query, count, deep, signal) {
-    const args = ["search", query, "--json", "--max-results", String(count)];
-    if (!deep)
-        args.push("--no-fetch");
-    const out = await runCli(cfg.searchkitCmd, args, deep ? 90_000 : 30_000, signal);
-    const start = out.indexOf("{");
-    if (start < 0)
-        throw new Error("searchkit: no JSON in output");
-    const data = JSON.parse(out.slice(start));
-    return (data.results || []).slice(0, count).map((r) => ({
-        title: r.title || r.url,
-        url: r.url,
-        snippet: r.snippet || "",
-        date: r.published_date || undefined,
-        passages: Array.isArray(r.passages)
-            ? r.passages.slice(0, 2).map((p) => String(p.text || "")).filter(Boolean)
-            : undefined,
-    }));
+function clip(text) {
+    const words = text.replace(/\s+/g, " ").trim().split(" ");
+    return words.slice(0, 3000).join(" ");
+}
+function mergeSignal(timeoutMs, signal) {
+    const t = AbortSignal.timeout(timeoutMs);
+    if (!signal)
+        return t;
+    return typeof AbortSignal.any === "function" ? AbortSignal.any([t, signal]) : signal;
 }
+// ---------------------------------------------------------------- engines
 async function tinyfishSearch(cfg, query, count, signal) {
     const url = `https://api.search.tinyfish.ai?query=${encodeURIComponent(query)}`;
     const res = await fetch(url, {
         headers: { "X-API-Key": cfg.tinyfishApiKey },
-        signal: signal ?? AbortSignal.timeout(20000),
+        signal: mergeSignal(20_000, signal),
     });
     if (!res.ok)
         throw new Error(`tinyfish search ${res.status}`);
     const data = await res.json();
-    return (data.results || []).slice(0, count).map((r) => ({
+    return (data.results || []).slice(0, count).map((r, i) => ({
         title: r.title || r.site_name || r.url,
         url: r.url,
         snippet: r.snippet || "",
+        rank: i + 1,
+        engine: "tinyfish",
     }));
 }
 /**
  * DuckDuckGo serves two scrape-friendly endpoints with different markup.
  * A parse miss on one falls through to the other, so a DDG layout change has
- * to break both before search goes dark. Link regexes tolerate either quote
- * style and either attribute order (groups 1+2 or 3+4).
+ * to break both before the engine goes dark. Link regexes tolerate either
+ * quote style and either attribute order (groups 1+2 or 3+4).
  */
 const DDG_ENDPOINTS = [
     {
@@ -108,7 +175,7 @@ async function ddgSearch(query, count, signal) {
         try {
             const res = await fetch(ep.url + encodeURIComponent(query), {
                 headers: { "user-agent": UA },
-                signal: signal ?? AbortSignal.timeout(20000),
+                signal: mergeSignal(20_000, signal),
             });
             if (!res.ok)
                 throw new Error(`search failed: HTTP ${res.status}`);
@@ -147,19 +214,83 @@ function parseDdgHtml(html, count, linkRe) {
             continue;
         if (url.includes("duckduckgo.com/y.js"))
             continue; // ads
-        hits.push({ title, url, snippet: snippets[hits.length] || "" });
+        const snippet = snippets[hits.length] || "";
+        hits.push({ title, url, snippet, rank: hits.length + 1, engine: "ddg", date: (0, searchcore_1.detectDate)(snippet) });
+    }
+    return hits;
+}
+/** Bing's HTML results page: each hit is an <li class="b_algo"> with an <h2><a> link. */
+async function bingSearch(query, count, signal) {
+    const res = await fetch(`https://www.bing.com/search?q=${encodeURIComponent(query)}`, {
+        headers: { "user-agent": UA, "accept-language": "en-US,en;q=0.9" },
+        signal: mergeSignal(20_000, signal),
+    });
+    if (!res.ok)
+        throw new Error(`bing search ${res.status}`);
+    return parseBingHtml(await res.text(), count);
+}
+function parseBingHtml(html, count) {
+    const hits = [];
+    const blocks = html.split(/<li class="b_algo[^"]*"/i).slice(1);
+    for (const block of blocks) {
+        if (hits.length >= count)
+            break;
+        const link = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i.exec(block);
+        if (!link)
+            continue;
+        const url = decodeBingUrl((0, util_1.decodeEntities)(link[1]));
+        if (!url || !/^https?:\/\//.test(url))
+            continue;
+        const title = strip(link[2]);
+        const sn = /<p[^>]*>([\s\S]*?)<\/p>/i.exec(block);
+        const snippet = sn ? strip(sn[1]) : "";
+        hits.push({ title, url, snippet, rank: hits.length + 1, engine: "bing", date: (0, searchcore_1.detectDate)(snippet) });
     }
     return hits;
 }
+/** Bing wraps result URLs in a /ck/ redirect with a base64url-encoded `u` param. */
+function decodeBingUrl(href) {
+    let u;
+    try {
+        u = new URL(href, "https://www.bing.com");
+    }
+    catch {
+        return null;
+    }
+    if (!u.hostname.endsWith("bing.com") || !u.pathname.startsWith("/ck/"))
+        return href;
+    const encoded = u.searchParams.get("u");
+    if (!encoded)
+        return null;
+    const value = encoded.startsWith("a1") ? encoded.slice(2) : encoded;
+    const padded = value + "=".repeat((4 - (value.length % 4)) % 4);
+    try {
+        const decoded = Buffer.from(padded, "base64url").toString("utf8");
+        return decoded.startsWith("http://") || decoded.startsWith("https://") ? decoded : null;
+    }
+    catch {
+        return null;
+    }
+}
 function strip(html) {
     return (0, util_1.decodeEntities)(html.replace(/<[^>]+>/g, "")).replace(/\s+/g, " ").trim();
 }
 /**
- * Fetch a URL as readable text. Uses TinyFish Fetch (real browser, clean
- * markdown) when a key is configured; falls back to a direct request with
- * HTML→text extraction.
+ * Fetch a URL as readable text. Prefers a configured crawl backend's scrape
+ * (Firecrawl/context.dev: real browser, clean markdown), then TinyFish Fetch,
+ * then a direct request with HTML→text extraction.
  */
 async function fetchUrl(cfg, url, raw, maxChars, signal) {
+    if (!raw && (0, crawltools_1.hasScrapeBackend)(cfg)) {
+        try {
+            const text = await (0, crawltools_1.scrapeUrl)(cfg, url, signal);
+            if (text)
+                return (0, util_1.truncateMiddle)(text, maxChars, "chars");
+        }
+        catch {
+            /* fall through to TinyFish → direct */
+        }
+    }
     if (cfg.tinyfishApiKey && !raw) {
         try {
             const text = await tinyfishFetch(cfg, url, signal);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@robzilla1738/agentswarm",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "publishConfig": {
     "access": "public"
   },
@@ -46,7 +46,8 @@
     "dev:ui": "npm --prefix ui run dev",
     "serve": "node bin/swarm.js serve",
     "demo": "node bin/swarm.js demo",
-    "test": "node test/e2e.js"
+    "test": "npm run test:unit && node test/e2e.js",
+    "test:unit": "node --test test/unit/*.test.js"
   },
   "engines": {
     "node": ">=20.10"