@robzilla1738/agentswarm 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +28 -5
  2. package/dist/agent.js +16 -1
  3. package/dist/cli.js +18 -4
  4. package/dist/config.js +35 -5
  5. package/dist/crawltools.js +247 -0
  6. package/dist/deepseek.js +125 -10
  7. package/dist/executor.js +771 -122
  8. package/dist/hub.js +16 -3
  9. package/dist/journal.js +61 -11
  10. package/dist/memory.js +83 -0
  11. package/dist/prompts.js +109 -16
  12. package/dist/report.js +252 -0
  13. package/dist/run.js +7 -2
  14. package/dist/searchcore.js +191 -0
  15. package/dist/state.js +57 -3
  16. package/dist/tools.js +202 -12
  17. package/dist/webtools.js +191 -60
  18. package/package.json +3 -2
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
  22. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
  23. package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
  24. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
  25. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
  26. package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
  27. package/ui/out/fonts/PlanetKosmos.ttf +0 -0
  28. package/ui/out/index.html +1 -1
  29. package/ui/out/index.txt +3 -3
  30. package/ui/out/run/index.html +1 -1
  31. package/ui/out/run/index.txt +3 -3
  32. package/ui/out/settings/index.html +1 -1
  33. package/ui/out/settings/index.txt +3 -3
  34. package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
  35. package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
  36. package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
  37. package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
  38. package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
  39. package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
  40. package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
  41. /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
  42. /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0
package/dist/tools.js CHANGED
@@ -33,12 +33,13 @@ var __importStar = (this && this.__importStar) || (function () {
33
33
  };
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
36
+ exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SET_PHASE_TOOL = exports.UPDATE_PLAN_TOOL = exports.CONDUCTOR_READ_REPORT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
37
37
  exports.workerToolset = workerToolset;
38
38
  exports.verifierToolset = verifierToolset;
39
39
  exports.synthToolset = synthToolset;
40
40
  const fs = __importStar(require("fs"));
41
41
  const path = __importStar(require("path"));
42
+ const crawltools_1 = require("./crawltools");
42
43
  const util_1 = require("./util");
43
44
  const webtools_1 = require("./webtools");
44
45
  // ---------- safety ----------
@@ -84,7 +85,7 @@ async function writeFileVia(ctx, abs, content) {
84
85
  }
85
86
  }
86
87
  // ---------- tool definitions ----------
87
- function workerToolset() {
88
+ function workerToolset(cfg) {
88
89
  const tools = {};
89
90
  tools.shell = {
90
91
  schema: {
@@ -163,7 +164,8 @@ function workerToolset() {
163
164
  if (content.length > 5_000_000)
164
165
  throw new Error("content too large (>5MB)");
165
166
  await writeFileVia(ctx, abs, content);
166
- return `wrote ${abs} (${content.length} chars)`;
167
+ const warn = ctx.checkClaim?.(String(args.path));
168
+ return `wrote ${abs} (${content.length} chars)${warn ? `\n${warn}` : ""}`;
167
169
  },
168
170
  };
169
171
  tools.replace_in_file = {
@@ -194,7 +196,8 @@ function workerToolset() {
194
196
  }
195
197
  const next = args.all ? raw.split(find).join(replace) : raw.replace(find, replace);
196
198
  await writeFileVia(ctx, abs, next);
197
- return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}`;
199
+ const warn = ctx.checkClaim?.(String(args.path));
200
+ return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}${warn ? `\n${warn}` : ""}`;
198
201
  },
199
202
  };
200
203
  tools.list_dir = {
@@ -266,20 +269,20 @@ function workerToolset() {
266
269
  tools.web_search = {
267
270
  schema: {
268
271
  name: "web_search",
269
- description: "Search the web. Returns ranked results with title, URL and snippet. " +
270
- "Set deep=true to also fetch top pages and return quotable passages (slower; use for claims that need grounding).",
272
+ description: "Search the web. Fans out across multiple engines (DuckDuckGo, Bing, +TinyFish if configured), merges and quality-ranks results, and dedupes by canonical URL. Returns ranked results with title, URL and snippet. " +
273
+ "Set deep=true to widen the query into complementary phrasings, fetch the top pages, and return quotable passages with publication dates — use for thorough research and any claim that needs grounding. Raise count (up to 25) to pull more sources per call.",
271
274
  parameters: {
272
275
  type: "object",
273
276
  properties: {
274
277
  query: { type: "string" },
275
- count: { type: "number", description: "Max results, default 6, max 10" },
276
- deep: { type: "boolean", description: "Fetch page content for quotable passages" },
278
+ count: { type: "number", description: "Max results, default 8, max 25" },
279
+ deep: { type: "boolean", description: "Multi-phrasing sweep + fetch pages for quotable passages" },
277
280
  },
278
281
  required: ["query"],
279
282
  },
280
283
  },
281
284
  run: async (args, ctx) => {
282
- const count = Math.min(Math.max(Number(args.count) || 6, 1), 10);
285
+ const count = Math.min(Math.max(Number(args.count) || 8, 1), 25);
283
286
  const hits = await (0, webtools_1.webSearch)(ctx.cfg, String(args.query), count, ctx.signal, Boolean(args.deep), (msg) => ctx.log?.("warn", msg));
284
287
  if (!hits.length)
285
288
  return "no results";
@@ -315,25 +318,92 @@ function workerToolset() {
315
318
  tools.note = {
316
319
  schema: {
317
320
  name: "note",
318
- description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter.",
321
+ description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter. Mark kind='decision' for choices the rest of the mission must respect (these are never trimmed from digests).",
319
322
  parameters: {
320
323
  type: "object",
321
324
  properties: {
322
325
  text: { type: "string" },
323
326
  key: { type: "string", description: "Optional short label" },
327
+ kind: {
328
+ type: "string",
329
+ enum: ["finding", "decision", "open-question", "handoff", "claim"],
330
+ description: "Category (default finding). kind='claim' with key=<file path> advertises you are editing that file",
331
+ },
324
332
  },
325
333
  required: ["text"],
326
334
  },
327
335
  },
328
336
  run: async (args, ctx) => {
329
- ctx.addNote(String(args.text), args.key ? String(args.key) : undefined);
337
+ const kind = ["finding", "decision", "open-question", "handoff", "claim"].includes(String(args.kind))
338
+ ? String(args.kind)
339
+ : undefined;
340
+ ctx.addNote(String(args.text), args.key ? String(args.key) : undefined, kind);
330
341
  return "noted on the blackboard";
331
342
  },
332
343
  };
344
+ tools.search_notes = {
345
+ schema: {
346
+ name: "search_notes",
347
+ description: "Keyword-search the ENTIRE blackboard history (the digest in your prompt only shows the recent tail). Use when you need a fact another agent may have posted earlier in the run.",
348
+ parameters: {
349
+ type: "object",
350
+ properties: {
351
+ query: { type: "string", description: "Keywords to match against note text/labels" },
352
+ },
353
+ required: ["query"],
354
+ },
355
+ },
356
+ run: async (args, ctx) => {
357
+ if (!ctx.searchNotes)
358
+ return "note search is unavailable in this context";
359
+ return ctx.searchNotes(String(args.query ?? ""));
360
+ },
361
+ };
362
+ tools.read_report = {
363
+ schema: {
364
+ name: "read_report",
365
+ description: "Read the FULL report of a settled task (dependency reports in your prompt are excerpts). Use when an excerpt cuts off details you need.",
366
+ parameters: {
367
+ type: "object",
368
+ properties: {
369
+ task_id: { type: "string", description: "e.g. T3" },
370
+ },
371
+ required: ["task_id"],
372
+ },
373
+ },
374
+ run: async (args, ctx) => {
375
+ if (!ctx.readReport)
376
+ return "report lookup is unavailable in this context";
377
+ return ctx.readReport(String(args.task_id ?? ""));
378
+ },
379
+ };
380
+ tools.checkpoint = {
381
+ schema: {
382
+ name: "checkpoint",
383
+ description: "Journal a durable progress checkpoint: a dense summary of what you've completed, key findings, and what remains. If the run is interrupted, the next attempt resumes from your latest checkpoint instead of starting over. Use after completing each major chunk of a long task.",
384
+ parameters: {
385
+ type: "object",
386
+ properties: {
387
+ summary: {
388
+ type: "string",
389
+ description: "Completed work (exact paths/commands), key findings, and remaining steps",
390
+ },
391
+ },
392
+ required: ["summary"],
393
+ },
394
+ },
395
+ run: async (args, ctx) => {
396
+ const summary = String(args.summary ?? "").trim();
397
+ if (!summary)
398
+ throw new Error("summary is required");
399
+ ctx.addCheckpoint?.(summary);
400
+ return "checkpoint saved";
401
+ },
402
+ };
333
403
  tools.save_artifact = {
334
404
  schema: {
335
405
  name: "save_artifact",
336
- description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file.",
406
+ description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file. Any file type works — save deliverables in the format that fits them (.csv/.json for data, .html for documents, runnable code files), not just markdown.",
337
407
  parameters: {
338
408
  type: "object",
339
409
  properties: {
@@ -366,6 +436,64 @@ function workerToolset() {
366
436
  return `saved artifacts/${name}`;
367
437
  },
368
438
  };
439
+ // Only offered when a crawl backend (Firecrawl / context.dev / deepcrawl)
440
+ // is configured — there is no free fallback for whole-site crawls.
441
+ if (cfg && (0, crawltools_1.resolveCrawlBackend)(cfg)) {
442
+ tools.crawl_site = {
443
+ schema: {
444
+ name: "crawl_site",
445
+ description: "Crawl a website (JS-rendered, clean markdown) and save every discovered page as a markdown file under crawl/<host>/ in the working directory. Returns an index of the saved files — read individual pages afterwards with read_file. Use for ingesting documentation sites or multi-page content; use fetch_url for single pages.",
446
+ parameters: {
447
+ type: "object",
448
+ properties: {
449
+ url: { type: "string", description: "Starting URL to crawl" },
450
+ max_pages: { type: "number", description: "Page limit (default 15, max 50)" },
451
+ include_paths: {
452
+ type: "array",
453
+ items: { type: "string" },
454
+ description: "Limit the crawl to URL path prefixes/globs, e.g. /docs/*",
455
+ },
456
+ },
457
+ required: ["url"],
458
+ },
459
+ },
460
+ run: async (args, ctx) => {
461
+ const url = String(args.url ?? "");
462
+ if (!/^https?:\/\//.test(url))
463
+ throw new Error("only http(s) URLs are supported");
464
+ const maxPages = Math.min(Math.max(Number(args.max_pages) || 15, 1), 50);
465
+ const includePaths = Array.isArray(args.include_paths)
466
+ ? args.include_paths.map(String).filter(Boolean)
467
+ : undefined;
468
+ const out = await (0, crawltools_1.crawlSite)(ctx.cfg, { url, maxPages, includePaths, signal: ctx.signal });
469
+ if (!out.pages.length) {
470
+ return `crawled ${url} via ${out.backend}: no pages with content${out.warnings.length ? `\nwarnings: ${out.warnings.join("; ")}` : ""}`;
471
+ }
472
+ const used = new Set();
473
+ const lines = [];
474
+ for (const page of out.pages) {
475
+ const { host, slug } = (0, crawltools_1.slugForUrl)(page.url || url);
476
+ let rel = `crawl/${host}/${slug}.md`;
477
+ for (let n = 2; used.has(rel); n++)
478
+ rel = `crawl/${host}/${slug}-${n}.md`;
479
+ used.add(rel);
480
+ const abs = resolveWrite(rel, ctx);
481
+ const header = `# ${page.title || page.url || "untitled"}\n\nSource: ${page.url || url}\n\n`;
482
+ await writeFileVia(ctx, abs, header + page.markdown);
483
+ if (lines.length < 50) {
484
+ lines.push(` ${rel} — "${page.title || "untitled"}" (${page.markdown.length.toLocaleString()} chars)`);
485
+ }
486
+ }
487
+ const hidden = out.pages.length - lines.length;
488
+ return [
489
+ `crawled ${url} via ${out.backend}: ${out.pages.length} page${out.pages.length > 1 ? "s" : ""} saved`,
490
+ ...lines,
491
+ ...(hidden > 0 ? [` …and ${hidden} more (list crawl/ to see all)`] : []),
492
+ ...(out.warnings.length ? [`warnings: ${out.warnings.join("; ")}`] : []),
493
+ ].join("\n");
494
+ },
495
+ };
496
+ }
369
497
  return tools;
370
498
  }
371
499
  function verifierToolset() {
@@ -383,6 +511,7 @@ function synthToolset() {
383
511
  return {
384
512
  read_file: all.read_file,
385
513
  list_dir: all.list_dir,
514
+ save_artifact: all.save_artifact,
386
515
  };
387
516
  }
388
517
  // ---------- terminal tool schemas (handled by the agent loop, not executed) ----------
@@ -402,6 +531,21 @@ exports.REPORT_TOOL = {
402
531
  items: { type: "string" },
403
532
  description: "Paths of files you created/changed that matter",
404
533
  },
534
+ key_facts: {
535
+ type: "array",
536
+ items: { type: "string" },
537
+ description: "3-8 standalone facts downstream tasks need (figures, paths, URLs, decisions)",
538
+ },
539
+ open_questions: {
540
+ type: "array",
541
+ items: { type: "string" },
542
+ description: "Unresolved questions or risks the conductor should know about",
543
+ },
544
+ files_touched: {
545
+ type: "array",
546
+ items: { type: "string" },
547
+ description: "Every file you created or modified (exact paths)",
548
+ },
405
549
  },
406
550
  required: ["status", "report"],
407
551
  },
@@ -460,6 +604,17 @@ exports.SPAWN_TASKS_TOOL = {
460
604
  },
461
605
  verify: { type: "boolean", description: "Adversarially verify this task's result before accepting it" },
462
606
  context: { type: "string", description: "Facts, paths, URLs, constraints the worker needs inlined" },
607
+ model: {
608
+ type: "string",
609
+ enum: ["cheap", "default", "strong"],
610
+ description: "Model tier: cheap for scouts/bulk extraction, strong for leads, integration, and verified deliverables",
611
+ },
612
+ team: {
613
+ type: "boolean",
614
+ description: "Run as a sub-swarm: this task gets its own conductor that decomposes it into parallel sub-tasks and reports one consolidated result. Use for coherent multi-task subsystems (e.g. 'build the backend'). Teams cannot spawn teams.",
615
+ },
616
+ team_max_workers: { type: "number", description: "Parallelism inside the team (default: half the run's)" },
617
+ team_budget_tokens: { type: "number", description: "Token slice for the team (default: a quarter of what remains)" },
463
618
  },
464
619
  required: ["title", "objective"],
465
620
  },
@@ -468,6 +623,41 @@ exports.SPAWN_TASKS_TOOL = {
468
623
  required: ["tasks"],
469
624
  },
470
625
  };
626
+ exports.CONDUCTOR_READ_REPORT_TOOL = {
627
+ name: "read_report",
628
+ description: "Read the full report of any settled task. Updates show one-line summaries once many tasks settle — use this when a summary isn't enough to plan from.",
629
+ parameters: {
630
+ type: "object",
631
+ properties: {
632
+ task_id: { type: "string", description: "e.g. T17" },
633
+ },
634
+ required: ["task_id"],
635
+ },
636
+ };
637
+ exports.UPDATE_PLAN_TOOL = {
638
+ name: "update_plan",
639
+ description: "Maintain the mission's living plan document (artifacts/mission-plan.md, full overwrite). On missions beyond ~20 tasks, keep it current: approach, phases, what's done, what's next, open risks. Its head is pinned into every update you receive, surviving history trimming and restarts.",
640
+ parameters: {
641
+ type: "object",
642
+ properties: {
643
+ markdown: { type: "string", description: "The complete plan document (markdown)" },
644
+ },
645
+ required: ["markdown"],
646
+ },
647
+ };
648
+ exports.SET_PHASE_TOOL = {
649
+ name: "set_phase",
650
+ description: "Declare the mission's current phase/milestone. Use on long missions to structure the work (e.g. 'discovery' → 'build' → 'integrate' → 'polish'). The phase and its exit criteria are pinned into every update you receive, surviving history trimming.",
651
+ parameters: {
652
+ type: "object",
653
+ properties: {
654
+ name: { type: "string", description: "Short phase name" },
655
+ goal: { type: "string", description: "What this phase accomplishes" },
656
+ exit_criteria: { type: "string", description: "Concrete conditions that end this phase" },
657
+ },
658
+ required: ["name"],
659
+ },
660
+ };
471
661
  exports.WAIT_TOOL = {
472
662
  name: "wait",
473
663
  description: "Do nothing for now; wake again when running tasks report.",
package/dist/webtools.js CHANGED
@@ -1,95 +1,162 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.webSearch = webSearch;
4
+ exports.parseBingHtml = parseBingHtml;
4
5
  exports.fetchUrl = fetchUrl;
5
- const child_process_1 = require("child_process");
6
+ const crawltools_1 = require("./crawltools");
7
+ const searchcore_1 = require("./searchcore");
6
8
  const util_1 = require("./util");
7
9
  const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36 agentswarm/0.1";
10
+ /** How many of the merged pool get fetched for passage extraction in deep mode. */
11
+ const DEEP_FETCH = 12;
12
+ /** Quotable passages kept per fetched page. */
13
+ const DEEP_PASSAGES = 3;
8
14
  /**
9
- * Web search backends, best first:
10
- * 1. SearchKit CLI (local, multi-engine, ranked + citable; `deep` fetches
11
- * pages and returns quotable passages) — when installed.
12
- * 2. TinyFish Search (fast, structured) — when a key is configured.
13
- * 3. DuckDuckGo HTML scraping — always available, last resort.
15
+ * Web search: fan out across every available engine in parallel (DuckDuckGo +
16
+ * Bing scraping, plus TinyFish when keyed). In `deep` mode it also fans the
17
+ * query into a few complementary phrasings — so one call sweeps queries ×
18
+ * engines into a much larger pool — then quality-ranks and dedupes by
19
+ * canonical URL, fetches the top pages concurrently for quotable passages,
20
+ * and re-ranks by content quality. Ranking/passage algorithms live in
21
+ * searchcore.ts.
14
22
  */
15
23
  async function webSearch(cfg, query, count, signal, deep = false, warn) {
16
- if (cfg.searchBackend === "auto" && searchkitOk !== false) {
17
- try {
18
- const hits = await searchkitSearch(cfg, query, count, deep, signal);
19
- searchkitOk = true;
20
- if (hits.length)
21
- return hits;
24
+ // Deep searches widen recall by issuing complementary phrasings; the fast
25
+ // path stays a single query so an agent's tool loop isn't slowed.
26
+ const queries = deep ? (0, searchcore_1.expandQueries)(query) : [query];
27
+ const perEngine = Math.min(count, 15);
28
+ const engineCalls = [];
29
+ for (const q of queries) {
30
+ if (cfg.searchBackend === "tinyfish" && cfg.tinyfishApiKey) {
31
+ engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
22
32
  }
23
- catch (e) {
24
- // Not installed — stop probing for the rest of this process.
25
- if (e?.code === "ENOENT")
26
- searchkitOk = false;
27
- else if (!searchkitWarned) {
28
- // Installed but failing — say so once instead of silently degrading.
29
- searchkitWarned = true;
30
- warn?.(`searchkit failed (${(0, util_1.errMsg)(e)}); falling back to ${cfg.tinyfishApiKey ? "TinyFish" : "DuckDuckGo"}. ` +
31
- `Set searchBackend=ddg to skip searchkit.`);
33
+ else {
34
+ engineCalls.push(ddgSearch(q, perEngine, signal), bingSearch(q, perEngine, signal));
35
+ if (cfg.searchBackend === "auto" && cfg.tinyfishApiKey) {
36
+ engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
32
37
  }
33
- /* fall through */
34
38
  }
35
39
  }
36
- if (cfg.searchBackend !== "ddg" && cfg.tinyfishApiKey) {
40
+ const settled = await Promise.allSettled(engineCalls);
41
+ const candidates = settled.flatMap((s) => (s.status === "fulfilled" ? s.value : []));
42
+ if (!candidates.length) {
43
+ const firstErr = settled.find((s) => s.status === "rejected");
44
+ if (firstErr)
45
+ throw firstErr.reason;
46
+ return [];
47
+ }
48
+ const failures = settled.filter((s) => s.status === "rejected").length;
49
+ if (failures && failures === settled.length) {
50
+ throw (settled.find((s) => s.status === "rejected")).reason;
51
+ }
52
+ if (failures) {
53
+ warn?.(`${failures}/${settled.length} search engine calls failed; results come from the rest`);
54
+ }
55
+ const merged = (0, searchcore_1.mergeCandidates)(candidates, count);
56
+ if (!deep || !merged.length) {
57
+ return merged.map((c) => ({ title: c.title, url: c.url, snippet: c.snippet, date: c.date }));
58
+ }
59
+ return deepEnrich(merged, query, signal);
60
+ }
61
+ /**
62
+ * Deep mode: fetch the top pages concurrently, extract readable text and
63
+ * quotable passages, and re-rank by composite content quality. Pages that
64
+ * fail to fetch keep their snippet-level hit.
65
+ */
66
+ async function deepEnrich(merged, query, signal) {
67
+ const terms = (0, searchcore_1.queryTerms)(query);
68
+ const toFetch = merged.slice(0, Math.min(merged.length, DEEP_FETCH));
69
+ const pages = await Promise.allSettled(toFetch.map((c) => fetchReadable(c.url, signal)));
70
+ const scoredHits = merged.map((c, i) => {
71
+ const base = { title: c.title, url: c.url, snippet: c.snippet, date: c.date };
72
+ const page = i < pages.length && pages[i].status === "fulfilled" ? pages[i].value : "";
73
+ if (!page)
74
+ return { hit: base, score: (0, searchcore_1.rankBonus)(i + 1, 20) };
75
+ const passages = (0, searchcore_1.selectPassages)(page, query);
76
+ const date = (0, searchcore_1.detectDate)(page.slice(0, 4000)) || c.date;
77
+ let domain = "";
37
78
  try {
38
- return await tinyfishSearch(cfg, query, count, signal);
79
+ domain = new URL(c.url).hostname.replace(/^www\./, "");
39
80
  }
40
81
  catch {
41
- /* fall through to DDG */
82
+ /* keep empty */
42
83
  }
43
- }
44
- return ddgSearch(query, count, signal);
84
+ const score = (0, searchcore_1.scorePage)({ url: c.url, domain, title: c.title, text: page, date }, terms) +
85
+ (0, searchcore_1.passageBonus)(passages) +
86
+ (0, searchcore_1.rankBonus)(i + 1, 10);
87
+ return {
88
+ hit: { ...base, date, passages: passages.slice(0, DEEP_PASSAGES).map((p) => p.text) },
89
+ score,
90
+ };
91
+ });
92
+ return scoredHits.sort((a, b) => b.score - a.score).map((s) => s.hit);
45
93
  }
46
- // ---------------------------------------------------------------- searchkit
47
- let searchkitOk = null;
48
- let searchkitWarned = false;
49
- function runCli(cmd, args, timeoutMs, signal) {
50
- return new Promise((resolve, reject) => {
51
- (0, child_process_1.execFile)(cmd, args, { timeout: timeoutMs, maxBuffer: 8 * 1024 * 1024, signal }, (err, stdout) => (err ? reject(err) : resolve(stdout)));
94
+ /** Fetch one page as cleaned readable text for passage extraction (~3000 words max). */
95
+ async function fetchReadable(url, signal) {
96
+ // GitHub repo pages bury the README in app markup — the raw file is cleaner.
97
+ const gh = /^https?:\/\/github\.com\/([^/]+)\/([^/#?]+)\/?$/.exec(url);
98
+ if (gh) {
99
+ for (const branch of ["main", "master"]) {
100
+ try {
101
+ const res = await fetch(`https://raw.githubusercontent.com/${gh[1]}/${gh[2]}/${branch}/README.md`, {
102
+ headers: { "user-agent": UA },
103
+ signal: mergeSignal(20_000, signal),
104
+ });
105
+ if (res.ok)
106
+ return clip(await res.text());
107
+ }
108
+ catch {
109
+ /* fall through */
110
+ }
111
+ }
112
+ }
113
+ const res = await fetch(url, {
114
+ headers: { "user-agent": UA, accept: "text/html,text/*;q=0.9,*/*;q=0.5" },
115
+ signal: mergeSignal(20_000, signal),
116
+ redirect: "follow",
52
117
  });
118
+ if (!res.ok)
119
+ throw new Error(`HTTP ${res.status}`);
120
+ const ctype = res.headers.get("content-type") || "";
121
+ if (!/text\/|html|xml|json/i.test(ctype))
122
+ throw new Error(`not textual: ${ctype}`);
123
+ const body = await res.text();
124
+ const text = /html/i.test(ctype) ? (0, util_1.htmlToText)(body) : body;
125
+ return clip(text);
53
126
  }
54
- async function searchkitSearch(cfg, query, count, deep, signal) {
55
- const args = ["search", query, "--json", "--max-results", String(count)];
56
- if (!deep)
57
- args.push("--no-fetch");
58
- const out = await runCli(cfg.searchkitCmd, args, deep ? 90_000 : 30_000, signal);
59
- const start = out.indexOf("{");
60
- if (start < 0)
61
- throw new Error("searchkit: no JSON in output");
62
- const data = JSON.parse(out.slice(start));
63
- return (data.results || []).slice(0, count).map((r) => ({
64
- title: r.title || r.url,
65
- url: r.url,
66
- snippet: r.snippet || "",
67
- date: r.published_date || undefined,
68
- passages: Array.isArray(r.passages)
69
- ? r.passages.slice(0, 2).map((p) => String(p.text || "")).filter(Boolean)
70
- : undefined,
71
- }));
127
+ function clip(text) {
128
+ const words = text.replace(/\s+/g, " ").trim().split(" ");
129
+ return words.slice(0, 3000).join(" ");
130
+ }
131
+ function mergeSignal(timeoutMs, signal) {
132
+ const t = AbortSignal.timeout(timeoutMs);
133
+ if (!signal)
134
+ return t;
135
+ return typeof AbortSignal.any === "function" ? AbortSignal.any([t, signal]) : signal;
72
136
  }
137
+ // ---------------------------------------------------------------- engines
73
138
  async function tinyfishSearch(cfg, query, count, signal) {
74
139
  const url = `https://api.search.tinyfish.ai?query=${encodeURIComponent(query)}`;
75
140
  const res = await fetch(url, {
76
141
  headers: { "X-API-Key": cfg.tinyfishApiKey },
77
- signal: signal ?? AbortSignal.timeout(20000),
142
+ signal: mergeSignal(20_000, signal),
78
143
  });
79
144
  if (!res.ok)
80
145
  throw new Error(`tinyfish search ${res.status}`);
81
146
  const data = await res.json();
82
- return (data.results || []).slice(0, count).map((r) => ({
147
+ return (data.results || []).slice(0, count).map((r, i) => ({
83
148
  title: r.title || r.site_name || r.url,
84
149
  url: r.url,
85
150
  snippet: r.snippet || "",
151
+ rank: i + 1,
152
+ engine: "tinyfish",
86
153
  }));
87
154
  }
88
155
  /**
89
156
  * DuckDuckGo serves two scrape-friendly endpoints with different markup.
90
157
  * A parse miss on one falls through to the other, so a DDG layout change has
91
- * to break both before search goes dark. Link regexes tolerate either quote
92
- * style and either attribute order (groups 1+2 or 3+4).
158
+ * to break both before the engine goes dark. Link regexes tolerate either
159
+ * quote style and either attribute order (groups 1+2 or 3+4).
93
160
  */
94
161
  const DDG_ENDPOINTS = [
95
162
  {
@@ -108,7 +175,7 @@ async function ddgSearch(query, count, signal) {
108
175
  try {
109
176
  const res = await fetch(ep.url + encodeURIComponent(query), {
110
177
  headers: { "user-agent": UA },
111
- signal: signal ?? AbortSignal.timeout(20000),
178
+ signal: mergeSignal(20_000, signal),
112
179
  });
113
180
  if (!res.ok)
114
181
  throw new Error(`search failed: HTTP ${res.status}`);
@@ -147,19 +214,83 @@ function parseDdgHtml(html, count, linkRe) {
147
214
  continue;
148
215
  if (url.includes("duckduckgo.com/y.js"))
149
216
  continue; // ads
150
- hits.push({ title, url, snippet: snippets[hits.length] || "" });
217
+ const snippet = snippets[hits.length] || "";
218
+ hits.push({ title, url, snippet, rank: hits.length + 1, engine: "ddg", date: (0, searchcore_1.detectDate)(snippet) });
219
+ }
220
+ return hits;
221
+ }
222
+ /** Bing's HTML results page: each hit is an <li class="b_algo"> with an <h2><a> link. */
223
+ async function bingSearch(query, count, signal) {
224
+ const res = await fetch(`https://www.bing.com/search?q=${encodeURIComponent(query)}`, {
225
+ headers: { "user-agent": UA, "accept-language": "en-US,en;q=0.9" },
226
+ signal: mergeSignal(20_000, signal),
227
+ });
228
+ if (!res.ok)
229
+ throw new Error(`bing search ${res.status}`);
230
+ return parseBingHtml(await res.text(), count);
231
+ }
232
+ function parseBingHtml(html, count) {
233
+ const hits = [];
234
+ const blocks = html.split(/<li class="b_algo[^"]*"/i).slice(1);
235
+ for (const block of blocks) {
236
+ if (hits.length >= count)
237
+ break;
238
+ const link = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i.exec(block);
239
+ if (!link)
240
+ continue;
241
+ const url = decodeBingUrl((0, util_1.decodeEntities)(link[1]));
242
+ if (!url || !/^https?:\/\//.test(url))
243
+ continue;
244
+ const title = strip(link[2]);
245
+ const sn = /<p[^>]*>([\s\S]*?)<\/p>/i.exec(block);
246
+ const snippet = sn ? strip(sn[1]) : "";
247
+ hits.push({ title, url, snippet, rank: hits.length + 1, engine: "bing", date: (0, searchcore_1.detectDate)(snippet) });
151
248
  }
152
249
  return hits;
153
250
  }
251
+ /** Bing wraps result URLs in a /ck/ redirect with a base64url-encoded `u` param. */
252
+ function decodeBingUrl(href) {
253
+ let u;
254
+ try {
255
+ u = new URL(href, "https://www.bing.com");
256
+ }
257
+ catch {
258
+ return null;
259
+ }
260
+ if (!u.hostname.endsWith("bing.com") || !u.pathname.startsWith("/ck/"))
261
+ return href;
262
+ const encoded = u.searchParams.get("u");
263
+ if (!encoded)
264
+ return null;
265
+ const value = encoded.startsWith("a1") ? encoded.slice(2) : encoded;
266
+ const padded = value + "=".repeat((4 - (value.length % 4)) % 4);
267
+ try {
268
+ const decoded = Buffer.from(padded, "base64url").toString("utf8");
269
+ return decoded.startsWith("http://") || decoded.startsWith("https://") ? decoded : null;
270
+ }
271
+ catch {
272
+ return null;
273
+ }
274
+ }
154
275
  function strip(html) {
155
276
  return (0, util_1.decodeEntities)(html.replace(/<[^>]+>/g, "")).replace(/\s+/g, " ").trim();
156
277
  }
157
278
  /**
158
- * Fetch a URL as readable text. Uses TinyFish Fetch (real browser, clean
159
- * markdown) when a key is configured; falls back to a direct request with
160
- * HTML→text extraction.
279
+ * Fetch a URL as readable text. Prefers a configured crawl backend's scrape
280
+ * (Firecrawl/context.dev: real browser, clean markdown), then TinyFish Fetch,
281
+ * then a direct request with HTML→text extraction.
161
282
  */
162
283
  async function fetchUrl(cfg, url, raw, maxChars, signal) {
284
+ if (!raw && (0, crawltools_1.hasScrapeBackend)(cfg)) {
285
+ try {
286
+ const text = await (0, crawltools_1.scrapeUrl)(cfg, url, signal);
287
+ if (text)
288
+ return (0, util_1.truncateMiddle)(text, maxChars, "chars");
289
+ }
290
+ catch {
291
+ /* fall through to TinyFish → direct */
292
+ }
293
+ }
163
294
  if (cfg.tinyfishApiKey && !raw) {
164
295
  try {
165
296
  const text = await tinyfishFetch(cfg, url, signal);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@robzilla1738/agentswarm",
3
- "version": "0.3.0",
3
+ "version": "0.5.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -46,7 +46,8 @@
46
46
  "dev:ui": "npm --prefix ui run dev",
47
47
  "serve": "node bin/swarm.js serve",
48
48
  "demo": "node bin/swarm.js demo",
49
- "test": "node test/e2e.js"
49
+ "test": "npm run test:unit && node test/e2e.js",
50
+ "test:unit": "node --test test/unit/*.test.js"
50
51
  },
51
52
  "engines": {
52
53
  "node": ">=20.10"