@robzilla1738/agentswarm 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -5
- package/dist/agent.js +16 -1
- package/dist/cli.js +18 -4
- package/dist/config.js +35 -5
- package/dist/crawltools.js +247 -0
- package/dist/deepseek.js +125 -10
- package/dist/executor.js +771 -122
- package/dist/hub.js +16 -3
- package/dist/journal.js +61 -11
- package/dist/memory.js +83 -0
- package/dist/prompts.js +109 -16
- package/dist/report.js +252 -0
- package/dist/run.js +7 -2
- package/dist/searchcore.js +191 -0
- package/dist/state.js +57 -3
- package/dist/tools.js +202 -12
- package/dist/webtools.js +191 -60
- package/package.json +3 -2
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
- package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +1 -0
- package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +1 -0
- package/ui/out/_next/static/css/9f7bd82b8e4c762c.css +3 -0
- package/ui/out/fonts/PlanetKosmos.ttf +0 -0
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
- package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
- package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
- package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
- package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → errjtBR_bKoee8ogLp8xk}/_ssgManifest.js +0 -0
package/dist/tools.js
CHANGED
|
@@ -33,12 +33,13 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
|
|
36
|
+
exports.FINISH_TOOL = exports.WAIT_TOOL = exports.SET_PHASE_TOOL = exports.UPDATE_PLAN_TOOL = exports.CONDUCTOR_READ_REPORT_TOOL = exports.SPAWN_TASKS_TOOL = exports.SUBMIT_FINAL_TOOL = exports.VERDICT_TOOL = exports.REPORT_TOOL = void 0;
|
|
37
37
|
exports.workerToolset = workerToolset;
|
|
38
38
|
exports.verifierToolset = verifierToolset;
|
|
39
39
|
exports.synthToolset = synthToolset;
|
|
40
40
|
const fs = __importStar(require("fs"));
|
|
41
41
|
const path = __importStar(require("path"));
|
|
42
|
+
const crawltools_1 = require("./crawltools");
|
|
42
43
|
const util_1 = require("./util");
|
|
43
44
|
const webtools_1 = require("./webtools");
|
|
44
45
|
// ---------- safety ----------
|
|
@@ -84,7 +85,7 @@ async function writeFileVia(ctx, abs, content) {
|
|
|
84
85
|
}
|
|
85
86
|
}
|
|
86
87
|
// ---------- tool definitions ----------
|
|
87
|
-
function workerToolset() {
|
|
88
|
+
function workerToolset(cfg) {
|
|
88
89
|
const tools = {};
|
|
89
90
|
tools.shell = {
|
|
90
91
|
schema: {
|
|
@@ -163,7 +164,8 @@ function workerToolset() {
|
|
|
163
164
|
if (content.length > 5_000_000)
|
|
164
165
|
throw new Error("content too large (>5MB)");
|
|
165
166
|
await writeFileVia(ctx, abs, content);
|
|
166
|
-
|
|
167
|
+
const warn = ctx.checkClaim?.(String(args.path));
|
|
168
|
+
return `wrote ${abs} (${content.length} chars)${warn ? `\n${warn}` : ""}`;
|
|
167
169
|
},
|
|
168
170
|
};
|
|
169
171
|
tools.replace_in_file = {
|
|
@@ -194,7 +196,8 @@ function workerToolset() {
|
|
|
194
196
|
}
|
|
195
197
|
const next = args.all ? raw.split(find).join(replace) : raw.replace(find, replace);
|
|
196
198
|
await writeFileVia(ctx, abs, next);
|
|
197
|
-
|
|
199
|
+
const warn = ctx.checkClaim?.(String(args.path));
|
|
200
|
+
return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}${warn ? `\n${warn}` : ""}`;
|
|
198
201
|
},
|
|
199
202
|
};
|
|
200
203
|
tools.list_dir = {
|
|
@@ -266,20 +269,20 @@ function workerToolset() {
|
|
|
266
269
|
tools.web_search = {
|
|
267
270
|
schema: {
|
|
268
271
|
name: "web_search",
|
|
269
|
-
description: "Search the web. Returns ranked results with title, URL and snippet. " +
|
|
270
|
-
"Set deep=true to
|
|
272
|
+
description: "Search the web. Fans out across multiple engines (DuckDuckGo, Bing, +TinyFish if configured), merges and quality-ranks results, and dedupes by canonical URL. Returns ranked results with title, URL and snippet. " +
|
|
273
|
+
"Set deep=true to widen the query into complementary phrasings, fetch the top pages, and return quotable passages with publication dates — use for thorough research and any claim that needs grounding. Raise count (up to 25) to pull more sources per call.",
|
|
271
274
|
parameters: {
|
|
272
275
|
type: "object",
|
|
273
276
|
properties: {
|
|
274
277
|
query: { type: "string" },
|
|
275
|
-
count: { type: "number", description: "Max results, default
|
|
276
|
-
deep: { type: "boolean", description: "
|
|
278
|
+
count: { type: "number", description: "Max results, default 8, max 25" },
|
|
279
|
+
deep: { type: "boolean", description: "Multi-phrasing sweep + fetch pages for quotable passages" },
|
|
277
280
|
},
|
|
278
281
|
required: ["query"],
|
|
279
282
|
},
|
|
280
283
|
},
|
|
281
284
|
run: async (args, ctx) => {
|
|
282
|
-
const count = Math.min(Math.max(Number(args.count) ||
|
|
285
|
+
const count = Math.min(Math.max(Number(args.count) || 8, 1), 25);
|
|
283
286
|
const hits = await (0, webtools_1.webSearch)(ctx.cfg, String(args.query), count, ctx.signal, Boolean(args.deep), (msg) => ctx.log?.("warn", msg));
|
|
284
287
|
if (!hits.length)
|
|
285
288
|
return "no results";
|
|
@@ -315,25 +318,92 @@ function workerToolset() {
|
|
|
315
318
|
tools.note = {
|
|
316
319
|
schema: {
|
|
317
320
|
name: "note",
|
|
318
|
-
description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter.",
|
|
321
|
+
description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter. Mark kind='decision' for choices the rest of the mission must respect (these are never trimmed from digests).",
|
|
319
322
|
parameters: {
|
|
320
323
|
type: "object",
|
|
321
324
|
properties: {
|
|
322
325
|
text: { type: "string" },
|
|
323
326
|
key: { type: "string", description: "Optional short label" },
|
|
327
|
+
kind: {
|
|
328
|
+
type: "string",
|
|
329
|
+
enum: ["finding", "decision", "open-question", "handoff", "claim"],
|
|
330
|
+
description: "Category (default finding). kind='claim' with key=<file path> advertises you are editing that file",
|
|
331
|
+
},
|
|
324
332
|
},
|
|
325
333
|
required: ["text"],
|
|
326
334
|
},
|
|
327
335
|
},
|
|
328
336
|
run: async (args, ctx) => {
|
|
329
|
-
|
|
337
|
+
const kind = ["finding", "decision", "open-question", "handoff", "claim"].includes(String(args.kind))
|
|
338
|
+
? String(args.kind)
|
|
339
|
+
: undefined;
|
|
340
|
+
ctx.addNote(String(args.text), args.key ? String(args.key) : undefined, kind);
|
|
330
341
|
return "noted on the blackboard";
|
|
331
342
|
},
|
|
332
343
|
};
|
|
344
|
+
tools.search_notes = {
|
|
345
|
+
schema: {
|
|
346
|
+
name: "search_notes",
|
|
347
|
+
description: "Keyword-search the ENTIRE blackboard history (the digest in your prompt only shows the recent tail). Use when you need a fact another agent may have posted earlier in the run.",
|
|
348
|
+
parameters: {
|
|
349
|
+
type: "object",
|
|
350
|
+
properties: {
|
|
351
|
+
query: { type: "string", description: "Keywords to match against note text/labels" },
|
|
352
|
+
},
|
|
353
|
+
required: ["query"],
|
|
354
|
+
},
|
|
355
|
+
},
|
|
356
|
+
run: async (args, ctx) => {
|
|
357
|
+
if (!ctx.searchNotes)
|
|
358
|
+
return "note search is unavailable in this context";
|
|
359
|
+
return ctx.searchNotes(String(args.query ?? ""));
|
|
360
|
+
},
|
|
361
|
+
};
|
|
362
|
+
tools.read_report = {
|
|
363
|
+
schema: {
|
|
364
|
+
name: "read_report",
|
|
365
|
+
description: "Read the FULL report of a settled task (dependency reports in your prompt are excerpts). Use when an excerpt cuts off details you need.",
|
|
366
|
+
parameters: {
|
|
367
|
+
type: "object",
|
|
368
|
+
properties: {
|
|
369
|
+
task_id: { type: "string", description: "e.g. T3" },
|
|
370
|
+
},
|
|
371
|
+
required: ["task_id"],
|
|
372
|
+
},
|
|
373
|
+
},
|
|
374
|
+
run: async (args, ctx) => {
|
|
375
|
+
if (!ctx.readReport)
|
|
376
|
+
return "report lookup is unavailable in this context";
|
|
377
|
+
return ctx.readReport(String(args.task_id ?? ""));
|
|
378
|
+
},
|
|
379
|
+
};
|
|
380
|
+
tools.checkpoint = {
|
|
381
|
+
schema: {
|
|
382
|
+
name: "checkpoint",
|
|
383
|
+
description: "Journal a durable progress checkpoint: a dense summary of what you've completed, key findings, and what remains. If the run is interrupted, the next attempt resumes from your latest checkpoint instead of starting over. Use after completing each major chunk of a long task.",
|
|
384
|
+
parameters: {
|
|
385
|
+
type: "object",
|
|
386
|
+
properties: {
|
|
387
|
+
summary: {
|
|
388
|
+
type: "string",
|
|
389
|
+
description: "Completed work (exact paths/commands), key findings, and remaining steps",
|
|
390
|
+
},
|
|
391
|
+
},
|
|
392
|
+
required: ["summary"],
|
|
393
|
+
},
|
|
394
|
+
},
|
|
395
|
+
run: async (args, ctx) => {
|
|
396
|
+
const summary = String(args.summary ?? "").trim();
|
|
397
|
+
if (!summary)
|
|
398
|
+
throw new Error("summary is required");
|
|
399
|
+
ctx.addCheckpoint?.(summary);
|
|
400
|
+
return "checkpoint saved";
|
|
401
|
+
},
|
|
402
|
+
};
|
|
333
403
|
tools.save_artifact = {
|
|
334
404
|
schema: {
|
|
335
405
|
name: "save_artifact",
|
|
336
|
-
description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file.",
|
|
406
|
+
description: "Save a deliverable into the run's artifacts folder (shown prominently to the operator). Provide content, or from_path to copy an existing file. Any file type works — save deliverables in the format that fits them (.csv/.json for data, .html for documents, runnable code files), not just markdown.",
|
|
337
407
|
parameters: {
|
|
338
408
|
type: "object",
|
|
339
409
|
properties: {
|
|
@@ -366,6 +436,64 @@ function workerToolset() {
|
|
|
366
436
|
return `saved artifacts/${name}`;
|
|
367
437
|
},
|
|
368
438
|
};
|
|
439
|
+
// Only offered when a crawl backend (Firecrawl / context.dev / deepcrawl)
|
|
440
|
+
// is configured — there is no free fallback for whole-site crawls.
|
|
441
|
+
if (cfg && (0, crawltools_1.resolveCrawlBackend)(cfg)) {
|
|
442
|
+
tools.crawl_site = {
|
|
443
|
+
schema: {
|
|
444
|
+
name: "crawl_site",
|
|
445
|
+
description: "Crawl a website (JS-rendered, clean markdown) and save every discovered page as a markdown file under crawl/<host>/ in the working directory. Returns an index of the saved files — read individual pages afterwards with read_file. Use for ingesting documentation sites or multi-page content; use fetch_url for single pages.",
|
|
446
|
+
parameters: {
|
|
447
|
+
type: "object",
|
|
448
|
+
properties: {
|
|
449
|
+
url: { type: "string", description: "Starting URL to crawl" },
|
|
450
|
+
max_pages: { type: "number", description: "Page limit (default 15, max 50)" },
|
|
451
|
+
include_paths: {
|
|
452
|
+
type: "array",
|
|
453
|
+
items: { type: "string" },
|
|
454
|
+
description: "Limit the crawl to URL path prefixes/globs, e.g. /docs/*",
|
|
455
|
+
},
|
|
456
|
+
},
|
|
457
|
+
required: ["url"],
|
|
458
|
+
},
|
|
459
|
+
},
|
|
460
|
+
run: async (args, ctx) => {
|
|
461
|
+
const url = String(args.url ?? "");
|
|
462
|
+
if (!/^https?:\/\//.test(url))
|
|
463
|
+
throw new Error("only http(s) URLs are supported");
|
|
464
|
+
const maxPages = Math.min(Math.max(Number(args.max_pages) || 15, 1), 50);
|
|
465
|
+
const includePaths = Array.isArray(args.include_paths)
|
|
466
|
+
? args.include_paths.map(String).filter(Boolean)
|
|
467
|
+
: undefined;
|
|
468
|
+
const out = await (0, crawltools_1.crawlSite)(ctx.cfg, { url, maxPages, includePaths, signal: ctx.signal });
|
|
469
|
+
if (!out.pages.length) {
|
|
470
|
+
return `crawled ${url} via ${out.backend}: no pages with content${out.warnings.length ? `\nwarnings: ${out.warnings.join("; ")}` : ""}`;
|
|
471
|
+
}
|
|
472
|
+
const used = new Set();
|
|
473
|
+
const lines = [];
|
|
474
|
+
for (const page of out.pages) {
|
|
475
|
+
const { host, slug } = (0, crawltools_1.slugForUrl)(page.url || url);
|
|
476
|
+
let rel = `crawl/${host}/${slug}.md`;
|
|
477
|
+
for (let n = 2; used.has(rel); n++)
|
|
478
|
+
rel = `crawl/${host}/${slug}-${n}.md`;
|
|
479
|
+
used.add(rel);
|
|
480
|
+
const abs = resolveWrite(rel, ctx);
|
|
481
|
+
const header = `# ${page.title || page.url || "untitled"}\n\nSource: ${page.url || url}\n\n`;
|
|
482
|
+
await writeFileVia(ctx, abs, header + page.markdown);
|
|
483
|
+
if (lines.length < 50) {
|
|
484
|
+
lines.push(` ${rel} — "${page.title || "untitled"}" (${page.markdown.length.toLocaleString()} chars)`);
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
const hidden = out.pages.length - lines.length;
|
|
488
|
+
return [
|
|
489
|
+
`crawled ${url} via ${out.backend}: ${out.pages.length} page${out.pages.length > 1 ? "s" : ""} saved`,
|
|
490
|
+
...lines,
|
|
491
|
+
...(hidden > 0 ? [` …and ${hidden} more (list crawl/ to see all)`] : []),
|
|
492
|
+
...(out.warnings.length ? [`warnings: ${out.warnings.join("; ")}`] : []),
|
|
493
|
+
].join("\n");
|
|
494
|
+
},
|
|
495
|
+
};
|
|
496
|
+
}
|
|
369
497
|
return tools;
|
|
370
498
|
}
|
|
371
499
|
function verifierToolset() {
|
|
@@ -383,6 +511,7 @@ function synthToolset() {
|
|
|
383
511
|
return {
|
|
384
512
|
read_file: all.read_file,
|
|
385
513
|
list_dir: all.list_dir,
|
|
514
|
+
save_artifact: all.save_artifact,
|
|
386
515
|
};
|
|
387
516
|
}
|
|
388
517
|
// ---------- terminal tool schemas (handled by the agent loop, not executed) ----------
|
|
@@ -402,6 +531,21 @@ exports.REPORT_TOOL = {
|
|
|
402
531
|
items: { type: "string" },
|
|
403
532
|
description: "Paths of files you created/changed that matter",
|
|
404
533
|
},
|
|
534
|
+
key_facts: {
|
|
535
|
+
type: "array",
|
|
536
|
+
items: { type: "string" },
|
|
537
|
+
description: "3-8 standalone facts downstream tasks need (figures, paths, URLs, decisions)",
|
|
538
|
+
},
|
|
539
|
+
open_questions: {
|
|
540
|
+
type: "array",
|
|
541
|
+
items: { type: "string" },
|
|
542
|
+
description: "Unresolved questions or risks the conductor should know about",
|
|
543
|
+
},
|
|
544
|
+
files_touched: {
|
|
545
|
+
type: "array",
|
|
546
|
+
items: { type: "string" },
|
|
547
|
+
description: "Every file you created or modified (exact paths)",
|
|
548
|
+
},
|
|
405
549
|
},
|
|
406
550
|
required: ["status", "report"],
|
|
407
551
|
},
|
|
@@ -460,6 +604,17 @@ exports.SPAWN_TASKS_TOOL = {
|
|
|
460
604
|
},
|
|
461
605
|
verify: { type: "boolean", description: "Adversarially verify this task's result before accepting it" },
|
|
462
606
|
context: { type: "string", description: "Facts, paths, URLs, constraints the worker needs inlined" },
|
|
607
|
+
model: {
|
|
608
|
+
type: "string",
|
|
609
|
+
enum: ["cheap", "default", "strong"],
|
|
610
|
+
description: "Model tier: cheap for scouts/bulk extraction, strong for leads, integration, and verified deliverables",
|
|
611
|
+
},
|
|
612
|
+
team: {
|
|
613
|
+
type: "boolean",
|
|
614
|
+
description: "Run as a sub-swarm: this task gets its own conductor that decomposes it into parallel sub-tasks and reports one consolidated result. Use for coherent multi-task subsystems (e.g. 'build the backend'). Teams cannot spawn teams.",
|
|
615
|
+
},
|
|
616
|
+
team_max_workers: { type: "number", description: "Parallelism inside the team (default: half the run's)" },
|
|
617
|
+
team_budget_tokens: { type: "number", description: "Token slice for the team (default: a quarter of what remains)" },
|
|
463
618
|
},
|
|
464
619
|
required: ["title", "objective"],
|
|
465
620
|
},
|
|
@@ -468,6 +623,41 @@ exports.SPAWN_TASKS_TOOL = {
|
|
|
468
623
|
required: ["tasks"],
|
|
469
624
|
},
|
|
470
625
|
};
|
|
626
|
+
exports.CONDUCTOR_READ_REPORT_TOOL = {
|
|
627
|
+
name: "read_report",
|
|
628
|
+
description: "Read the full report of any settled task. Updates show one-line summaries once many tasks settle — use this when a summary isn't enough to plan from.",
|
|
629
|
+
parameters: {
|
|
630
|
+
type: "object",
|
|
631
|
+
properties: {
|
|
632
|
+
task_id: { type: "string", description: "e.g. T17" },
|
|
633
|
+
},
|
|
634
|
+
required: ["task_id"],
|
|
635
|
+
},
|
|
636
|
+
};
|
|
637
|
+
exports.UPDATE_PLAN_TOOL = {
|
|
638
|
+
name: "update_plan",
|
|
639
|
+
description: "Maintain the mission's living plan document (artifacts/mission-plan.md, full overwrite). On missions beyond ~20 tasks, keep it current: approach, phases, what's done, what's next, open risks. Its head is pinned into every update you receive, surviving history trimming and restarts.",
|
|
640
|
+
parameters: {
|
|
641
|
+
type: "object",
|
|
642
|
+
properties: {
|
|
643
|
+
markdown: { type: "string", description: "The complete plan document (markdown)" },
|
|
644
|
+
},
|
|
645
|
+
required: ["markdown"],
|
|
646
|
+
},
|
|
647
|
+
};
|
|
648
|
+
exports.SET_PHASE_TOOL = {
|
|
649
|
+
name: "set_phase",
|
|
650
|
+
description: "Declare the mission's current phase/milestone. Use on long missions to structure the work (e.g. 'discovery' → 'build' → 'integrate' → 'polish'). The phase and its exit criteria are pinned into every update you receive, surviving history trimming.",
|
|
651
|
+
parameters: {
|
|
652
|
+
type: "object",
|
|
653
|
+
properties: {
|
|
654
|
+
name: { type: "string", description: "Short phase name" },
|
|
655
|
+
goal: { type: "string", description: "What this phase accomplishes" },
|
|
656
|
+
exit_criteria: { type: "string", description: "Concrete conditions that end this phase" },
|
|
657
|
+
},
|
|
658
|
+
required: ["name"],
|
|
659
|
+
},
|
|
660
|
+
};
|
|
471
661
|
exports.WAIT_TOOL = {
|
|
472
662
|
name: "wait",
|
|
473
663
|
description: "Do nothing for now; wake again when running tasks report.",
|
package/dist/webtools.js
CHANGED
|
@@ -1,95 +1,162 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.webSearch = webSearch;
|
|
4
|
+
exports.parseBingHtml = parseBingHtml;
|
|
4
5
|
exports.fetchUrl = fetchUrl;
|
|
5
|
-
const
|
|
6
|
+
const crawltools_1 = require("./crawltools");
|
|
7
|
+
const searchcore_1 = require("./searchcore");
|
|
6
8
|
const util_1 = require("./util");
|
|
7
9
|
const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36 agentswarm/0.1";
|
|
10
|
+
/** How many of the merged pool get fetched for passage extraction in deep mode. */
|
|
11
|
+
const DEEP_FETCH = 12;
|
|
12
|
+
/** Quotable passages kept per fetched page. */
|
|
13
|
+
const DEEP_PASSAGES = 3;
|
|
8
14
|
/**
 * Web search: fan out across every available engine in parallel (DuckDuckGo +
 * Bing scraping, plus TinyFish when keyed). In `deep` mode the query is first
 * expanded into complementary phrasings, so one call sweeps queries × engines
 * into a larger candidate pool. The pool is merged by
 * `searchcore.mergeCandidates`; deep searches are then handed to `deepEnrich`
 * for page fetching and re-ranking.
 *
 * @param cfg    Run config; reads `searchBackend` and `tinyfishApiKey`.
 * @param query  The search query.
 * @param count  Max results requested; also caps the per-engine request at 15.
 * @param signal Optional AbortSignal forwarded to every engine call.
 * @param deep   When true, expand the query and enrich hits with page passages.
 * @param warn   Optional callback for non-fatal degradation messages.
 * @returns Hits shaped `{ title, url, snippet, date }`; deep hits are whatever
 *          `deepEnrich` returns. An empty array when every engine answered
 *          but none produced a candidate.
 * @throws The first engine rejection when no candidate was produced at all.
 */
async function webSearch(cfg, query, count, signal, deep = false, warn) {
    // Deep searches widen recall by issuing complementary phrasings; the fast
    // path stays a single query so an agent's tool loop isn't slowed.
    const queries = deep ? (0, searchcore_1.expandQueries)(query) : [query];
    const perEngine = Math.min(count, 15);
    const hasTinyfishKey = Boolean(cfg.tinyfishApiKey);
    const engineCalls = [];
    for (const q of queries) {
        if (cfg.searchBackend === "tinyfish" && hasTinyfishKey) {
            // Explicit TinyFish backend: that engine only.
            engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
            continue;
        }
        engineCalls.push(ddgSearch(q, perEngine, signal), bingSearch(q, perEngine, signal));
        if (cfg.searchBackend === "auto" && hasTinyfishKey) {
            engineCalls.push(tinyfishSearch(cfg, q, perEngine, signal));
        }
    }
    const settled = await Promise.allSettled(engineCalls);
    const rejected = settled.filter((s) => s.status === "rejected");
    const candidates = settled.flatMap((s) => (s.status === "fulfilled" ? s.value : []));
    if (!candidates.length) {
        // Nothing usable: surface the first engine error if there was one.
        // This also covers the "every call rejected" case, so no separate
        // all-failed check is needed further down.
        if (rejected.length) {
            throw rejected[0].reason;
        }
        return [];
    }
    if (rejected.length) {
        warn?.(`${rejected.length}/${settled.length} search engine calls failed; results come from the rest`);
    }
    const merged = (0, searchcore_1.mergeCandidates)(candidates, count);
    if (!deep || !merged.length) {
        return merged.map((c) => ({ title: c.title, url: c.url, snippet: c.snippet, date: c.date }));
    }
    return deepEnrich(merged, query, signal);
}
|
|
61
|
+
/**
 * Deep-mode enrichment. Fetches up to DEEP_FETCH of the leading merged hits
 * concurrently, pulls quotable passages out of each page, and re-orders the
 * whole list by a composite score. A hit whose page was not fetched (beyond
 * the fetch window, fetch rejected, or empty text) keeps its snippet-level
 * fields and is scored on its rank alone.
 *
 * @param merged Merged candidates in rank order (`title`, `url`, `snippet`, `date`).
 * @param query  Original query, used for term extraction and passage selection.
 * @param signal Optional AbortSignal forwarded to each page fetch.
 * @returns Hits sorted by descending score; fetched hits also carry up to
 *          DEEP_PASSAGES passage strings in `passages`.
 */
async function deepEnrich(merged, query, signal) {
    const terms = (0, searchcore_1.queryTerms)(query);
    const fetchCount = Math.min(merged.length, DEEP_FETCH);
    const pages = await Promise.allSettled(merged.slice(0, fetchCount).map((c) => fetchReadable(c.url, signal)));
    // Hostname without a leading "www."; empty when the URL does not parse.
    const domainOf = (rawUrl) => {
        try {
            return new URL(rawUrl).hostname.replace(/^www\./, "");
        }
        catch {
            /* keep empty */
            return "";
        }
    };
    const scoredHits = [];
    for (let i = 0; i < merged.length; i++) {
        const c = merged[i];
        const fetched = i < pages.length && pages[i].status === "fulfilled";
        const page = fetched ? pages[i].value : "";
        if (!page) {
            scoredHits.push({
                hit: { title: c.title, url: c.url, snippet: c.snippet, date: c.date },
                score: (0, searchcore_1.rankBonus)(i + 1, 20),
            });
            continue;
        }
        const passages = (0, searchcore_1.selectPassages)(page, query);
        // Prefer a date detected near the top of the page over the engine's date.
        const date = (0, searchcore_1.detectDate)(page.slice(0, 4000)) || c.date;
        const domain = domainOf(c.url);
        const pageScore = (0, searchcore_1.scorePage)({ url: c.url, domain, title: c.title, text: page, date }, terms);
        const score = pageScore + (0, searchcore_1.passageBonus)(passages) + (0, searchcore_1.rankBonus)(i + 1, 10);
        const topPassages = passages.slice(0, DEEP_PASSAGES).map((p) => p.text);
        scoredHits.push({
            hit: { title: c.title, url: c.url, snippet: c.snippet, date, passages: topPassages },
            score,
        });
    }
    scoredHits.sort((a, b) => b.score - a.score);
    return scoredHits.map((s) => s.hit);
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
94
|
+
/**
 * Fetch one page as cleaned readable text for passage extraction, clipped by
 * `clip`. For a GitHub repository root URL the raw README.md is tried first
 * on the `main` and then `master` branch; any failure there falls back to
 * fetching the page itself.
 *
 * @param url    Page URL.
 * @param signal Optional AbortSignal, combined with a 20s timeout per request.
 * @returns Clipped plain text of the page.
 * @throws Error on a non-OK HTTP status or a non-textual content type.
 */
async function fetchReadable(url, signal) {
    // GitHub repo pages bury the README in app markup — the raw file is cleaner.
    const repo = /^https?:\/\/github\.com\/([^/]+)\/([^/#?]+)\/?$/.exec(url);
    if (repo) {
        const [, owner, name] = repo;
        for (const branch of ["main", "master"]) {
            try {
                const readme = await fetch(`https://raw.githubusercontent.com/${owner}/${name}/${branch}/README.md`, {
                    headers: { "user-agent": UA },
                    signal: mergeSignal(20_000, signal),
                });
                if (readme.ok) {
                    return clip(await readme.text());
                }
            }
            catch {
                /* fall through */
            }
        }
    }
    const response = await fetch(url, {
        headers: { "user-agent": UA, accept: "text/html,text/*;q=0.9,*/*;q=0.5" },
        signal: mergeSignal(20_000, signal),
        redirect: "follow",
    });
    if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
    }
    const contentType = response.headers.get("content-type") || "";
    if (!/text\/|html|xml|json/i.test(contentType)) {
        throw new Error(`not textual: ${contentType}`);
    }
    const body = await response.text();
    if (/html/i.test(contentType)) {
        return clip((0, util_1.htmlToText)(body));
    }
    return clip(body);
}
|
|
54
|
-
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
if (
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
return (data.results || []).slice(0, count).map((r) => ({
|
|
64
|
-
title: r.title || r.url,
|
|
65
|
-
url: r.url,
|
|
66
|
-
snippet: r.snippet || "",
|
|
67
|
-
date: r.published_date || undefined,
|
|
68
|
-
passages: Array.isArray(r.passages)
|
|
69
|
-
? r.passages.slice(0, 2).map((p) => String(p.text || "")).filter(Boolean)
|
|
70
|
-
: undefined,
|
|
71
|
-
}));
|
|
127
|
+
/**
 * Collapse every whitespace run to a single space, trim the ends, and keep
 * at most the first 3000 space-separated words.
 *
 * @param text Raw text.
 * @returns The normalized, word-limited text.
 */
function clip(text) {
    const collapsed = text.replace(/\s+/g, " ").trim();
    // split() with a limit yields the same leading pieces as slice(0, 3000).
    return collapsed.split(" ", 3000).join(" ");
}
|
|
131
|
+
/**
 * Build an AbortSignal that fires after `timeoutMs` or when the caller's
 * `signal` aborts, whichever happens first.
 *
 * Uses `AbortSignal.any` when the runtime provides it. Otherwise the two
 * signals are linked by hand through an AbortController, so the timeout is
 * still enforced (previously the fallback returned the caller's signal alone
 * and dropped the timeout).
 *
 * @param timeoutMs Timeout in milliseconds.
 * @param signal    Optional caller-supplied AbortSignal.
 * @returns The combined signal, or just the timeout signal when `signal` is absent.
 */
function mergeSignal(timeoutMs, signal) {
    const timeout = AbortSignal.timeout(timeoutMs);
    if (!signal) {
        return timeout;
    }
    if (typeof AbortSignal.any === "function") {
        return AbortSignal.any([timeout, signal]);
    }
    const controller = new AbortController();
    for (const source of [signal, timeout]) {
        if (source.aborted) {
            controller.abort(source.reason);
            break;
        }
        // Tying the listener to controller.signal removes it as soon as either
        // source has fired, so nothing lingers on a long-lived caller signal.
        source.addEventListener("abort", () => controller.abort(source.reason), {
            once: true,
            signal: controller.signal,
        });
    }
    return controller.signal;
}
|
|
137
|
+
// ---------------------------------------------------------------- engines
|
|
73
138
|
/**
 * Query the TinyFish search API and map its results to engine candidates.
 *
 * @param cfg    Run config; `tinyfishApiKey` is sent as the `X-API-Key` header.
 * @param query  Search query.
 * @param count  Maximum number of results to keep.
 * @param signal Optional AbortSignal, combined with a 20s timeout.
 * @returns Candidates `{ title, url, snippet, rank, engine: "tinyfish" }`, rank starting at 1.
 * @throws Error when the API responds with a non-OK status.
 */
async function tinyfishSearch(cfg, query, count, signal) {
    const endpoint = `https://api.search.tinyfish.ai?query=${encodeURIComponent(query)}`;
    const response = await fetch(endpoint, {
        headers: { "X-API-Key": cfg.tinyfishApiKey },
        signal: mergeSignal(20_000, signal),
    });
    if (!response.ok) {
        throw new Error(`tinyfish search ${response.status}`);
    }
    const payload = await response.json();
    const top = (payload.results || []).slice(0, count);
    const hits = [];
    for (const [index, r] of top.entries()) {
        hits.push({
            // Fall back from title to site name to the bare URL.
            title: r.title || r.site_name || r.url,
            url: r.url,
            snippet: r.snippet || "",
            rank: index + 1,
            engine: "tinyfish",
        });
    }
    return hits;
}
|
|
88
155
|
/**
|
|
89
156
|
* DuckDuckGo serves two scrape-friendly endpoints with different markup.
|
|
90
157
|
* A parse miss on one falls through to the other, so a DDG layout change has
|
|
91
|
-
* to break both before
|
|
92
|
-
* style and either attribute order (groups 1+2 or 3+4).
|
|
158
|
+
* to break both before the engine goes dark. Link regexes tolerate either
|
|
159
|
+
* quote style and either attribute order (groups 1+2 or 3+4).
|
|
93
160
|
*/
|
|
94
161
|
const DDG_ENDPOINTS = [
|
|
95
162
|
{
|
|
@@ -108,7 +175,7 @@ async function ddgSearch(query, count, signal) {
|
|
|
108
175
|
try {
|
|
109
176
|
const res = await fetch(ep.url + encodeURIComponent(query), {
|
|
110
177
|
headers: { "user-agent": UA },
|
|
111
|
-
signal: signal
|
|
178
|
+
signal: mergeSignal(20_000, signal),
|
|
112
179
|
});
|
|
113
180
|
if (!res.ok)
|
|
114
181
|
throw new Error(`search failed: HTTP ${res.status}`);
|
|
@@ -147,19 +214,83 @@ function parseDdgHtml(html, count, linkRe) {
|
|
|
147
214
|
continue;
|
|
148
215
|
if (url.includes("duckduckgo.com/y.js"))
|
|
149
216
|
continue; // ads
|
|
150
|
-
|
|
217
|
+
const snippet = snippets[hits.length] || "";
|
|
218
|
+
hits.push({ title, url, snippet, rank: hits.length + 1, engine: "ddg", date: (0, searchcore_1.detectDate)(snippet) });
|
|
219
|
+
}
|
|
220
|
+
return hits;
|
|
221
|
+
}
|
|
222
|
+
/**
 * Fetch Bing's HTML results page for `query` and parse it with `parseBingHtml`.
 *
 * @param query  Search query.
 * @param count  Maximum number of hits to return.
 * @param signal Optional AbortSignal, combined with a 20s timeout.
 * @returns Parsed hits from the results page.
 * @throws Error when Bing responds with a non-OK status.
 */
async function bingSearch(query, count, signal) {
    const endpoint = `https://www.bing.com/search?q=${encodeURIComponent(query)}`;
    const requestInit = {
        headers: { "user-agent": UA, "accept-language": "en-US,en;q=0.9" },
        signal: mergeSignal(20_000, signal),
    };
    const response = await fetch(endpoint, requestInit);
    if (!response.ok) {
        throw new Error(`bing search ${response.status}`);
    }
    const html = await response.text();
    return parseBingHtml(html, count);
}
|
|
232
|
+
/**
 * Parse a Bing HTML results page. Each hit is an `<li class="b_algo…">` block
 * whose `<h2><a>` holds the link and title; the first `<p>` in the block, if
 * any, becomes the snippet. Blocks with no link, or whose URL does not decode
 * to http(s), are skipped.
 *
 * @param html  Raw results-page HTML.
 * @param count Maximum number of hits to return.
 * @returns Hits `{ title, url, snippet, rank, engine: "bing", date }`; rank
 *          counts accepted hits starting at 1.
 */
function parseBingHtml(html, count) {
    const linkRe = /<h2[^>]*>\s*<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i;
    const snippetRe = /<p[^>]*>([\s\S]*?)<\/p>/i;
    // Everything before the first result block is page chrome; drop it.
    const [, ...blocks] = html.split(/<li class="b_algo[^"]*"/i);
    const hits = [];
    for (const block of blocks) {
        if (hits.length >= count) {
            break;
        }
        const linkMatch = linkRe.exec(block);
        if (!linkMatch) {
            continue;
        }
        const [, rawHref, rawTitle] = linkMatch;
        const url = decodeBingUrl((0, util_1.decodeEntities)(rawHref));
        const isHttp = Boolean(url) && /^https?:\/\//.test(url);
        if (!isHttp) {
            continue;
        }
        const title = strip(rawTitle);
        const snippetMatch = snippetRe.exec(block);
        const snippet = snippetMatch ? strip(snippetMatch[1]) : "";
        hits.push({
            title,
            url,
            snippet,
            rank: hits.length + 1,
            engine: "bing",
            date: (0, searchcore_1.detectDate)(snippet),
        });
    }
    return hits;
}
|
|
251
|
+
/**
 * Bing wraps result URLs in a /ck/ redirect with a base64url-encoded `u`
 * param (optionally prefixed with "a1"). Unwrap such links; pass anything
 * else through untouched.
 *
 * The host check is an exact match on `bing.com` or a `.bing.com` suffix. A
 * bare `endsWith("bing.com")` would also treat unrelated hosts such as
 * `notbing.com` as Bing redirects and drop or mis-decode real results.
 *
 * @param href Link taken from a result block; may be relative to bing.com.
 * @returns The decoded http(s) target for a Bing redirect, `href` unchanged
 *          when it is not a Bing /ck/ redirect, or `null` when the href
 *          cannot be parsed or the redirect has no usable http(s) target.
 */
function decodeBingUrl(href) {
    let parsed;
    try {
        parsed = new URL(href, "https://www.bing.com");
    }
    catch {
        return null;
    }
    const host = parsed.hostname;
    const isBingHost = host === "bing.com" || host.endsWith(".bing.com");
    if (!isBingHost || !parsed.pathname.startsWith("/ck/")) {
        return href;
    }
    const encoded = parsed.searchParams.get("u");
    if (!encoded) {
        return null;
    }
    const value = encoded.startsWith("a1") ? encoded.slice(2) : encoded;
    const padded = value + "=".repeat((4 - (value.length % 4)) % 4);
    try {
        const decoded = Buffer.from(padded, "base64url").toString("utf8");
        return decoded.startsWith("http://") || decoded.startsWith("https://") ? decoded : null;
    }
    catch {
        return null;
    }
}
|
|
154
275
|
/**
 * Reduce an HTML fragment to plain text: drop tags, decode entities, collapse
 * whitespace runs to single spaces, and trim.
 *
 * @param html HTML fragment.
 * @returns Plain single-spaced text.
 */
function strip(html) {
    const withoutTags = html.replace(/<[^>]+>/g, "");
    const decoded = (0, util_1.decodeEntities)(withoutTags);
    return decoded.replace(/\s+/g, " ").trim();
}
|
|
157
278
|
/**
|
|
158
|
-
* Fetch a URL as readable text.
|
|
159
|
-
*
|
|
160
|
-
* HTML→text extraction.
|
|
279
|
+
* Fetch a URL as readable text. Prefers a configured crawl backend's scrape
|
|
280
|
+
* (Firecrawl/context.dev: real browser, clean markdown), then TinyFish Fetch,
|
|
281
|
+
* then a direct request with HTML→text extraction.
|
|
161
282
|
*/
|
|
162
283
|
async function fetchUrl(cfg, url, raw, maxChars, signal) {
|
|
284
|
+
if (!raw && (0, crawltools_1.hasScrapeBackend)(cfg)) {
|
|
285
|
+
try {
|
|
286
|
+
const text = await (0, crawltools_1.scrapeUrl)(cfg, url, signal);
|
|
287
|
+
if (text)
|
|
288
|
+
return (0, util_1.truncateMiddle)(text, maxChars, "chars");
|
|
289
|
+
}
|
|
290
|
+
catch {
|
|
291
|
+
/* fall through to TinyFish → direct */
|
|
292
|
+
}
|
|
293
|
+
}
|
|
163
294
|
if (cfg.tinyfishApiKey && !raw) {
|
|
164
295
|
try {
|
|
165
296
|
const text = await tinyfishFetch(cfg, url, signal);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@robzilla1738/agentswarm",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -46,7 +46,8 @@
|
|
|
46
46
|
"dev:ui": "npm --prefix ui run dev",
|
|
47
47
|
"serve": "node bin/swarm.js serve",
|
|
48
48
|
"demo": "node bin/swarm.js demo",
|
|
49
|
-
"test": "node test/e2e.js"
|
|
49
|
+
"test": "npm run test:unit && node test/e2e.js",
|
|
50
|
+
"test:unit": "node --test test/unit/*.test.js"
|
|
50
51
|
},
|
|
51
52
|
"engines": {
|
|
52
53
|
"node": ">=20.10"
|