@robzilla1738/agentswarm 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +29 -12
  2. package/dist/agent.js +2 -1
  3. package/dist/cli.js +21 -4
  4. package/dist/config.js +27 -1
  5. package/dist/executor.js +243 -43
  6. package/dist/hub.js +69 -3
  7. package/dist/memory.js +5 -4
  8. package/dist/pdftext.js +211 -0
  9. package/dist/prompts.js +23 -15
  10. package/dist/report.js +37 -0
  11. package/dist/run.js +8 -0
  12. package/dist/sandbox.js +11 -0
  13. package/dist/searchcore.js +55 -2
  14. package/dist/state.js +34 -6
  15. package/dist/tools.js +196 -19
  16. package/dist/util.js +85 -0
  17. package/dist/webtools.js +145 -15
  18. package/package.json +1 -1
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
  22. package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
  23. package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
  24. package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
  25. package/ui/out/index.html +1 -1
  26. package/ui/out/index.txt +3 -3
  27. package/ui/out/run/index.html +1 -1
  28. package/ui/out/run/index.txt +3 -3
  29. package/ui/out/settings/index.html +1 -1
  30. package/ui/out/settings/index.txt +3 -3
  31. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
  32. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
  33. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
  34. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
  35. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/tools.js CHANGED
@@ -40,6 +40,7 @@ exports.synthToolset = synthToolset;
40
40
  const fs = __importStar(require("fs"));
41
41
  const path = __importStar(require("path"));
42
42
  const crawltools_1 = require("./crawltools");
43
+ const searchcore_1 = require("./searchcore");
43
44
  const util_1 = require("./util");
44
45
  const webtools_1 = require("./webtools");
45
46
  // ---------- safety ----------
@@ -62,9 +63,48 @@ function checkCommand(cmd, cfg) {
62
63
  function resolveRead(p, ctx) {
63
64
  return path.resolve(ctx.workdir, p);
64
65
  }
66
/**
 * Quote a string so sh reads it as a single literal word.
 * The value is wrapped in single quotes; each embedded single quote is
 * written as '\'' (close the quote, emit an escaped quote, reopen).
 */
function shq(s) {
    const escaped = s.split("'").join(`'\\''`);
    return "'" + escaped + "'";
}
70
/**
 * Where a write actually lands: realpath of the deepest existing ancestor plus
 * the not-yet-created remainder. Confinement checks must use this, or a
 * symlink inside the workdir smuggles writes anywhere on the host.
 *
 * Dangling symlinks are followed as well: `fs.existsSync` reports them as
 * missing, yet a write through one creates the link's *target*, so treating
 * the link as a plain not-yet-created name would let it escape confinement.
 *
 * @param abs Absolute path the caller intends to write.
 * @returns The resolved absolute destination.
 * @throws Error when the path runs through a symlink cycle (the write itself
 *   could not succeed on such a path either).
 */
function realDestination(abs) {
    // Upper bound on chained dangling links; deeper than this is a cycle.
    const MAX_LINK_HOPS = 40;
    let target = abs;
    for (let hops = 0; hops <= MAX_LINK_HOPS; hops++) {
        let dir = target;
        const tail = [];
        let redirected = null;
        while (!fs.existsSync(dir)) {
            const linkTarget = danglingLinkTarget(dir);
            if (linkTarget !== null) {
                // Re-resolve from the link's target, keeping the remainder below it.
                redirected = path.join(linkTarget, ...tail);
                break;
            }
            tail.unshift(path.basename(dir));
            const parent = path.dirname(dir);
            if (parent === dir)
                break;
            dir = parent;
        }
        if (redirected !== null) {
            target = redirected;
            continue;
        }
        try {
            dir = fs.realpathSync(dir);
        }
        catch {
            /* races/permissions: keep the lexical path */
        }
        return path.join(dir, ...tail);
    }
    throw new Error(`too many levels of symbolic links while resolving ${abs}`);
}
/**
 * For a path that `fs.existsSync` reports as missing: if the entry itself is a
 * symlink (dangling or cyclic), return the absolute path it points at,
 * otherwise null. The link text is resolved against the real parent directory
 * so a relative `..` in the link behaves as the OS would interpret it.
 */
function danglingLinkTarget(p) {
    try {
        if (!fs.lstatSync(p).isSymbolicLink())
            return null;
        const linkText = fs.readlinkSync(p);
        let parent = path.dirname(p);
        try {
            parent = fs.realpathSync(parent);
        }
        catch {
            /* keep the lexical parent */
        }
        return path.resolve(parent, linkText);
    }
    catch {
        // No entry at all (or unreadable): not a link we can follow.
        return null;
    }
}
93
/**
 * Canonical (symlink-resolved) form of a confinement root.
 * When the path cannot be resolved, the path is returned exactly as given.
 */
function realBase(base) {
    let resolved = base;
    try {
        resolved = fs.realpathSync(base);
    }
    catch {
        /* unresolvable: keep the caller's path */
    }
    return resolved;
}
65
101
  function resolveWrite(p, ctx) {
66
102
  const abs = path.resolve(ctx.workdir, p);
67
- const ok = (0, util_1.pathInside)(ctx.workdir, abs) || (0, util_1.pathInside)(ctx.runDirPath, abs) || !ctx.cfg.safeMode;
103
+ // Remote sandboxes own their filesystem host-side realpath is meaningless there.
104
+ const real = ctx.sandbox.localFs ? realDestination(abs) : abs;
105
+ const ok = (0, util_1.pathInside)(realBase(ctx.workdir), real) ||
106
+ (0, util_1.pathInside)(realBase(ctx.runDirPath), real) ||
107
+ !ctx.cfg.safeMode;
68
108
  if (!ok) {
69
109
  throw new Error(`safeMode: writes are restricted to the working directory (${ctx.workdir}). ` +
70
110
  `Use a relative path, or save deliverables with save_artifact.`);
@@ -171,7 +211,7 @@ function workerToolset(cfg) {
171
211
  tools.replace_in_file = {
172
212
  schema: {
173
213
  name: "replace_in_file",
174
- description: "Exact string replacement in a file. `find` must match exactly (including whitespace). Fails if not found, or if ambiguous when all=false.",
214
+ description: "Exact string replacement in a file. `find` must match exactly (including whitespace). Fails if not found, or if ambiguous when all=false. For several edits to the same file, pass `edits` — they apply in order, all-or-nothing, in one call.",
175
215
  parameters: {
176
216
  type: "object",
177
217
  properties: {
@@ -179,25 +219,96 @@ function workerToolset(cfg) {
179
219
  find: { type: "string" },
180
220
  replace: { type: "string" },
181
221
  all: { type: "boolean", description: "Replace every occurrence (default false)" },
222
+ edits: {
223
+ type: "array",
224
+ description: "Batch mode: multiple find/replace pairs applied in order, atomically (replaces top-level find/replace)",
225
+ items: {
226
+ type: "object",
227
+ properties: {
228
+ find: { type: "string" },
229
+ replace: { type: "string" },
230
+ all: { type: "boolean" },
231
+ },
232
+ required: ["find", "replace"],
233
+ },
234
+ },
182
235
  },
183
- required: ["path", "find", "replace"],
236
+ required: ["path"],
184
237
  },
185
238
  },
186
239
  run: async (args, ctx) => {
187
240
  const abs = resolveWrite(String(args.path), ctx);
188
241
  const raw = await readFileVia(ctx, abs);
189
- const find = String(args.find);
190
- const replace = String(args.replace);
191
- const count = raw.split(find).length - 1;
192
- if (count === 0)
193
- throw new Error("find string not found in file");
194
- if (count > 1 && !args.all) {
195
- throw new Error(`find string matches ${count} times; provide more context or set all=true`);
242
+ const edits = Array.isArray(args.edits) && args.edits.length
243
+ ? args.edits.map((e) => ({
244
+ find: String(e.find ?? ""),
245
+ replace: String(e.replace ?? ""),
246
+ all: Boolean(e.all),
247
+ }))
248
+ : args.find !== undefined && args.replace !== undefined
249
+ ? [{ find: String(args.find), replace: String(args.replace), all: Boolean(args.all) }]
250
+ : null;
251
+ if (!edits)
252
+ throw new Error("provide find+replace, or an edits array");
253
+ // Validate-then-apply against the progressively edited content:
254
+ // any failing edit aborts the whole batch with nothing written.
255
+ let next = raw;
256
+ let total = 0;
257
+ const at = (i) => (edits.length > 1 ? `edit ${i + 1}: ` : "");
258
+ for (let i = 0; i < edits.length; i++) {
259
+ const { find, replace, all } = edits[i];
260
+ if (!find)
261
+ throw new Error(`${at(i)}find must not be empty`);
262
+ const count = next.split(find).length - 1;
263
+ if (count === 0) {
264
+ throw new Error(`${at(i)}find string not found in file${edits.length > 1 ? " — no edits were applied" : ""}`);
265
+ }
266
+ if (count > 1 && !all) {
267
+ throw new Error(`${at(i)}find string matches ${count} times; provide more context or set all=true${edits.length > 1 ? " — no edits were applied" : ""}`);
268
+ }
269
+ next = all ? next.split(find).join(replace) : next.replace(find, replace);
270
+ total += all ? count : 1;
196
271
  }
197
- const next = args.all ? raw.split(find).join(replace) : raw.replace(find, replace);
198
272
  await writeFileVia(ctx, abs, next);
199
273
  const warn = ctx.checkClaim?.(String(args.path));
200
- return `replaced ${args.all ? count : 1} occurrence(s) in ${abs}${warn ? `\n${warn}` : ""}`;
274
+ return `replaced ${total} occurrence(s) via ${edits.length} edit(s) in ${abs}${warn ? `\n${warn}` : ""}`;
275
+ },
276
+ };
277
// grep_files: regex content search executed through the sandbox's shell.
tools.grep_files = {
    schema: {
        name: "grep_files",
        description: "Search file contents with a regex (grep -E syntax). Returns matching lines as path:line:text. Use this to locate code or text instead of shell grep pipelines — one round-trip, works identically in remote sandboxes, skips node_modules/.git/build output.",
        parameters: {
            type: "object",
            properties: {
                pattern: { type: "string", description: "Extended regex (grep -E)" },
                path: { type: "string", description: "Directory or file to search (default: working directory)" },
                glob: { type: "string", description: "Filename filter, e.g. *.ts" },
                ignore_case: { type: "boolean" },
                max_results: { type: "number", description: "Default 50, max 200" },
            },
            required: ["pattern"],
        },
    },
    /**
     * Builds one `grep | head` pipeline and returns its matching lines.
     * One line more than the limit is requested so truncation can be reported.
     */
    run: async (args, ctx) => {
        const pattern = String(args.pattern ?? "");
        if (pattern.trim() === "") {
            throw new Error("pattern is required");
        }
        const root = args.path ? resolveRead(String(args.path), ctx) : ctx.workdir;
        // Limit: default 50, clamped to 1..200.
        const max = Math.min(Math.max(Number(args.max_results) || 50, 1), 200);
        const skippedDirs = ["node_modules", ".git", "dist", ".next", "out", "build", "target", "__pycache__", ".venv"];
        const argv = ["grep", `-rnE${args.ignore_case ? "i" : ""}`];
        if (args.glob) {
            argv.push(`--include=${shq(String(args.glob))}`);
        }
        for (const dir of skippedDirs) {
            argv.push(`--exclude-dir=${dir}`);
        }
        // -e keeps a pattern that starts with "-" from being read as an option.
        argv.push("-e", shq(pattern), shq(root));
        const cmd = `${argv.join(" ")} | head -n ${max + 1}`;
        const r = await ctx.sandbox.exec(cmd, { cwd: ctx.workdir, timeoutSec: 60, signal: ctx.signal });
        const hits = r.out.split("\n").filter((line) => line.length > 0);
        if (hits.length === 0) {
            return "no matches";
        }
        const body = hits.slice(0, max).join("\n");
        if (hits.length <= max) {
            return body;
        }
        return `${body}\n…more matches truncated (raise max_results or narrow the pattern)`;
    },
};
203
314
  tools.list_dir = {
@@ -295,6 +406,39 @@ function workerToolset(cfg) {
295
406
  .join("\n");
296
407
  },
297
408
  };
409
// academic_search: queries arXiv and Crossref together and merges the hits.
tools.academic_search = {
    schema: {
        name: "academic_search",
        description: "Search scholarly sources: arXiv preprints and Crossref journal/conference metadata (keyless APIs). Returns papers with title, link (arXiv/DOI), abstract snippet, and date. Use for scientific or technical questions where peer-reviewed and preprint sources beat the open web.",
        parameters: {
            type: "object",
            properties: {
                query: { type: "string" },
                count: { type: "number", description: "Max results, default 8, max 20" },
            },
            required: ["query"],
        },
    },
    /**
     * Runs both engines in parallel. One engine failing is tolerated as long
     * as the other returns something; when nothing came back at all, the first
     * engine error (if any) is rethrown, otherwise "no results" is returned.
     */
    run: async (args, ctx) => {
        // Result limit: default 8, clamped to 1..20.
        const requested = Number(args.count) || 8;
        const count = Math.min(Math.max(requested, 1), 20);
        const q = String(args.query);
        const settled = await Promise.allSettled([
            (0, webtools_1.arxivSearch)(q, count, ctx.signal),
            (0, webtools_1.crossrefSearch)(q, count, ctx.signal),
        ]);
        const candidates = settled.flatMap((s) => (s.status === "fulfilled" ? s.value : []));
        if (candidates.length === 0) {
            const failed = settled.find((s) => s.status === "rejected");
            if (failed) {
                throw failed.reason;
            }
            return "no results";
        }
        const describe = (h, i) => {
            const when = h.date ? ` (${h.date})` : "";
            return `${i + 1}. ${h.title}${when} [${h.engine}]\n ${h.url}\n ${h.snippet}`;
        };
        return (0, searchcore_1.mergeCandidates)(candidates, count).map(describe).join("\n");
    },
};
298
442
  tools.fetch_url = {
299
443
  schema: {
300
444
  name: "fetch_url",
@@ -318,7 +462,7 @@ function workerToolset(cfg) {
318
462
  tools.note = {
319
463
  schema: {
320
464
  name: "note",
321
- description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter. Mark kind='decision' for choices the rest of the mission must respect (these are never trimmed from digests).",
465
+ description: "Post a durable fact/discovery to the swarm's shared blackboard so the conductor and other agents can see it. Use sparingly — facts other tasks need, not progress chatter. Mark kind='decision' for choices the rest of the mission must respect, and kind='conflict' when independent sources disagree on a material fact (both are never trimmed from digests).",
322
466
  parameters: {
323
467
  type: "object",
324
468
  properties: {
@@ -326,18 +470,20 @@ function workerToolset(cfg) {
326
470
  key: { type: "string", description: "Optional short label" },
327
471
  kind: {
328
472
  type: "string",
329
- enum: ["finding", "decision", "open-question", "handoff", "claim"],
330
- description: "Category (default finding). kind='claim' with key=<file path> advertises you are editing that file",
473
+ enum: ["finding", "decision", "conflict", "open-question", "handoff", "claim"],
474
+ description: "Category (default finding). kind='conflict' flags sources that disagree — name both. kind='claim' with key=<file path> advertises you are editing that file",
331
475
  },
476
+ url: { type: "string", description: "Source URL backing this note, when it came from the web" },
332
477
  },
333
478
  required: ["text"],
334
479
  },
335
480
  },
336
481
  run: async (args, ctx) => {
337
- const kind = ["finding", "decision", "open-question", "handoff", "claim"].includes(String(args.kind))
482
+ const kind = ["finding", "decision", "conflict", "open-question", "handoff", "claim"].includes(String(args.kind))
338
483
  ? String(args.kind)
339
484
  : undefined;
340
- ctx.addNote(String(args.text), args.key ? String(args.key) : undefined, kind);
485
+ const url = /^https?:\/\//.test(String(args.url ?? "")) ? String(args.url) : undefined;
486
+ ctx.addNote(String(args.text), args.key ? String(args.key) : undefined, kind, url);
341
487
  return "noted on the blackboard";
342
488
  },
343
489
  };
@@ -416,8 +562,12 @@ function workerToolset(cfg) {
416
562
  },
417
563
  run: async (args, ctx) => {
418
564
  const name = String(args.name).replace(/^\/+/, "");
419
- const dest = path.join(ctx.runDirPath, "artifacts", name);
420
- if (!(0, util_1.pathInside)(path.join(ctx.runDirPath, "artifacts"), dest)) {
565
+ const artifactsRoot = path.join(ctx.runDirPath, "artifacts");
566
+ (0, util_1.ensureDir)(artifactsRoot);
567
+ const dest = path.join(artifactsRoot, name);
568
+ // Realpath-based: neither ../ traversal nor a planted symlink may move
569
+ // the artifact outside the run's artifacts folder.
570
+ if (!(0, util_1.pathInside)(realBase(artifactsRoot), realDestination(dest))) {
421
571
  throw new Error("artifact name must stay inside the artifacts folder");
422
572
  }
423
573
  (0, util_1.ensureDir)(path.dirname(dest));
@@ -546,6 +696,20 @@ exports.REPORT_TOOL = {
546
696
  items: { type: "string" },
547
697
  description: "Every file you created or modified (exact paths)",
548
698
  },
699
+ sources: {
700
+ type: "array",
701
+ description: "Web sources your findings rely on — REQUIRED whenever your work drew on the web. They flow into the final report's bibliography; a web-sourced claim without an entry here cannot be cited.",
702
+ items: {
703
+ type: "object",
704
+ properties: {
705
+ url: { type: "string" },
706
+ title: { type: "string" },
707
+ date: { type: "string", description: "Publication date if known (ISO or year)" },
708
+ note: { type: "string", description: "What this source supports" },
709
+ },
710
+ required: ["url"],
711
+ },
712
+ },
549
713
  },
550
714
  required: ["status", "report"],
551
715
  },
@@ -561,6 +725,19 @@ exports.VERDICT_TOOL = {
561
725
  type: "string",
562
726
  description: "If fail: exactly what is wrong and where. If pass: one-line confirmation of the evidence.",
563
727
  },
728
+ issues: {
729
+ type: "array",
730
+ description: "On fail: one entry per concrete problem. The worker's retry sees these verbatim — make each actionable.",
731
+ items: {
732
+ type: "object",
733
+ properties: {
734
+ problem: { type: "string", description: "What is wrong" },
735
+ evidence: { type: "string", description: "What you observed that proves it (command output, file content, URL)" },
736
+ fix: { type: "string", description: "The exact change that would resolve it" },
737
+ },
738
+ required: ["problem"],
739
+ },
740
+ },
564
741
  },
565
742
  required: ["pass", "feedback"],
566
743
  },
package/dist/util.js CHANGED
@@ -48,6 +48,7 @@ exports.ensureDir = ensureDir;
48
48
  exports.readJson = readJson;
49
49
  exports.writeJson = writeJson;
50
50
  exports.pathInside = pathInside;
51
+ exports.validateArtifactFormat = validateArtifactFormat;
51
52
  exports.decodeEntities = decodeEntities;
52
53
  exports.htmlToText = htmlToText;
53
54
  const fs = __importStar(require("fs"));
@@ -147,6 +148,90 @@ function pathInside(parent, child) {
147
148
  const rel = path.relative(path.resolve(parent), path.resolve(child));
148
149
  return rel === "" || (!rel.startsWith("..") && !path.isAbsolute(rel));
149
150
  }
151
+ // ---------- artifact validation ----------
152
/**
 * Cheap structural sanity check for common deliverable formats, so an obviously
 * malformed JSON/CSV file or a stub HTML page is caught before an LLM verifier
 * spends tokens on it.
 *
 * @param absPath Path of the file to inspect; the format is chosen by extension.
 * @returns A problem description, or null when the file looks sound, is a
 *   format that is not checked, or cannot be read (existence and size are the
 *   caller's check).
 */
function validateArtifactFormat(absPath) {
    const ext = path.extname(absPath).toLowerCase();
    const isJson = ext === ".json";
    const isCsv = ext === ".csv";
    const isHtml = ext === ".html" || ext === ".htm";
    if (!isJson && !isCsv && !isHtml) {
        return null;
    }
    let raw;
    try {
        raw = fs.readFileSync(absPath, "utf8");
    }
    catch {
        return null; // existence/size is the caller's check
    }
    if (isJson) {
        try {
            JSON.parse(raw);
        }
        catch (e) {
            return `not valid JSON (${errMsg(e)})`;
        }
        return null;
    }
    if (isCsv) {
        // Only the first 50 records are compared against the first record.
        const counts = csvFieldCounts(raw, 50);
        if (counts.length === 0) {
            return "CSV has no records";
        }
        const [expect] = counts;
        const bad = counts.findIndex((c) => c !== expect);
        return bad > 0
            ? `inconsistent CSV: record 1 has ${expect} field(s), record ${bad + 1} has ${counts[bad]}`
            : null;
    }
    // .html / .htm — reject empty shells and plain text passed off as HTML:
    // the file needs some length, an opening tag and a closing tag.
    const tooShort = raw.length < 200;
    const hasOpeningTag = /<[a-z!][^>]*>/i.test(raw);
    const hasClosingTag = /<\/[a-z][a-z0-9]*>/i.test(raw);
    if (tooShort || !hasOpeningTag || !hasClosingTag) {
        return "HTML looks like a stub (too short or no real markup)";
    }
    return null;
}
194
/**
 * Count the fields of each CSV record, scanning at most `maxRecords` records.
 * Quote-aware: commas and line breaks inside double quotes belong to the field,
 * and a doubled quote inside a quoted field is an escaped quote. Blank lines
 * are not counted as records.
 *
 * @param raw CSV text.
 * @param maxRecords Stop after this many records.
 * @returns One field count per record, in order.
 */
function csvFieldCounts(raw, maxRecords) {
    const perRecord = [];
    let fieldCount = 1;
    let seen = 0; // characters consumed in the current record (0 = blank line)
    let quoted = false;
    let pos = 0;
    while (pos < raw.length && perRecord.length < maxRecords) {
        const c = raw[pos];
        const following = raw[pos + 1];
        pos++;
        if (quoted) {
            if (c === '"') {
                if (following === '"') {
                    pos++; // escaped quote: swallow its second half
                }
                else {
                    quoted = false;
                }
            }
            seen++;
            continue;
        }
        switch (c) {
            case '"':
                quoted = true;
                seen++;
                break;
            case ",":
                fieldCount++;
                seen++;
                break;
            case "\n":
            case "\r":
                if (c === "\r" && following === "\n") {
                    pos++; // treat CRLF as a single line break
                }
                if (seen > 0) {
                    perRecord.push(fieldCount); // skip blank lines
                }
                fieldCount = 1;
                seen = 0;
                break;
            default:
                seen++;
        }
    }
    // Last record without a trailing line break.
    if (seen > 0 && perRecord.length < maxRecords) {
        perRecord.push(fieldCount);
    }
    return perRecord;
}
150
235
  // ---------- html ----------
151
236
  const ENTITIES = {
152
237
  amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " ",
package/dist/webtools.js CHANGED
@@ -1,9 +1,16 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.webSearch = webSearch;
4
+ exports._resetEngineCooldowns = _resetEngineCooldowns;
5
+ exports.tinyfishSearch = tinyfishSearch;
6
+ exports.ddgSearch = ddgSearch;
7
+ exports.bingSearch = bingSearch;
8
+ exports.arxivSearch = arxivSearch;
9
+ exports.crossrefSearch = crossrefSearch;
4
10
  exports.parseBingHtml = parseBingHtml;
5
11
  exports.fetchUrl = fetchUrl;
6
12
  const crawltools_1 = require("./crawltools");
13
+ const pdftext_1 = require("./pdftext");
7
14
  const searchcore_1 = require("./searchcore");
8
15
  const util_1 = require("./util");
9
16
  const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36 agentswarm/0.1";
@@ -20,7 +27,7 @@ const DEEP_PASSAGES = 3;
20
27
  * and re-ranks by content quality. Ranking/passage algorithms live in
21
28
  * searchcore.ts.
22
29
  */
23
- async function webSearch(cfg, query, count, signal, deep = false, warn) {
30
+ async function webSearch(cfg, query, count, signal, deep = false, warn, _retried = false) {
24
31
  // Deep searches widen recall by issuing complementary phrasings; the fast
25
32
  // path stays a single query so an agent's tool loop isn't slowed.
26
33
  const queries = deep ? (0, searchcore_1.expandQueries)(query) : [query];
@@ -37,18 +44,30 @@ async function webSearch(cfg, query, count, signal, deep = false, warn) {
37
44
  }
38
45
  }
39
46
  }
47
+ // Scholarly questions also sweep the keyless academic APIs (deep mode only).
48
+ if (deep && (0, searchcore_1.looksAcademic)(query)) {
49
+ engineCalls.push(arxivSearch(query, perEngine, signal), crossrefSearch(query, perEngine, signal));
50
+ }
40
51
  const settled = await Promise.allSettled(engineCalls);
41
52
  const candidates = settled.flatMap((s) => (s.status === "fulfilled" ? s.value : []));
42
53
  if (!candidates.length) {
43
54
  const firstErr = settled.find((s) => s.status === "rejected");
55
+ if (firstErr && settled.every((s) => s.status === "rejected"))
56
+ throw firstErr.reason;
57
+ // Engines answered but nothing parsed/matched: one retry with a
58
+ // simplified phrasing before giving up.
59
+ if (!_retried) {
60
+ const alt = (0, searchcore_1.reformulate)(query);
61
+ if (alt) {
62
+ warn?.(`no results for "${query}" — retrying as "${alt}"`);
63
+ return webSearch(cfg, alt, count, signal, deep, warn, true);
64
+ }
65
+ }
44
66
  if (firstErr)
45
67
  throw firstErr.reason;
46
68
  return [];
47
69
  }
48
70
  const failures = settled.filter((s) => s.status === "rejected").length;
49
- if (failures && failures === settled.length) {
50
- throw (settled.find((s) => s.status === "rejected")).reason;
51
- }
52
71
  if (failures) {
53
72
  warn?.(`${failures}/${settled.length} search engine calls failed; results come from the rest`);
54
73
  }
@@ -111,16 +130,23 @@ async function fetchReadable(url, signal) {
111
130
  }
112
131
  }
113
132
  const res = await fetch(url, {
114
- headers: { "user-agent": UA, accept: "text/html,text/*;q=0.9,*/*;q=0.5" },
133
+ headers: { "user-agent": UA, accept: "text/html,application/pdf,text/*;q=0.9,*/*;q=0.5" },
115
134
  signal: mergeSignal(20_000, signal),
116
135
  redirect: "follow",
117
136
  });
118
137
  if (!res.ok)
119
138
  throw new Error(`HTTP ${res.status}`);
120
139
  const ctype = res.headers.get("content-type") || "";
140
+ if (/application\/pdf/i.test(ctype)) {
141
+ const buf = Buffer.from(await res.arrayBuffer());
142
+ const pdf = buf.length <= 20_000_000 ? (0, pdftext_1.extractPdfText)(buf) : null;
143
+ if (!pdf)
144
+ throw new Error("pdf with no extractable text");
145
+ return clip(pdf.text);
146
+ }
121
147
  if (!/text\/|html|xml|json/i.test(ctype))
122
148
  throw new Error(`not textual: ${ctype}`);
123
- const body = await res.text();
149
+ const body = decodeBody(Buffer.from(await res.arrayBuffer()), ctype);
124
150
  const text = /html/i.test(ctype) ? (0, util_1.htmlToText)(body) : body;
125
151
  return clip(text);
126
152
  }
@@ -135,6 +161,36 @@ function mergeSignal(timeoutMs, signal) {
135
161
  return typeof AbortSignal.any === "function" ? AbortSignal.any([t, signal]) : signal;
136
162
  }
137
163
  // ---------------------------------------------------------------- engines
164
/**
 * Rate-limit cooldowns, keyed by engine name; each value is the epoch-ms
 * timestamp until which that engine must not be called.
 *
 * An engine that answers 429/403/503 sits out (60s, or the server's
 * retry-after capped at 120s) rather than being hammered into a long block
 * mid-research. A tiny retry-after (≤5s) is honored once within the call.
 */
const engineCooldown = new Map();
/** Test hook: drops every recorded cooldown. */
function _resetEngineCooldowns() {
    engineCooldown.clear();
}
175
/**
 * fetch() for a search engine with per-engine rate-limit handling.
 *
 * - While the engine is cooling down, fails immediately without a request.
 * - On 429/403/503 with a retry-after of at most 5s, waits that long and
 *   retries once; the wait ends early if `signal` aborts.
 * - On any other 429/403/503, records a cooldown (retry-after capped at 120s,
 *   default 60s) and throws.
 *
 * @param engine Cooldown key (engine name).
 * @param url Request URL.
 * @param init fetch options; its `signal` is replaced by a 20s timeout merged
 *   with `signal`.
 * @param signal Optional caller abort signal.
 * @returns The response for any status other than 429/403/503.
 * @throws Error when cooling down or rate-limited; the abort reason when
 *   `signal` aborts during the in-call wait.
 */
async function engineFetch(engine, url, init, signal) {
    const until = engineCooldown.get(engine) ?? 0;
    if (until > Date.now()) {
        throw new Error(`${engine} is cooling down after a rate limit (${Math.ceil((until - Date.now()) / 1000)}s left)`);
    }
    for (let attempt = 0;; attempt++) {
        const res = await fetch(url, { ...init, signal: mergeSignal(20_000, signal) });
        if (![429, 403, 503].includes(res.status))
            return res;
        // This response is never handed to the caller: release its body so the
        // connection is not held open. Best-effort — a failed cancel is harmless.
        void res.body?.cancel().catch(() => { });
        // A missing header gives Number(null) === 0; an HTTP-date gives NaN.
        // Both fall through to the default cooldown.
        const retryAfter = Number(res.headers.get("retry-after"));
        const hasRetryAfter = Number.isFinite(retryAfter) && retryAfter > 0;
        if (attempt === 0 && hasRetryAfter && retryAfter <= 5) {
            await waitUnlessAborted(retryAfter * 1000, signal);
            continue;
        }
        const ms = hasRetryAfter ? Math.min(retryAfter, 120) * 1000 : 60_000;
        engineCooldown.set(engine, Date.now() + ms);
        throw new Error(`${engine} rate-limited (HTTP ${res.status}); cooling down ${Math.round(ms / 1000)}s`);
    }
}
/** Resolve after `ms`, or reject with the signal's abort reason as soon as it aborts. */
function waitUnlessAborted(ms, signal) {
    return new Promise((resolve, reject) => {
        if (signal?.aborted) {
            reject(signal.reason);
            return;
        }
        const onAbort = () => {
            clearTimeout(timer);
            reject(signal.reason);
        };
        const timer = setTimeout(() => {
            signal?.removeEventListener("abort", onAbort);
            resolve();
        }, ms);
        signal?.addEventListener("abort", onAbort, { once: true });
    });
}
138
194
  async function tinyfishSearch(cfg, query, count, signal) {
139
195
  const url = `https://api.search.tinyfish.ai?query=${encodeURIComponent(query)}`;
140
196
  const res = await fetch(url, {
@@ -173,10 +229,9 @@ async function ddgSearch(query, count, signal) {
173
229
  let reachedAny = false;
174
230
  for (const ep of DDG_ENDPOINTS) {
175
231
  try {
176
- const res = await fetch(ep.url + encodeURIComponent(query), {
232
+ const res = await engineFetch("duckduckgo", ep.url + encodeURIComponent(query), {
177
233
  headers: { "user-agent": UA },
178
- signal: mergeSignal(20_000, signal),
179
- });
234
+ }, signal);
180
235
  if (!res.ok)
181
236
  throw new Error(`search failed: HTTP ${res.status}`);
182
237
  reachedAny = true;
@@ -221,14 +276,56 @@ function parseDdgHtml(html, count, linkRe) {
221
276
  }
222
277
  /** Bing's HTML results page: each hit is an <li class="b_algo"> with an <h2><a> link. */
223
278
  async function bingSearch(query, count, signal) {
224
- const res = await fetch(`https://www.bing.com/search?q=${encodeURIComponent(query)}`, {
279
+ const res = await engineFetch("bing", `https://www.bing.com/search?q=${encodeURIComponent(query)}`, {
225
280
  headers: { "user-agent": UA, "accept-language": "en-US,en;q=0.9" },
226
- signal: mergeSignal(20_000, signal),
227
- });
281
+ }, signal);
228
282
  if (!res.ok)
229
283
  throw new Error(`bing search ${res.status}`);
230
284
  return parseBingHtml(await res.text(), count);
231
285
  }
286
+ // ---------------------------------------------------------------- academic engines (keyless)
287
/**
 * Search arXiv through its keyless Atom API.
 *
 * At most 15 results are requested from the API, whatever `count` is. Entries
 * without a title, or whose <id> is not an http(s) URL, are skipped.
 *
 * @returns Up to `count` hits: title, url (the entry id), snippet (first 300
 *   chars of the abstract), 1-based rank, engine "arxiv", and the published
 *   date (YYYY-MM-DD) when the entry has one.
 * @throws Error when the API answers with a non-OK status.
 */
async function arxivSearch(query, count, signal) {
    const limit = Math.min(count, 15);
    const url = `https://export.arxiv.org/api/query?search_query=all:${encodeURIComponent(query)}&max_results=${limit}`;
    const res = await engineFetch("arxiv", url, { headers: { "user-agent": UA } }, signal);
    if (!res.ok) {
        throw new Error(`arxiv search ${res.status}`);
    }
    const xml = await res.text();
    // Everything before the first <entry> is feed-level metadata.
    const entries = xml.split(/<entry>/).slice(1);
    const hits = [];
    for (const entry of entries) {
        if (hits.length >= count) {
            break;
        }
        const title = strip(/<title>([\s\S]*?)<\/title>/.exec(entry)?.[1] || "");
        const id = (/<id>([\s\S]*?)<\/id>/.exec(entry)?.[1] || "").trim();
        const summary = strip(/<summary>([\s\S]*?)<\/summary>/.exec(entry)?.[1] || "");
        const published = /<published>(\d{4}-\d{2}-\d{2})/.exec(entry)?.[1];
        const usable = Boolean(id) && Boolean(title) && /^https?:\/\//.test(id);
        if (!usable) {
            continue;
        }
        hits.push({
            title,
            url: id,
            snippet: summary.slice(0, 300),
            rank: hits.length + 1,
            engine: "arxiv",
            date: published,
        });
    }
    return hits;
}
308
/**
 * Search Crossref's keyless works API for journal/conference metadata.
 *
 * At most 15 rows are requested, whatever `count` is. Items without a title
 * or DOI are skipped.
 *
 * @returns Up to `count` hits: title, url (https://doi.org/<DOI>), snippet
 *   (first 300 chars of the abstract, else the venue name), 1-based rank,
 *   engine "crossref", and a zero-padded issue date (YYYY, YYYY-MM or
 *   YYYY-MM-DD) or undefined when Crossref has none.
 * @throws Error when the API answers with a non-OK status.
 */
async function crossrefSearch(query, count, signal) {
    const url = `https://api.crossref.org/works?query=${encodeURIComponent(query)}&rows=${Math.min(count, 15)}&select=title,DOI,abstract,issued,container-title`;
    const res = await engineFetch("crossref", url, { headers: { "user-agent": UA } }, signal);
    if (!res.ok)
        throw new Error(`crossref search ${res.status}`);
    const data = await res.json();
    // An unexpected payload shape yields no hits instead of iterating garbage.
    const items = Array.isArray(data?.message?.items) ? data.message.items : [];
    const out = [];
    for (const it of items) {
        if (out.length >= count)
            break;
        const title = strip(String(Array.isArray(it.title) ? it.title[0] ?? "" : it.title ?? ""));
        if (!title || !it.DOI)
            continue;
        const date = crossrefIssuedDate(it.issued);
        const venue = Array.isArray(it["container-title"]) ? it["container-title"][0] : "";
        const snippet = (strip(String(it.abstract ?? "")) || venue || "").slice(0, 300);
        out.push({ title, url: `https://doi.org/${it.DOI}`, snippet, rank: out.length + 1, engine: "crossref", date });
    }
    return out;
}
/**
 * Turn Crossref's `issued["date-parts"][0]` ([year, month?, day?]) into an
 * ISO-style string. Parts are zero-padded so the result sorts and parses like
 * other ISO dates; a missing or null year yields undefined, not "".
 */
function crossrefIssuedDate(issued) {
    const parts = issued?.["date-parts"]?.[0];
    if (!Array.isArray(parts))
        return undefined;
    const kept = [];
    for (const part of parts.slice(0, 3)) {
        const n = Number(part);
        // Stop at the first absent/invalid part: "2021--05" would be meaningless.
        if (part == null || !Number.isInteger(n) || n <= 0)
            break;
        kept.push(String(n).padStart(kept.length === 0 ? 4 : 2, "0"));
    }
    return kept.length ? kept.join("-") : undefined;
}
232
329
  function parseBingHtml(html, count) {
233
330
  const hits = [];
234
331
  const blocks = html.split(/<li class="b_algo[^"]*"/i).slice(1);
@@ -302,18 +399,51 @@ async function fetchUrl(cfg, url, raw, maxChars, signal) {
302
399
  }
303
400
  }
304
401
  const res = await fetch(url, {
305
- headers: { "user-agent": UA, accept: "text/html,application/json,text/*;q=0.9,*/*;q=0.5" },
402
+ headers: { "user-agent": UA, accept: "text/html,application/json,application/pdf,text/*;q=0.9,*/*;q=0.5" },
306
403
  signal: signal ?? AbortSignal.timeout(25000),
307
404
  redirect: "follow",
308
405
  });
309
406
  const ctype = res.headers.get("content-type") || "";
310
- const body = await res.text();
311
407
  if (!res.ok) {
312
- return `HTTP ${res.status} ${res.statusText}\n${(0, util_1.truncateMiddle)(body, 2000, "chars")}`;
408
+ // An error page is not content: returning it as a successful result lets
409
+ // "HTTP 403 ... subscribe to continue" become a "fact" in someone's report.
410
+ const body = await res.text().catch(() => "");
411
+ throw new Error(`HTTP ${res.status} ${res.statusText} — page is not usable as a source (paywall/login/blocked?). ` +
412
+ `Try web_search for an alternative source.${body ? ` Server said: ${(0, util_1.oneLine)((0, util_1.htmlToText)(body), 200)}` : ""}`);
413
+ }
414
+ const buf = Buffer.from(await res.arrayBuffer());
415
+ if (/application\/pdf/i.test(ctype) || buf.subarray(0, 5).toString("latin1") === "%PDF-") {
416
+ if (buf.length > 20_000_000)
417
+ throw new Error(`PDF is ${Math.round(buf.length / 1e6)}MB — too large to extract`);
418
+ const pdf = (0, pdftext_1.extractPdfText)(buf);
419
+ if (!pdf) {
420
+ throw new Error("PDF contains no extractable text (likely scanned or encrypted) — find an HTML version of this source.");
421
+ }
422
+ return (0, util_1.truncateMiddle)(`[PDF, ${pdf.pages} page${pdf.pages > 1 ? "s" : ""}]\n${pdf.text}`, maxChars, "chars");
313
423
  }
424
+ const body = decodeBody(buf, ctype);
314
425
  const text = !raw && /html/i.test(ctype) ? (0, util_1.htmlToText)(body) : body;
426
+ if (!raw && /html/i.test(ctype)) {
427
+ const trimmed = text.trim();
428
+ if (trimmed.length < 400 && /subscrib|sign.?in|log.?in|enable javascript|access denied|are you a (human|robot)|captcha/i.test(trimmed)) {
429
+ return `WARNING: this page returned only a paywall/anti-bot shell — the text below is probably not the real content. Try web_search for an alternative source.\n\n${trimmed}`;
430
+ }
431
+ }
315
432
  return (0, util_1.truncateMiddle)(text, maxChars, "chars");
316
433
  }
434
/**
 * Decode a response body honoring its content-type charset (UTF-8 fallback).
 *
 * The charset parameter may be quoted (`charset="iso-8859-1"`). An unknown
 * charset label falls back to UTF-8. Decoding always goes through TextDecoder,
 * so a leading byte-order mark is dropped, as `Response.text()` would do.
 *
 * @param buf Raw body bytes.
 * @param ctype Content-Type header value ("" when absent).
 * @returns The decoded text.
 */
function decodeBody(buf, ctype) {
    const charset = /charset="?([\w-]+)/i.exec(ctype)?.[1]?.toLowerCase();
    if (charset && charset !== "utf-8" && charset !== "utf8") {
        try {
            return new TextDecoder(charset).decode(buf);
        }
        catch {
            /* unknown label — fall through to utf-8 */
        }
    }
    return new TextDecoder("utf-8").decode(buf);
}
317
447
  async function tinyfishFetch(cfg, url, signal) {
318
448
  const res = await fetch("https://api.fetch.tinyfish.ai", {
319
449
  method: "POST",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@robzilla1738/agentswarm",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },