@robzilla1738/agentswarm 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +29 -12
  2. package/dist/agent.js +2 -1
  3. package/dist/cli.js +21 -4
  4. package/dist/config.js +27 -1
  5. package/dist/executor.js +243 -43
  6. package/dist/hub.js +69 -3
  7. package/dist/memory.js +5 -4
  8. package/dist/pdftext.js +211 -0
  9. package/dist/prompts.js +23 -15
  10. package/dist/report.js +37 -0
  11. package/dist/run.js +8 -0
  12. package/dist/sandbox.js +11 -0
  13. package/dist/searchcore.js +55 -2
  14. package/dist/state.js +34 -6
  15. package/dist/tools.js +196 -19
  16. package/dist/util.js +85 -0
  17. package/dist/webtools.js +145 -15
  18. package/package.json +1 -1
  19. package/ui/out/404/index.html +1 -1
  20. package/ui/out/404.html +1 -1
  21. package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
  22. package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
  23. package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
  24. package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
  25. package/ui/out/index.html +1 -1
  26. package/ui/out/index.txt +3 -3
  27. package/ui/out/run/index.html +1 -1
  28. package/ui/out/run/index.txt +3 -3
  29. package/ui/out/settings/index.html +1 -1
  30. package/ui/out/settings/index.txt +3 -3
  31. package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
  32. package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
  33. package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
  34. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
  35. /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
@@ -0,0 +1,211 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.extractPdfText = extractPdfText;
37
+ const zlib = __importStar(require("zlib"));
38
/**
 * Minimal zero-dependency PDF text extraction: inflate FlateDecode content
 * streams (Node's built-in zlib) and interpret the text-showing operators
 * (Tj / TJ / ' / "). Good enough for most digitally-produced text PDFs;
 * returns null for scanned, encrypted, or exotic-encoding documents so the
 * caller can tell the agent to find an HTML source instead.
 *
 * @param {Buffer} buf Raw bytes of the candidate PDF.
 * @returns {{ text: string, pages: number } | null} Cleaned text plus the
 *   number of "/Type /Page" objects found (at least 1), or null when the
 *   buffer does not start with "%PDF-" or fewer than 40 non-whitespace
 *   printable characters could be recovered.
 */
function extractPdfText(buf) {
    if (buf.subarray(0, 5).toString("latin1") !== "%PDF-")
        return null;
    // latin1 preserves bytes 1:1, so stream offsets in the string match the buffer.
    const raw = buf.toString("latin1");
    // Count leaf "/Type /Page" objects; "/Type /Pages" tree nodes are filtered out.
    const pages = (raw.match(/\/Type\s*\/Pages?\b/g) || []).filter((hit) => !/Pages/.test(hit)).length || 1;
    let text = "";
    const streamRe = /<<([\s\S]{0,2000}?)>>\s*stream\r?\n/g;
    let m;
    while ((m = streamRe.exec(raw))) {
        const dict = m[1];
        const start = m.index + m[0].length;
        const end = raw.indexOf("endstream", start);
        if (end < 0)
            continue;
        // Resume scanning after this stream's payload so binary data is never
        // mistaken for a dictionary.
        streamRe.lastIndex = end;
        // Only plain or Flate-compressed streams are supported.
        const isFlate = /FlateDecode/.test(dict);
        if (/\/Filter/.test(dict) && !isFlate)
            continue;
        let data;
        if (isFlate) {
            data = inflatePdfStream(buf, start, end);
            if (data === null)
                continue; // corrupt or not really a zlib stream — best effort, skip it
        }
        else {
            // Uncompressed content is text: trailing line breaks carry no meaning.
            let len = end;
            while (len > start && (raw[len - 1] === "\n" || raw[len - 1] === "\r"))
                len--;
            data = buf.subarray(start, len);
        }
        const content = data.toString("latin1");
        if (!/\bBT\b/.test(content))
            continue; // not a text content stream
        const extracted = extractFromContent(content);
        if (extracted.trim())
            text += extracted + "\n";
    }
    const cleaned = text
        .replace(/[^\S\n]+/g, " ")
        .replace(/ ?\n ?/g, "\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
    // CID/Type0 fonts yield glyph-index garbage; require a body of real characters.
    // Kept: printable ASCII, newline, and U+00A0..U+FFFF (written as escapes so the
    // range is visible in any editor).
    const printable = cleaned.replace(/[^\x20-\x7E\n\u00A0-\uFFFF]/g, "");
    if (printable.replace(/\s/g, "").length < 40)
        return null;
    return { text: printable, pages };
}
/**
 * Inflate the zlib payload that sits in buf[start, end), where `end` is the
 * offset of the "endstream" keyword.
 *
 * Compressed data is binary, so its final byte may itself be 0x0A or 0x0D.
 * Stripping every trailing CR/LF (as is safe for text) can therefore cut off
 * the tail of the stream. Several candidate boundaries are tried instead, most
 * plausible first: one end-of-line marker removed, nothing removed, and all
 * trailing CR/LF bytes removed.
 *
 * @returns {Buffer | null} The inflated bytes, or null if no boundary decodes.
 */
function inflatePdfStream(buf, start, end) {
    let oneEolRemoved = end;
    if (oneEolRemoved > start && buf[oneEolRemoved - 1] === 0x0a)
        oneEolRemoved--;
    if (oneEolRemoved > start && buf[oneEolRemoved - 1] === 0x0d)
        oneEolRemoved--;
    let allEolRemoved = end;
    while (allEolRemoved > start && (buf[allEolRemoved - 1] === 0x0a || buf[allEolRemoved - 1] === 0x0d))
        allEolRemoved--;
    // A Set drops duplicate boundaries while keeping the preference order.
    for (const stop of new Set([oneEolRemoved, end, allEolRemoved])) {
        try {
            return zlib.inflateSync(buf.subarray(start, stop));
        }
        catch {
            // Not a complete zlib stream at this boundary — try the next one.
        }
    }
    return null;
}
94
/**
 * Walk a content stream, collecting strings shown by Tj/TJ/'/" with newline
 * heuristics.
 *
 * String operands are queued in `pending` until an operator is reached: the
 * text-showing operators append them to the output, the positioning operators
 * (Td, TD, T*, Tm, ET) flush them and end the current line, and any other
 * operator discards them.
 *
 * @param {string} src Decoded content stream (one char per byte).
 * @returns {string} The text recovered from the stream.
 */
function extractFromContent(src) {
    let out = "";
    let pending = [];
    const n = src.length;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        if (ch === "(") {
            const [s, next] = parseLiteralString(src, i);
            pending.push(s);
            i = next;
        }
        else if (ch === "<" && src[i + 1] === "<") {
            // Dictionary opener (e.g. marked-content properties). Skip BOTH
            // brackets: stepping over only the first would make the second look
            // like the start of a hex string and decode dictionary keys as text.
            i += 2;
        }
        else if (ch === "<") {
            const close = src.indexOf(">", i + 1);
            if (close < 0)
                break; // unterminated hex string — nothing more can be parsed
            pending.push(decodeHexString(src.slice(i + 1, close)));
            i = close + 1;
        }
        else if (ch === "%") {
            // comment to end of line
            while (i < n && src[i] !== "\n" && src[i] !== "\r")
                i++;
        }
        else if (/[A-Za-z'"*]/.test(ch)) {
            let j = i;
            while (j < n && /[A-Za-z'"*]/.test(src[j]))
                j++;
            const op = src.slice(i, j);
            if (op === "Tj" || op === "TJ") {
                out += pending.join("");
            }
            else if (op === "'" || op === '"') {
                // These operators move to the next line before showing text.
                out += "\n" + pending.join("");
            }
            else if (op === "Td" || op === "TD" || op === "T*" || op === "Tm" || op === "ET") {
                if (pending.length)
                    out += pending.join("");
                if (!out.endsWith("\n"))
                    out += "\n";
            }
            // Every operator consumes its operands.
            pending = [];
            i = j;
        }
        else if (ch === "-" || (ch >= "0" && ch <= "9") || ch === ".") {
            let j = i + 1;
            while (j < n && /[0-9.]/.test(src[j]))
                j++;
            // Large negative kerning inside a TJ array is a word gap.
            const num = parseFloat(src.slice(i, j));
            if (num <= -180 && pending.length && !pending[pending.length - 1].endsWith(" "))
                pending.push(" ");
            i = j;
        }
        else {
            i++;
        }
    }
    return out;
}
155
/**
 * PDF literal string: balanced parens, backslash escapes, octal codes.
 *
 * Parsing starts at `start`, which must index the opening "(". Nested
 * unescaped parentheses are kept in the result; the outermost pair is not.
 *
 * @param {string} src Content stream text.
 * @param {number} start Index of the opening parenthesis.
 * @returns {[string, number]} The decoded string and the index just past the
 *   closing parenthesis (or `src.length` if the string is unterminated).
 */
function parseLiteralString(src, start) {
    const escapes = { n: "\n", r: "\r", t: "\t", b: "\b", f: "\f", "(": "(", ")": ")", "\\": "\\" };
    let out = "";
    let depth = 0;
    let i = start;
    for (; i < src.length; i++) {
        const ch = src[i];
        if (ch === "\\") {
            const next = src[i + 1];
            if (next >= "0" && next <= "7") {
                // Up to three octal digits encode one byte.
                let oct = "";
                for (let k = 1; k <= 3 && src[i + k] >= "0" && src[i + k] <= "7"; k++)
                    oct += src[i + k];
                // Values above \377 overflow a byte; the high-order bit is ignored.
                out += String.fromCharCode(parseInt(oct, 8) & 0xff);
                i += oct.length;
            }
            else if (next === "\n" || next === "\r") {
                // Backslash at end of line is a line continuation: the line break
                // (LF, CR, or CRLF) is not part of the string.
                i += next === "\r" && src[i + 2] === "\n" ? 2 : 1;
            }
            else {
                // Known escape, or an unknown one where the backslash is dropped.
                out += escapes[next] ?? next ?? "";
                i++;
            }
        }
        else if (ch === "(") {
            depth++;
            if (depth > 1)
                out += ch;
        }
        else if (ch === ")") {
            depth--;
            if (depth === 0) {
                i++;
                break;
            }
            out += ch;
        }
        else {
            out += ch;
        }
    }
    return [out, i];
}
196
/**
 * Decode the body of a PDF hex string (the text between "<" and ">").
 *
 * Anything that is not a hex digit is ignored. Digits are read in pairs, one
 * byte each; a dangling final digit is treated as if followed by "0". When the
 * bytes begin with the UTF-16BE byte-order mark (FE FF) the rest is read as
 * two-byte code units (an unpaired trailing byte is dropped); otherwise every
 * byte becomes one character.
 *
 * @param {string} hex Raw hex-string contents.
 * @returns {string} The decoded text.
 */
function decodeHexString(hex) {
    const digits = hex.replace(/[^0-9a-fA-F]/g, "");
    // Padding an odd-length run with "0" yields the same final byte as
    // handling the dangling digit separately.
    const padded = digits.length % 2 ? digits + "0" : digits;
    const bytes = [];
    for (let pos = 0; pos < padded.length; pos += 2)
        bytes.push(parseInt(padded.slice(pos, pos + 2), 16));
    const hasBom = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    const chars = [];
    if (hasBom) {
        for (let pos = 2; pos + 1 < bytes.length; pos += 2)
            chars.push(String.fromCharCode(bytes[pos] * 256 + bytes[pos + 1]));
    }
    else {
        for (const byte of bytes)
            chars.push(String.fromCharCode(byte));
    }
    return chars.join("");
}
package/dist/prompts.js CHANGED
@@ -122,7 +122,7 @@ function taskTable(tasks) {
122
122
  return "(no tasks yet)";
123
123
  const line = (t) => {
124
124
  const deps = t.deps.length ? ` deps:[${t.deps.join(",")}]` : "";
125
- const extra = t.status === "failed" && t.error ? ` — ${(0, util_1.clip)(t.error, 80)}` : "";
125
+ const extra = (t.status === "failed" || t.status === "blocked") && t.error ? ` — ${(0, util_1.clip)(t.error, 120)}` : "";
126
126
  return `${t.id} [${t.status}${t.attempt > 1 ? ` a${t.attempt}` : ""}] (${t.role})${deps} ${(0, util_1.clip)(t.title, 70)}${extra}`;
127
127
  };
128
128
  const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
@@ -146,6 +146,13 @@ function taskTable(tasks) {
146
146
  }
147
147
  return out.join("\n");
148
148
  }
149
/**
 * One-line summary of a task's reported sources for inclusion in a report block.
 *
 * @param {{ sources?: Array<{ url: string }> }} t Task record.
 * @param {number} [max=6] Maximum number of URLs to list.
 * @returns {string} "" when the task has no sources; otherwise a string that
 *   starts with a newline, lists up to `max` URLs separated by " · ", and
 *   ends with " (+N more)" when some were left out.
 */
function sourcesLine(t, max = 6) {
    const all = t.sources;
    if (!all?.length)
        return "";
    const urls = [];
    for (const entry of all.slice(0, max))
        urls.push(entry.url);
    const suffix = all.length > max ? ` (+${all.length - max} more)` : "";
    return "\nsources: " + urls.join(" · ") + suffix;
}
149
156
  function reportBlock(t) {
150
157
  const head = `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}${t.attempt > 1 ? ` (attempt ${t.attempt})` : ""}`;
151
158
  const body = t.report ? (0, util_1.clip)(t.report, 1600) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
@@ -154,7 +161,7 @@ function reportBlock(t) {
154
161
  const files = t.filesTouched?.length ? `\nfiles touched: ${t.filesTouched.join(", ")}` : "";
155
162
  const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
156
163
  const fb = t.feedback ? `\nverifier: ${(0, util_1.clip)(t.feedback, 300)}` : "";
157
- return `${head}\n${body}${facts}${open}${files}${arts}${fb}`;
164
+ return `${head}\n${body}${facts}${open}${files}${arts}${sourcesLine(t)}${fb}`;
158
165
  }
159
166
  /**
160
167
  * Compact dependency context for a downstream worker: structured handoff
@@ -168,11 +175,12 @@ function depReportBlock(t) {
168
175
  const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
169
176
  const full = (t.report ?? "").length > 1200 ? `\n(excerpt — full text: read_report("${t.id}"))` : "";
170
177
  const body = t.report ? (0, util_1.clip)(t.report, 1200) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
171
- return `${head}\n${body}${facts}${files}${arts}${full}`;
178
+ return `${head}\n${body}${facts}${files}${arts}${sourcesLine(t)}${full}`;
172
179
  }
173
180
  // ============================================================ workers
174
181
  const ROLE_HINTS = {
175
- researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with the source URL) and save a structured markdown file of your sources+findings as an artifact so the synthesizer can build on it. " +
182
+ researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with url=<source>) and save a structured markdown file of your sources+findings as an artifact so the synthesizer can build on it. " +
183
+ "A finding without a source is an opinion: list EVERY source your findings rest on in report(...)'s `sources` field (url + what it supports) — only sources reported there can be cited in the final deliverable. When independent sources disagree on a material fact, post note(kind:'conflict') naming both sources and the discrepancy — never silently pick one. For scientific or technical questions, also run academic_search (arXiv + Crossref) — peer-reviewed beats blog posts. " +
176
184
  "If a crawl_site tool is available, use it to ingest whole documentation sites or multi-page sources into local markdown files, then read the saved files — far cheaper and broader than fetching pages one by one.",
177
185
  coder: "Engineering craft: read existing code before changing it; match its conventions; build/run/test after every meaningful change and include the command + result in your report. Leave the tree compiling.",
178
186
  analyst: "Analysis craft: quantify wherever possible; state assumptions explicitly; separate observation from interpretation; sanity-check numbers twice.",
@@ -209,7 +217,7 @@ OPERATING PROTOCOL
209
217
  - You are fully autonomous. Never ask questions; decide and act.
210
218
  - Plan briefly, then act in small verified steps: after changing anything, prove it worked (run it, read it back, test it).
211
219
  - Evidence over assumption: read before you edit; check outputs; cite concrete paths, commands and numbers.
212
- - Be token-lean: targeted reads (line ranges, grep via shell) over wholesale dumps; don't re-read unchanged files.
220
+ - Be token-lean: targeted reads (line ranges, grep_files) over wholesale dumps; don't re-read unchanged files. Several edits to one file → one replace_in_file call with edits[].
213
221
  - Post durable discoveries other agents will need to the blackboard with note(...) — facts only, used sparingly.
214
222
  - Editing files other tasks might also touch? First search_notes for claims, then post note(kind:"claim", key:"<path>") before editing. Claims are advisory — coordinate, don't fight.
215
223
  - Save deliverable files with save_artifact so the operator sees them. Pick the format that genuinely fits the deliverable — structured data as .csv/.json, polished documents as self-contained .html, code as runnable files — not everything is a markdown report.
@@ -217,7 +225,7 @@ OPERATING PROTOCOL
217
225
  - Genuinely impossible / missing prerequisite → report(status:"blocked", …) early instead of thrashing.
218
226
  - You have at most ${opts.maxSteps} tool steps. Budget them.
219
227
  - Dependency reports above are excerpts; use read_report(task_id) for full text, and search_notes(query) to find facts posted earlier in the run.
220
- - ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks.
228
+ - ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks. If your work drew on the web, fill sources (url + what it supports): only sources reported there can be cited in the final deliverable.
221
229
  ${roleHint ? "\n" + roleHint : ""}`;
222
230
  }
223
231
  exports.WORKER_KICKOFF = "Begin now. Work the task to completion, then call report(...).";
@@ -227,7 +235,7 @@ function forcedFinal(reason) {
227
235
  return `${reason} Stop working and call your terminal tool RIGHT NOW with your best honest account: what you completed, what you verified, what remains.`;
228
236
  }
229
237
  // ============================================================ verifier
230
- function verifierSystem(meta, task) {
238
+ function verifierSystem(meta, task, depReports = "") {
231
239
  return `You are an adversarial verification agent. A worker claims it completed this task — your job is to try to falsify that claim with evidence.
232
240
 
233
241
  MISSION (for context): ${(0, util_1.clip)(meta.mission, 400)}
@@ -238,18 +246,18 @@ ${task.context ? `Context: ${(0, util_1.clip)(task.context, 600)}` : ""}
238
246
  Worker's report:
239
247
  ${(0, util_1.clip)(task.report ?? "", 2400)}
240
248
  ${task.artifacts.length ? `Claimed artifacts: ${task.artifacts.join(", ")}` : ""}
241
-
249
+ ${depReports ? `\nUPSTREAM INPUTS (settled dependency reports — what this task had to build on; judge completeness against them):\n${depReports}\n` : ""}
242
250
  Working directory: ${meta.cwd}
243
251
 
244
252
  PROTOCOL
245
253
  - Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers. You see only the worker's CLAIMS — gather your own evidence; do not assume shared context.
246
254
  - RUBRIC — fail unless all hold:
247
- 1. Completeness: every part of the objective and its "Done when" criteria is addressed.
255
+ 1. Completeness: every part of the objective and its "Done when" criteria is addressed${depReports ? " (including everything the upstream inputs handed over)" : ""}.
248
256
  2. Evidence: each substantive claim in the report is backed by something you verified yourself.
249
257
  3. Deliverables: claimed files/artifacts exist, are non-trivial (not stubs/placeholders), and match what the report says about them.
250
258
  4. Correctness: commands/builds/tests the task implies actually succeed when you run them.
251
259
  - Spot-check depth over exhaustive breadth; ~5-12 tool steps.
252
- - Then call verdict(pass, feedback). On fail, feedback must be actionable: exactly what is wrong and where. On pass, one line citing the evidence you checked.`;
260
+ - Then call verdict(pass, feedback, issues). On fail, ALSO fill issues one entry per concrete problem with the evidence you gathered and the exact change needed; the worker's retry sees them verbatim. On pass, feedback is one line citing the evidence you checked.`;
253
261
  }
254
262
  exports.VERIFIER_KICKOFF = "Verify now, then call verdict(...).";
255
263
  // ============================================================ synthesizer
@@ -265,13 +273,13 @@ Conductor's closing notes: ${opts.finishNotes || "(none)"}
265
273
  ALL TASK REPORTS
266
274
  ${opts.reports}
267
275
 
268
- ${opts.blackboard ? `BLACKBOARD\n${opts.blackboard}\n` : ""}${opts.artifactList ? `ARTIFACTS ON DISK\n${opts.artifactList}\n` : ""}
276
+ ${opts.sources ? `SOURCES (numbered, deduplicated from the task reports — the only sources that exist)\n${opts.sources}\n\n` : ""}${opts.blackboard ? `BLACKBOARD\n${opts.blackboard}\n` : ""}${opts.artifactList ? `ARTIFACTS ON DISK\n${opts.artifactList}\n` : ""}
269
277
  Working directory: ${opts.meta.cwd}
270
278
 
271
279
  PROTOCOL
272
280
  - You may read files (read_file / list_dir) to confirm specifics before writing — verify key claims you repeat.
273
281
  - The mission's PRIMARY deliverable should exist in the format that serves it best, not only as prose. If the task reports produced data, comparisons, or rankings that the artifacts don't already capture in a structured form, save them now with save_artifact (e.g. data/results.csv, data/findings.json) before submitting. Don't duplicate artifacts that already exist — point to them.
274
- - Then call submit_final with:
282
+ ${opts.sources ? `- CITE YOUR SOURCES: where a claim rests on a numbered source, cite it inline as [n]. End report_markdown with a \`## Sources\` section listing each number you actually cited as a markdown link ([n] [title](url)). Never invent a source or cite a number not in the list. Where sources conflict, present both positions with their citations — do not silently pick one.\n` : ""}- Then call submit_final with:
275
283
  • report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler. Use real markdown tables for tabular findings. (A styled HTML rendering is generated automatically — do not hand-write one.)
276
284
  • summary — ≤8 sentences for the console.
277
285
  - The report stands alone: a reader who saw nothing else must understand what happened and where everything is.`;
@@ -292,7 +300,7 @@ ${reports}
292
300
 
293
301
  Reply with EXACTLY "COMPLETE" if the mission's requirements are genuinely covered. Otherwise reply with a short numbered list of concrete gaps (max 5), each one actionable enough to become a task. Do not invent nice-to-haves — only true gaps against the stated mission.`;
294
302
  }
295
- function synthCheckPrompt(mission, reports, finalReport) {
303
+ function synthCheckPrompt(mission, reports, finalReport, sources) {
296
304
  return `You are checking a final mission report for faithfulness before delivery. Compare it against the underlying task reports.
297
305
 
298
306
  MISSION
@@ -301,10 +309,10 @@ ${mission}
301
309
  TASK REPORTS (ground truth)
302
310
  ${reports}
303
311
 
304
- FINAL REPORT (to check)
312
+ ${sources ? `SOURCE LIST (the only citable sources)\n${sources}\n\n` : ""}FINAL REPORT (to check)
305
313
  ${finalReport}
306
314
 
307
- Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
315
+ Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated${sources ? ", its inline [n] citations all reference numbers that exist in the source list, and no key web-derived factual claim is left uncited" : ""}. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
308
316
  }
309
317
  // ============================================================ compaction
310
318
  function compactorPrompt(serialized) {
package/dist/report.js CHANGED
@@ -11,8 +11,45 @@
11
11
  * broken markup.
12
12
  */
13
13
  Object.defineProperty(exports, "__esModule", { value: true });
14
+ exports.aggregateSources = aggregateSources;
15
+ exports.sourcesBlock = sourcesBlock;
14
16
  exports.mdToHtml = mdToHtml;
15
17
  exports.renderFinalHtml = renderFinalHtml;
18
+ const searchcore_1 = require("./searchcore");
19
/**
 * Merge the sources reported by all tasks into a single numbered list.
 *
 * Sources are keyed by their canonical URL. The first task to report a URL
 * creates the entry and fixes its number `n` (1-based, in order of first
 * appearance). Later reports of the same URL add their task id to `taskIds`
 * and fill in `title`, `date` and `note` only where the entry has none yet.
 *
 * @param {Array<{ id: string, sources?: Array<object> }>} tasks
 * @returns {Array<object>} Entries in numbering order.
 */
function aggregateSources(tasks) {
    const merged = new Map();
    const fillable = ["title", "date", "note"];
    for (const task of tasks) {
        for (const reported of task.sources ?? []) {
            const key = (0, searchcore_1.canonicalizeUrl)(reported.url);
            const known = merged.get(key);
            if (!known) {
                merged.set(key, { ...reported, n: merged.size + 1, taskIds: [task.id] });
                continue;
            }
            if (!known.taskIds.includes(task.id))
                known.taskIds.push(task.id);
            // Never overwrite: the first non-empty value for a field wins.
            for (const field of fillable) {
                if (!known[field] && reported[field])
                    known[field] = reported[field];
            }
        }
    }
    return Array.from(merged.values());
}
47
/**
 * Render the numbered source list for prompts, one line per source:
 * `[n] title — url (date) — note [cited by id,id]`. The title, date and note
 * parts are omitted when the source lacks them.
 *
 * @param {Array<{ n: number, url: string, title?: string, date?: string, note?: string, taskIds: string[] }>} sources
 * @returns {string} Lines joined with "\n" ("" for an empty list).
 */
function sourcesBlock(sources) {
    const lines = [];
    for (const src of sources) {
        let line = `[${src.n}] `;
        if (src.title)
            line += `${src.title} — `;
        line += `${src.url}`;
        if (src.date)
            line += ` (${src.date})`;
        if (src.note)
            line += ` — ${src.note}`;
        line += ` [cited by ${src.taskIds.join(",")}]`;
        lines.push(line);
    }
    return lines.join("\n");
}
16
53
  function esc(s) {
17
54
  return s
18
55
  .replace(/&/g, "&amp;")
package/dist/run.js CHANGED
@@ -198,6 +198,14 @@ function listRuns(pricing) {
198
198
  s.pid = readPid(id);
199
199
  out.push(applyLiveness(s));
200
200
  }
201
+ // Deleted runs must not pin their reduced state in a long-lived hub forever.
202
+ const live = new Set(ids);
203
+ for (const key of summaryCache.keys())
204
+ if (!live.has(key))
205
+ summaryCache.delete(key);
206
+ for (const key of liveCache.keys())
207
+ if (!live.has(key))
208
+ liveCache.delete(key);
201
209
  out.sort((a, b) => b.createdAt - a.createdAt);
202
210
  return out;
203
211
  }
package/dist/sandbox.js CHANGED
@@ -302,7 +302,17 @@ class RemoteRuntime {
302
302
  throw new Error(`${what} failed (exit ${r.code}): ${r.out.slice(0, 300)}`);
303
303
  return r.out;
304
304
  }
305
+ /** base64-over-shell transfers buffer the whole file — refuse the huge ones. */
306
+ async checkSize(abs, capBytes, what) {
307
+ const out = await this.execOk(`wc -c < ${shq(abs)}`, `stat ${abs}`);
308
+ const size = Number(out.trim());
309
+ if (Number.isFinite(size) && size > capBytes) {
310
+ throw new Error(`${what}: file is ${Math.round(size / 1e6)}MB (cap ${Math.round(capBytes / 1e6)}MB) — ` +
311
+ `compress it or extract the relevant part in the sandbox first`);
312
+ }
313
+ }
305
314
  async readFile(abs) {
315
+ await this.checkSize(abs, 4_000_000, `read ${abs}`);
306
316
  const out = await this.execOk(`base64 < ${shq(abs)}`, `read ${abs}`);
307
317
  return Buffer.from(out.replace(/\s+/g, ""), "base64").toString("utf8");
308
318
  }
@@ -319,6 +329,7 @@ class RemoteRuntime {
319
329
  }
320
330
  }
321
331
  async pull(remoteAbs, localAbs) {
332
+ await this.checkSize(remoteAbs, 32_000_000, `pull ${remoteAbs}`);
322
333
  const out = await this.execOk(`base64 < ${shq(remoteAbs)}`, `pull ${remoteAbs}`);
323
334
  (0, util_1.ensureDir)(path.dirname(localAbs));
324
335
  fs.writeFileSync(localAbs, Buffer.from(out.replace(/\s+/g, ""), "base64"));
@@ -9,8 +9,11 @@
9
9
  Object.defineProperty(exports, "__esModule", { value: true });
10
10
  exports.queryTerms = queryTerms;
11
11
  exports.expandQueries = expandQueries;
12
+ exports.reformulate = reformulate;
13
+ exports.looksAcademic = looksAcademic;
12
14
  exports.canonicalizeUrl = canonicalizeUrl;
13
15
  exports.classifySource = classifySource;
16
+ exports.freshnessBoost = freshnessBoost;
14
17
  exports.detectDate = detectDate;
15
18
  exports.selectPassages = selectPassages;
16
19
  exports.scorePage = scorePage;
@@ -42,6 +45,22 @@ function expandQueries(query, max = 3) {
42
45
  const seen = new Set();
43
46
  return out.map((q) => q.trim()).filter((q) => q && !seen.has(q.toLowerCase()) && seen.add(q.toLowerCase())).slice(0, max);
44
47
  }
48
/**
 * Fallback phrasing when a query returns nothing: strip quotes and search
 * operators down to the top keyword terms. Returns "" when no useful
 * simplification exists.
 *
 * Both straight and typographic (curly) quotes are removed; the character
 * class spells the curly ones as escapes so they cannot be lost or flattened
 * by an editor.
 *
 * @param {string} query The query that produced no results.
 * @returns {string} Up to six terms from `queryTerms`, space-joined, or ""
 *   when that would be empty or identical to the lower-cased, trimmed input.
 */
function reformulate(query) {
    const cleaned = query
        // " ' plus U+201C/U+201D (double curly) and U+2018/U+2019 (single curly)
        .replace(/["'\u201C\u201D\u2018\u2019]/g, " ")
        .replace(/\b(site|intitle|inurl|filetype):\S+/gi, " ");
    const alt = queryTerms(cleaned).slice(0, 6).join(" ");
    return alt && alt !== query.toLowerCase().trim() ? alt : "";
}
60
/**
 * Whole-word, case-insensitive vocabulary that marks a query as scholarly
 * (e.g. "paper", "arxiv", "peer-reviewed", "et al").
 */
const ACADEMIC_QUERY_RE = /\b(paper|papers|study|studies|research|arxiv|doi|journal|peer.?review(ed)?|preprint|dataset|benchmark|survey|meta.?analysis|citations?|et al)\b/i;
/**
 * Queries that smell academic trigger the scholarly engines in deep mode.
 *
 * @param {string} query Search query text.
 * @returns {boolean} True when the query contains one of the scholarly terms.
 */
function looksAcademic(query) {
    return ACADEMIC_QUERY_RE.exec(query) !== null;
}
45
64
  const TRACKING_KEYS = new Set(["fbclid", "gclid", "mc_cid", "mc_eid"]);
46
65
  /** Stable canonical form for dedup: strip tracking params, www, trailing slash; sort the query. */
47
66
  function canonicalizeUrl(url) {
@@ -61,18 +80,50 @@ function canonicalizeUrl(url) {
61
80
  path = path.replace(/\/+$/, "");
62
81
  return `${u.protocol.toLowerCase()}//${host}${path}${query}`;
63
82
  }
83
/** Hosts (and their subdomains) classified as "academic". */
const ACADEMIC_HOSTS = [
    "arxiv.org",
    "doi.org",
    "semanticscholar.org",
    "ncbi.nlm.nih.gov",
    "nature.com",
    "sciencedirect.com",
    "springer.com",
    "link.springer.com",
    "scholar.google.com",
    "acm.org",
    "ieee.org",
];
/** Hosts (and their subdomains) classified as "social". */
const SOCIAL_HOSTS = ["twitter.com", "x.com", "reddit.com", "facebook.com"];
/** True when `domain` is one of `hosts` or a subdomain of one. */
function isHostOrSubdomain(domain, hosts) {
    return hosts.some((h) => domain === h || domain.endsWith("." + h));
}
/**
 * Coarse source category for a domain name.
 *
 * Checks run in order and the first match wins: .gov/.mil → "government";
 * .edu or a known scholarly host → "academic"; a known social host →
 * "social"; a news heuristic → "news"; anything else → "secondary".
 *
 * Social hosts are matched as whole hosts or subdomains. A plain substring
 * test would classify unrelated domains that merely end in the same letters
 * (netflix.com and dropbox.com both contain "x.com") as social.
 *
 * @param {string} domain Host name; matching is case-insensitive.
 * @returns {"government" | "academic" | "social" | "news" | "secondary"}
 */
function classifySource(domain) {
    const d = domain.toLowerCase();
    if (d.endsWith(".gov") || d.endsWith(".mil"))
        return "government";
    if (d.endsWith(".edu"))
        return "academic";
    if (isHostOrSubdomain(d, ACADEMIC_HOSTS))
        return "academic";
    if (isHostOrSubdomain(d, SOCIAL_HOSTS))
        return "social";
    if (d.includes("news") || d.includes("reuters.com") || d.includes("apnews.com") || d.includes("bbc."))
        return "news";
    return "secondary";
}
110
/**
 * Recency boost from an ISO date or bare year: +3 <1y, +2 <2y, +1 <5y,
 * 0 older/undated.
 *
 * Accepts "YYYY", "YYYY-M[M]" or "YYYY-M[M]-D[D]" at the start of the trimmed
 * string. A missing month is taken as July and a missing day as the 15th, so
 * a bare year lands mid-year. A year is 365.25 days.
 *
 * @param {string | undefined | null} date Date text.
 * @param {number} [now=Date.now()] Reference time in epoch milliseconds.
 * @returns {0 | 1 | 2 | 3} The boost.
 */
function freshnessBoost(date, now = Date.now()) {
    if (!date)
        return 0;
    const parts = /^(\d{4})(?:-(\d{1,2})(?:-(\d{1,2}))?)?/.exec(date.trim());
    if (parts === null)
        return 0;
    const [, year, month, day] = parts;
    const monthIndex = month ? Number(month) - 1 : 6;
    const dayOfMonth = day ? Number(day) : 15;
    const ageYears = (now - Date.UTC(Number(year), monthIndex, dayOfMonth)) / 31_557_600_000;
    // [upper age bound in years, boost], youngest bracket first.
    const brackets = [[1, 3], [2, 2], [5, 1]];
    for (const [limit, boost] of brackets) {
        if (ageYears < limit)
            return boost;
    }
    return 0;
}
76
127
  /** ISO date if present, else a bare year. */
77
128
  function detectDate(text) {
78
129
  const iso = /\b(20\d{2}-\d{2}-\d{2})\b/.exec(text);
@@ -136,8 +187,7 @@ function scorePage(page, terms) {
136
187
  score += 4;
137
188
  if (["pypi.org", "npmjs.com", "rubygems.org"].includes(domain))
138
189
  score -= 2;
139
- if (page.date)
140
- score += 1;
190
+ score += freshnessBoost(page.date);
141
191
  const lowered = page.text.toLowerCase();
142
192
  for (const t of terms)
143
193
  if (lowered.includes(t))
@@ -158,6 +208,9 @@ function resultQualityScore(c) {
158
208
  score += 4;
159
209
  if (url.includes("github.com") || url.includes("gitlab.com"))
160
210
  score += 3;
211
+ if (c.engine === "arxiv" || c.engine === "crossref")
212
+ score += 3;
213
+ score += Math.min(2, freshnessBoost(c.date));
161
214
  if (LOW_VALUE_SNIPPET.some((t) => snippet.includes(t)))
162
215
  score -= 10;
163
216
  return score;
package/dist/state.js CHANGED
@@ -21,6 +21,8 @@ class RunState {
21
21
  usageByModel = new Map();
22
22
  totalUsage = { ...types_1.ZERO_USAGE };
23
23
  cost = 0;
24
+ /** Sampled cumulative token spend over time (budget sparkline). */
25
+ budgetSeries = [];
24
26
  finalSummary;
25
27
  finalReportPath;
26
28
  lastSeq = 0;
@@ -55,6 +57,7 @@ class RunState {
55
57
  this.usageByModel.set(model, (0, types_1.addUsage)(this.usageByModel.get(model) ?? { ...types_1.ZERO_USAGE }, u));
56
58
  this.totalUsage = (0, types_1.addUsage)(this.totalUsage, u);
57
59
  this.cost += (0, types_1.usageCost)(u, this.pricing[model]);
60
+ this.pushBudgetPoint(ev.t);
58
61
  }
59
62
  return;
60
63
  }
@@ -127,6 +130,8 @@ class RunState {
127
130
  t.openQuestions = ev.openQuestions;
128
131
  if (Array.isArray(ev.filesTouched))
129
132
  t.filesTouched = ev.filesTouched;
133
+ if (Array.isArray(ev.sources))
134
+ t.sources = ev.sources;
130
135
  }
131
136
  break;
132
137
  }
@@ -202,15 +207,17 @@ class RunState {
202
207
  key: ev.key,
203
208
  kind: ev.kind,
204
209
  text: ev.text,
210
+ url: typeof ev.url === "string" ? ev.url : undefined,
205
211
  });
206
212
  // Reduced state is held live by the hub and the resume seed — keep
207
- // only the tail that digests/views actually use. Decisions are never
208
- // dropped: they anchor the conductor's long-horizon coherence.
213
+ // only the tail that digests/views actually use. Decisions and
214
+ // conflicts are never dropped: they anchor long-horizon coherence.
209
215
  if (this.notes.length > 1000) {
210
- const decisions = this.notes.filter((n) => n.kind === "decision");
211
- const rest = this.notes.filter((n) => n.kind !== "decision");
212
- rest.splice(0, rest.length - Math.max(0, 1000 - decisions.length));
213
- this.notes = [...decisions, ...rest].sort((a, b) => a.t - b.t);
216
+ const keep = (n) => n.kind === "decision" || n.kind === "conflict";
217
+ const pinned = this.notes.filter(keep);
218
+ const rest = this.notes.filter((n) => !keep(n));
219
+ rest.splice(0, rest.length - Math.max(0, 1000 - pinned.length));
220
+ this.notes = [...pinned, ...rest].sort((a, b) => a.t - b.t);
214
221
  }
215
222
  break;
216
223
  case "conductor.say":
@@ -233,6 +240,7 @@ class RunState {
233
240
  this.usageByModel.set(model, (0, types_1.addUsage)(this.usageByModel.get(model) ?? { ...types_1.ZERO_USAGE }, u));
234
241
  this.totalUsage = (0, types_1.addUsage)(this.totalUsage, u);
235
242
  this.cost += (0, types_1.usageCost)(u, this.pricing[model]);
243
+ this.pushBudgetPoint(ev.t);
236
244
  break;
237
245
  }
238
246
  case "run.final":
@@ -241,6 +249,26 @@ class RunState {
241
249
  break;
242
250
  }
243
251
  }
252
+ /**
253
+ * Sample the cumulative spend: a point per meaningful jump (≥0.5% of the
254
+ * budget cap, or 2k tokens unbounded), halving resolution past 600 points.
255
+ */
256
+ pushBudgetPoint(t) {
257
+ const tokens = this.totalUsage.promptTokens + this.totalUsage.completionTokens;
258
+ const cap = this.meta?.options?.maxTokens ?? 0;
259
+ const minStep = cap > 0 ? Math.max(2000, cap * 0.005) : 2000;
260
+ const last = this.budgetSeries[this.budgetSeries.length - 1];
261
+ if (last && tokens - last.tokens < minStep) {
262
+ last.t = t;
263
+ last.tokens = tokens;
264
+ last.cost = this.cost;
265
+ return;
266
+ }
267
+ this.budgetSeries.push({ t, tokens, cost: this.cost });
268
+ if (this.budgetSeries.length > 600) {
269
+ this.budgetSeries = this.budgetSeries.filter((_, i) => i % 2 === 0 || i === this.budgetSeries.length - 1);
270
+ }
271
+ }
244
272
  taskList() {
245
273
  return this.taskOrder.map((id) => this.tasks.get(id)).filter(Boolean);
246
274
  }