@robzilla1738/agentswarm 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -12
- package/dist/agent.js +2 -1
- package/dist/cli.js +21 -4
- package/dist/config.js +27 -1
- package/dist/executor.js +243 -43
- package/dist/hub.js +69 -3
- package/dist/memory.js +5 -4
- package/dist/pdftext.js +211 -0
- package/dist/prompts.js +23 -15
- package/dist/report.js +37 -0
- package/dist/run.js +8 -0
- package/dist/sandbox.js +11 -0
- package/dist/searchcore.js +55 -2
- package/dist/state.js +34 -6
- package/dist/tools.js +196 -19
- package/dist/util.js +85 -0
- package/dist/webtools.js +145 -15
- package/package.json +1 -1
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
- package/ui/out/_next/static/css/{9f7bd82b8e4c762c.css → d95c2ba395730031.css} +1 -1
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/677-859e8d42add1806b.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-2420c9e4c963d9b3.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-092a6bf42dfde57d.js +0 -1
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{errjtBR_bKoee8ogLp8xk → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/pdftext.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted CommonJS interop helpers. Each one reuses an implementation
// already present on `this` when there is one, otherwise defines its own.

/**
 * Expose `source[key]` on `target` under `alias` (defaults to `key`).
 * Re-uses the source's own property descriptor when it is a getter on an ES
 * module or a read-only, non-configurable data property; otherwise installs an
 * enumerable getter so the binding stays live.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    });

/** Attach `value` as the enumerable `default` property of `target`. */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

/**
 * Wrap a required module as a namespace object: ES modules are returned
 * untouched; anything else gets its own properties re-exported as bindings
 * and the module itself exposed as `default`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolves the key-listing strategy on first use, then replaces itself with it.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] !== "default") {
                    __createBinding(namespace, mod, names[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.extractPdfText = extractPdfText;
|
|
37
|
+
const zlib = __importStar(require("zlib"));
|
|
38
|
+
/** Default cap on the inflated size of one stream; larger streams are skipped (deflate-bomb guard). */
const DEFAULT_MAX_STREAM_BYTES = 64 * 1024 * 1024;
/**
 * Minimal zero-dependency PDF text extraction: inflate FlateDecode content
 * streams (Node's built-in zlib) and interpret the text-showing operators
 * (Tj / TJ / ' / "). Good enough for most digitally-produced text PDFs;
 * returns null for scanned, encrypted, or exotic-encoding documents so the
 * caller can tell the agent to find an HTML source instead.
 *
 * @param {Buffer} buf - Raw bytes of the candidate PDF.
 * @param {{ maxStreamBytes?: number }} [options] - `maxStreamBytes` is the
 *   largest inflated size accepted for a single stream (positive finite
 *   integer); streams that would inflate beyond it are skipped.
 * @returns {{ text: string, pages: number } | null} Extracted text and a
 *   page-count estimate, or null when the buffer is not a PDF or fewer than
 *   40 non-whitespace characters could be recovered.
 */
function extractPdfText(buf, { maxStreamBytes = DEFAULT_MAX_STREAM_BYTES } = {}) {
    if (buf.subarray(0, 5).toString("latin1") !== "%PDF-")
        return null;
    // latin1 preserves bytes 1:1, so stream offsets in the string match the buffer.
    const raw = buf.toString("latin1");
    // Count "/Type /Page" objects while ignoring the "/Type /Pages" tree nodes; fall back to 1.
    const pages = (raw.match(/\/Type\s*\/Pages?\b/g) || []).filter((hit) => !/Pages/.test(hit)).length || 1;
    let text = "";
    const streamRe = /<<([\s\S]{0,2000}?)>>\s*stream\r?\n/g;
    let m;
    while ((m = streamRe.exec(raw))) {
        const dict = m[1];
        const start = m.index + m[0].length;
        const end = raw.indexOf("endstream", start);
        if (end < 0)
            continue;
        // Resume scanning after this stream so its payload is never matched as a dictionary.
        streamRe.lastIndex = end;
        // Only plain or Flate-compressed streams are supported.
        if (/\/Filter/.test(dict) && !/FlateDecode/.test(dict))
            continue;
        // Drop the end-of-line marker(s) that precede "endstream".
        let len = end;
        while (len > start && (raw[len - 1] === "\n" || raw[len - 1] === "\r"))
            len--;
        let data = buf.subarray(start, len);
        if (/FlateDecode/.test(dict)) {
            try {
                // The input is untrusted: bound the output so a tiny stream cannot
                // inflate into gigabytes. Exceeding the cap throws and lands below.
                data = zlib.inflateSync(data, { maxOutputLength: maxStreamBytes });
            }
            catch {
                // Best-effort extraction: corrupt or oversized streams are skipped.
                continue;
            }
        }
        const content = data.toString("latin1");
        if (!/\bBT\b/.test(content))
            continue; // not a text content stream
        const extracted = extractFromContent(content);
        if (extracted.trim())
            text += extracted + "\n";
    }
    // Collapse runs of non-newline whitespace, trim spaces around newlines, cap blank lines.
    const cleaned = text
        .replace(/[^\S\n]+/g, " ")
        .replace(/ ?\n ?/g, "\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
    // CID/Type0 fonts yield glyph-index garbage; require a body of real characters.
    // NOTE(review): the trailing " -" in this character class may be the residue of a
    // literal non-ASCII range lost in transcription — confirm against the original source.
    const printable = cleaned.replace(/[^\x20-\x7E\n -]/g, "");
    if (printable.replace(/\s/g, "").length < 40)
        return null;
    return { text: printable, pages };
}
|
|
94
|
+
/** Characters that may appear in a content-stream operator token. */
const OPERATOR_CHAR = /[A-Za-z'"*]/;
/** Characters that continue a numeric token after its first character. */
const NUMBER_CHAR = /[0-9.]/;
/**
 * Walk a content stream, collecting strings shown by Tj/TJ/'/" with newline heuristics.
 *
 * Operand strings (literal `(...)` and hex `<...>`) are buffered in `pending`
 * until an operator is reached: text-showing operators append the buffer to
 * the output, text-positioning operators and ET end the current line, and
 * every operator clears the buffer.
 *
 * @param {string} src - Decoded content stream (one char per byte).
 * @returns {string} Extracted text with "\n" at the inferred line breaks.
 */
function extractFromContent(src) {
    let out = "";
    let pending = [];
    const n = src.length;
    let i = 0;
    while (i < n) {
        const ch = src[i];
        if (ch === "(") {
            const [s, next] = parseLiteralString(src, i);
            pending.push(s);
            i = next;
        }
        else if (ch === "<" && src[i + 1] === "<") {
            // Dictionary opener (e.g. marked-content properties): step over both
            // brackets so the dictionary body is not mistaken for a hex string.
            i += 2;
        }
        else if (ch === "<") {
            const close = src.indexOf(">", i + 1);
            if (close < 0)
                break;
            pending.push(decodeHexString(src.slice(i + 1, close)));
            i = close + 1;
        }
        else if (ch === "%") {
            // comment to end of line
            while (i < n && src[i] !== "\n" && src[i] !== "\r")
                i++;
        }
        else if (OPERATOR_CHAR.test(ch)) {
            let j = i;
            while (j < n && OPERATOR_CHAR.test(src[j]))
                j++;
            const op = src.slice(i, j);
            if (op === "Tj" || op === "TJ") {
                out += pending.join("");
            }
            else if (op === "'" || op === '"') {
                // These operators move to the next line before showing the string.
                out += "\n" + pending.join("");
            }
            else if (op === "Td" || op === "TD" || op === "T*" || op === "Tm" || op === "ET") {
                if (pending.length)
                    out += pending.join("");
                if (!out.endsWith("\n"))
                    out += "\n";
            }
            // Any operator consumes the operands gathered so far.
            pending = [];
            i = j;
        }
        else if (ch === "-" || (ch >= "0" && ch <= "9") || ch === ".") {
            let j = i + 1;
            while (j < n && NUMBER_CHAR.test(src[j]))
                j++;
            // Large negative kerning inside a TJ array is a word gap.
            const num = parseFloat(src.slice(i, j));
            if (num <= -180 && pending.length && !pending[pending.length - 1].endsWith(" "))
                pending.push(" ");
            i = j;
        }
        else {
            i++;
        }
    }
    return out;
}
|
|
155
|
+
/** Single-character escapes recognised after a backslash inside a literal string. */
const LITERAL_ESCAPES = { n: "\n", r: "\r", t: "\t", b: "\b", f: "\f", "(": "(", ")": ")", "\\": "\\" };
/**
 * PDF literal string: balanced parens, backslash escapes, octal codes.
 *
 * @param {string} src - Content stream text.
 * @param {number} start - Index of the opening "(".
 * @returns {[string, number]} The decoded string and the index just past the
 *   closing ")". For an unterminated string, everything up to the end of
 *   `src` is returned together with `src.length`.
 */
function parseLiteralString(src, start) {
    let out = "";
    let depth = 0;
    let i = start;
    for (; i < src.length; i++) {
        const ch = src[i];
        if (ch === "\\") {
            const next = src[i + 1];
            if (next >= "0" && next <= "7") {
                // Up to three octal digits; overflow above one byte is discarded.
                let oct = "";
                for (let k = 1; k <= 3 && src[i + k] >= "0" && src[i + k] <= "7"; k++)
                    oct += src[i + k];
                out += String.fromCharCode(Number.parseInt(oct, 8) & 0xff);
                i += oct.length;
            }
            else if (next === "\r" || next === "\n") {
                // Backslash + end-of-line is a line continuation: it contributes
                // nothing to the string. Treat CRLF as a single end-of-line.
                i += next === "\r" && src[i + 2] === "\n" ? 2 : 1;
            }
            else {
                // Known escape, else the escaped character itself; a trailing
                // backslash at the very end of input adds nothing.
                out += LITERAL_ESCAPES[next] ?? next ?? "";
                i++;
            }
        }
        else if (ch === "(") {
            depth++;
            // The outermost "(" is the delimiter; nested ones are content.
            if (depth > 1)
                out += ch;
        }
        else if (ch === ")") {
            depth--;
            if (depth === 0) {
                i++;
                break;
            }
            out += ch;
        }
        else {
            out += ch;
        }
    }
    return [out, i];
}
|
|
196
|
+
/**
 * Decode the body of a PDF hex string (the text between "<" and ">").
 * Non-hex characters are ignored and an odd trailing digit is padded with 0.
 * When the bytes begin with the UTF-16BE byte-order mark FE FF, the remainder
 * is read as two-byte code units; otherwise each byte becomes one character.
 */
function decodeHexString(hex) {
    const digits = hex.replace(/[^0-9a-fA-F]/g, "");
    const padded = digits.length % 2 ? digits + "0" : digits;
    const bytes = [];
    for (let pos = 0; pos < padded.length; pos += 2) {
        bytes.push(parseInt(padded.slice(pos, pos + 2), 16));
    }
    const isUtf16 = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    let decoded = "";
    if (isUtf16) {
        // Skip the BOM; an unpaired trailing byte is dropped.
        for (let pos = 2; pos + 1 < bytes.length; pos += 2) {
            decoded += String.fromCharCode((bytes[pos] << 8) | bytes[pos + 1]);
        }
        return decoded;
    }
    for (const byte of bytes) {
        decoded += String.fromCharCode(byte);
    }
    return decoded;
}
|
package/dist/prompts.js
CHANGED
|
@@ -122,7 +122,7 @@ function taskTable(tasks) {
|
|
|
122
122
|
return "(no tasks yet)";
|
|
123
123
|
const line = (t) => {
|
|
124
124
|
const deps = t.deps.length ? ` deps:[${t.deps.join(",")}]` : "";
|
|
125
|
-
const extra = t.status === "failed" && t.error ? ` — ${(0, util_1.clip)(t.error,
|
|
125
|
+
const extra = (t.status === "failed" || t.status === "blocked") && t.error ? ` — ${(0, util_1.clip)(t.error, 120)}` : "";
|
|
126
126
|
return `${t.id} [${t.status}${t.attempt > 1 ? ` a${t.attempt}` : ""}] (${t.role})${deps} ${(0, util_1.clip)(t.title, 70)}${extra}`;
|
|
127
127
|
};
|
|
128
128
|
const settled = tasks.filter((t) => ["done", "failed", "blocked"].includes(t.status));
|
|
@@ -146,6 +146,13 @@ function taskTable(tasks) {
|
|
|
146
146
|
}
|
|
147
147
|
return out.join("\n");
|
|
148
148
|
}
|
|
149
|
+
/**
 * One-line "sources:" suffix for a task's report block: the first `max`
 * source URLs joined by " · ", plus a "(+N more)" marker when truncated.
 * Returns "" when the task reported no sources.
 */
function sourcesLine(t, max = 6) {
    const total = t.sources?.length;
    if (!total) {
        return "";
    }
    const urls = [];
    for (const source of t.sources.slice(0, max)) {
        urls.push(source.url);
    }
    let suffix = "";
    if (total > max) {
        suffix = ` (+${total - max} more)`;
    }
    return `\nsources: ${urls.join(" · ")}${suffix}`;
}
|
|
149
156
|
function reportBlock(t) {
|
|
150
157
|
const head = `── ${t.id} (${t.role}) "${(0, util_1.clip)(t.title, 60)}" → ${t.status.toUpperCase()}${t.attempt > 1 ? ` (attempt ${t.attempt})` : ""}`;
|
|
151
158
|
const body = t.report ? (0, util_1.clip)(t.report, 1600) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
|
|
@@ -154,7 +161,7 @@ function reportBlock(t) {
|
|
|
154
161
|
const files = t.filesTouched?.length ? `\nfiles touched: ${t.filesTouched.join(", ")}` : "";
|
|
155
162
|
const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
|
|
156
163
|
const fb = t.feedback ? `\nverifier: ${(0, util_1.clip)(t.feedback, 300)}` : "";
|
|
157
|
-
return `${head}\n${body}${facts}${open}${files}${arts}${fb}`;
|
|
164
|
+
return `${head}\n${body}${facts}${open}${files}${arts}${sourcesLine(t)}${fb}`;
|
|
158
165
|
}
|
|
159
166
|
/**
|
|
160
167
|
* Compact dependency context for a downstream worker: structured handoff
|
|
@@ -168,11 +175,12 @@ function depReportBlock(t) {
|
|
|
168
175
|
const arts = t.artifacts.length ? `\nartifacts: ${t.artifacts.join(", ")}` : "";
|
|
169
176
|
const full = (t.report ?? "").length > 1200 ? `\n(excerpt — full text: read_report("${t.id}"))` : "";
|
|
170
177
|
const body = t.report ? (0, util_1.clip)(t.report, 1200) : t.error ? `error: ${(0, util_1.clip)(t.error, 400)}` : "(no report)";
|
|
171
|
-
return `${head}\n${body}${facts}${files}${arts}${full}`;
|
|
178
|
+
return `${head}\n${body}${facts}${files}${arts}${sourcesLine(t)}${full}`;
|
|
172
179
|
}
|
|
173
180
|
// ============================================================ workers
|
|
174
181
|
const ROLE_HINTS = {
|
|
175
|
-
researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with
|
|
182
|
+
researcher: "Research craft: be exhaustive. Run deep web_search (deep=true, high count) across several distinct phrasings — pull DOZENS of sources for your sub-question, not three. Triangulate across independent sources; prefer primary docs and official sources over blog spam; capture exact figures, dates, and URLs, and keep the quotable passages the search returns. Record key findings as blackboard notes (with url=<source>) and save a structured markdown file of your sources+findings as an artifact so the synthesizer can build on it. " +
|
|
183
|
+
"A finding without a source is an opinion: list EVERY source your findings rest on in report(...)'s `sources` field (url + what it supports) — only sources reported there can be cited in the final deliverable. When independent sources disagree on a material fact, post note(kind:'conflict') naming both sources and the discrepancy — never silently pick one. For scientific or technical questions, also run academic_search (arXiv + Crossref) — peer-reviewed beats blog posts. " +
|
|
176
184
|
"If a crawl_site tool is available, use it to ingest whole documentation sites or multi-page sources into local markdown files, then read the saved files — far cheaper and broader than fetching pages one by one.",
|
|
177
185
|
coder: "Engineering craft: read existing code before changing it; match its conventions; build/run/test after every meaningful change and include the command + result in your report. Leave the tree compiling.",
|
|
178
186
|
analyst: "Analysis craft: quantify wherever possible; state assumptions explicitly; separate observation from interpretation; sanity-check numbers twice.",
|
|
@@ -209,7 +217,7 @@ OPERATING PROTOCOL
|
|
|
209
217
|
- You are fully autonomous. Never ask questions; decide and act.
|
|
210
218
|
- Plan briefly, then act in small verified steps: after changing anything, prove it worked (run it, read it back, test it).
|
|
211
219
|
- Evidence over assumption: read before you edit; check outputs; cite concrete paths, commands and numbers.
|
|
212
|
-
- Be token-lean: targeted reads (line ranges,
|
|
220
|
+
- Be token-lean: targeted reads (line ranges, grep_files) over wholesale dumps; don't re-read unchanged files. Several edits to one file → one replace_in_file call with edits[].
|
|
213
221
|
- Post durable discoveries other agents will need to the blackboard with note(...) — facts only, used sparingly.
|
|
214
222
|
- Editing files other tasks might also touch? First search_notes for claims, then post note(kind:"claim", key:"<path>") before editing. Claims are advisory — coordinate, don't fight.
|
|
215
223
|
- Save deliverable files with save_artifact so the operator sees them. Pick the format that genuinely fits the deliverable — structured data as .csv/.json, polished documents as self-contained .html, code as runnable files — not everything is a markdown report.
|
|
@@ -217,7 +225,7 @@ OPERATING PROTOCOL
|
|
|
217
225
|
- Genuinely impossible / missing prerequisite → report(status:"blocked", …) early instead of thrashing.
|
|
218
226
|
- You have at most ${opts.maxSteps} tool steps. Budget them.
|
|
219
227
|
- Dependency reports above are excerpts; use read_report(task_id) for full text, and search_notes(query) to find facts posted earlier in the run.
|
|
220
|
-
- ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks.
|
|
228
|
+
- ALWAYS end by calling report(...). The conductor sees ONLY that report — it is the entire value of your work. Specific beats vague: what you did, what you verified, key findings, exact paths. Fill key_facts (standalone facts downstream tasks need), open_questions, and files_touched — they are handed verbatim to dependent tasks. If your work drew on the web, fill sources (url + what it supports): only sources reported there can be cited in the final deliverable.
|
|
221
229
|
${roleHint ? "\n" + roleHint : ""}`;
|
|
222
230
|
}
|
|
223
231
|
exports.WORKER_KICKOFF = "Begin now. Work the task to completion, then call report(...).";
|
|
@@ -227,7 +235,7 @@ function forcedFinal(reason) {
|
|
|
227
235
|
return `${reason} Stop working and call your terminal tool RIGHT NOW with your best honest account: what you completed, what you verified, what remains.`;
|
|
228
236
|
}
|
|
229
237
|
// ============================================================ verifier
|
|
230
|
-
function verifierSystem(meta, task) {
|
|
238
|
+
function verifierSystem(meta, task, depReports = "") {
|
|
231
239
|
return `You are an adversarial verification agent. A worker claims it completed this task — your job is to try to falsify that claim with evidence.
|
|
232
240
|
|
|
233
241
|
MISSION (for context): ${(0, util_1.clip)(meta.mission, 400)}
|
|
@@ -238,18 +246,18 @@ ${task.context ? `Context: ${(0, util_1.clip)(task.context, 600)}` : ""}
|
|
|
238
246
|
Worker's report:
|
|
239
247
|
${(0, util_1.clip)(task.report ?? "", 2400)}
|
|
240
248
|
${task.artifacts.length ? `Claimed artifacts: ${task.artifacts.join(", ")}` : ""}
|
|
241
|
-
|
|
249
|
+
${depReports ? `\nUPSTREAM INPUTS (settled dependency reports — what this task had to build on; judge completeness against them):\n${depReports}\n` : ""}
|
|
242
250
|
Working directory: ${meta.cwd}
|
|
243
251
|
|
|
244
252
|
PROTOCOL
|
|
245
253
|
- Do NOT trust the report. Verify concretely with tools: read the files it claims to have written, run the build/tests/commands, fetch the URLs, check the numbers. You see only the worker's CLAIMS — gather your own evidence; do not assume shared context.
|
|
246
254
|
- RUBRIC — fail unless all hold:
|
|
247
|
-
1. Completeness: every part of the objective and its "Done when" criteria is addressed.
|
|
255
|
+
1. Completeness: every part of the objective and its "Done when" criteria is addressed${depReports ? " (including everything the upstream inputs handed over)" : ""}.
|
|
248
256
|
2. Evidence: each substantive claim in the report is backed by something you verified yourself.
|
|
249
257
|
3. Deliverables: claimed files/artifacts exist, are non-trivial (not stubs/placeholders), and match what the report says about them.
|
|
250
258
|
4. Correctness: commands/builds/tests the task implies actually succeed when you run them.
|
|
251
259
|
- Spot-check depth over exhaustive breadth; ~5-12 tool steps.
|
|
252
|
-
- Then call verdict(pass, feedback). On fail,
|
|
260
|
+
- Then call verdict(pass, feedback, issues). On fail, ALSO fill issues — one entry per concrete problem with the evidence you gathered and the exact change needed; the worker's retry sees them verbatim. On pass, feedback is one line citing the evidence you checked.`;
|
|
253
261
|
}
|
|
254
262
|
exports.VERIFIER_KICKOFF = "Verify now, then call verdict(...).";
|
|
255
263
|
// ============================================================ synthesizer
|
|
@@ -265,13 +273,13 @@ Conductor's closing notes: ${opts.finishNotes || "(none)"}
|
|
|
265
273
|
ALL TASK REPORTS
|
|
266
274
|
${opts.reports}
|
|
267
275
|
|
|
268
|
-
${opts.blackboard ? `BLACKBOARD\n${opts.blackboard}\n` : ""}${opts.artifactList ? `ARTIFACTS ON DISK\n${opts.artifactList}\n` : ""}
|
|
276
|
+
${opts.sources ? `SOURCES (numbered, deduplicated from the task reports — the only sources that exist)\n${opts.sources}\n\n` : ""}${opts.blackboard ? `BLACKBOARD\n${opts.blackboard}\n` : ""}${opts.artifactList ? `ARTIFACTS ON DISK\n${opts.artifactList}\n` : ""}
|
|
269
277
|
Working directory: ${opts.meta.cwd}
|
|
270
278
|
|
|
271
279
|
PROTOCOL
|
|
272
280
|
- You may read files (read_file / list_dir) to confirm specifics before writing — verify key claims you repeat.
|
|
273
281
|
- The mission's PRIMARY deliverable should exist in the format that serves it best, not only as prose. If the task reports produced data, comparisons, or rankings that the artifacts don't already capture in a structured form, save them now with save_artifact (e.g. data/results.csv, data/findings.json) before submitting. Don't duplicate artifacts that already exist — point to them.
|
|
274
|
-
- Then call submit_final with:
|
|
282
|
+
${opts.sources ? `- CITE YOUR SOURCES: where a claim rests on a numbered source, cite it inline as [n]. End report_markdown with a \`## Sources\` section listing each number you actually cited as a markdown link ([n] [title](url)). Never invent a source or cite a number not in the list. Where sources conflict, present both positions with their citations — do not silently pick one.\n` : ""}- Then call submit_final with:
|
|
275
283
|
• report_markdown — the deliverable document. Structure: # title; **Outcome** first (did the mission succeed, headline results); then What was built/found with evidence and exact paths; How to use/run it (if applicable); Open issues & recommended next steps. Write for the operator: complete, concrete, zero filler. Use real markdown tables for tabular findings. (A styled HTML rendering is generated automatically — do not hand-write one.)
|
|
276
284
|
• summary — ≤8 sentences for the console.
|
|
277
285
|
- The report stands alone: a reader who saw nothing else must understand what happened and where everything is.`;
|
|
@@ -292,7 +300,7 @@ ${reports}
|
|
|
292
300
|
|
|
293
301
|
Reply with EXACTLY "COMPLETE" if the mission's requirements are genuinely covered. Otherwise reply with a short numbered list of concrete gaps (max 5), each one actionable enough to become a task. Do not invent nice-to-haves — only true gaps against the stated mission.`;
|
|
294
302
|
}
|
|
295
|
-
function synthCheckPrompt(mission, reports, finalReport) {
|
|
303
|
+
function synthCheckPrompt(mission, reports, finalReport, sources) {
|
|
296
304
|
return `You are checking a final mission report for faithfulness before delivery. Compare it against the underlying task reports.
|
|
297
305
|
|
|
298
306
|
MISSION
|
|
@@ -301,10 +309,10 @@ ${mission}
|
|
|
301
309
|
TASK REPORTS (ground truth)
|
|
302
310
|
${reports}
|
|
303
311
|
|
|
304
|
-
FINAL REPORT (to check)
|
|
312
|
+
${sources ? `SOURCE LIST (the only citable sources)\n${sources}\n\n` : ""}FINAL REPORT (to check)
|
|
305
313
|
${finalReport}
|
|
306
314
|
|
|
307
|
-
Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
|
|
315
|
+
Reply with EXACTLY "OK" if the final report's claims are supported by the task reports and nothing material is misrepresented or fabricated${sources ? ", its inline [n] citations all reference numbers that exist in the source list, and no key web-derived factual claim is left uncited" : ""}. Otherwise list the specific discrepancies (max 5), each citing what the final report says vs what the task reports support.`;
|
|
308
316
|
}
|
|
309
317
|
// ============================================================ compaction
|
|
310
318
|
function compactorPrompt(serialized) {
|
package/dist/report.js
CHANGED
|
@@ -11,8 +11,45 @@
|
|
|
11
11
|
* broken markup.
|
|
12
12
|
*/
|
|
13
13
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.aggregateSources = aggregateSources;
|
|
15
|
+
exports.sourcesBlock = sourcesBlock;
|
|
14
16
|
exports.mdToHtml = mdToHtml;
|
|
15
17
|
exports.renderFinalHtml = renderFinalHtml;
|
|
18
|
+
const searchcore_1 = require("./searchcore");
|
|
19
|
+
/**
 * Build one numbered bibliography from the sources every task reported.
 * Sources are keyed by canonical URL: the first task to report a URL fixes
 * its number, and later reports of the same URL only add their task id and
 * fill in a title, date, or note that is still missing.
 */
function aggregateSources(tasks) {
    const numbered = new Map();
    for (const task of tasks) {
        const reported = task.sources ?? [];
        for (const source of reported) {
            const canonical = (0, searchcore_1.canonicalizeUrl)(source.url);
            const existing = numbered.get(canonical);
            if (!existing) {
                numbered.set(canonical, { ...source, n: numbered.size + 1, taskIds: [task.id] });
                continue;
            }
            if (!existing.taskIds.includes(task.id)) {
                existing.taskIds.push(task.id);
            }
            // Backfill only the fields the earlier entry left empty.
            for (const field of ["title", "date", "note"]) {
                if (!existing[field] && source[field]) {
                    existing[field] = source[field];
                }
            }
        }
    }
    return Array.from(numbered.values());
}
|
|
47
|
+
/**
 * Format the numbered bibliography for prompts, one source per line:
 * "[n] title — url (date) — note [cited by ids]", with the title, date and
 * note segments omitted when absent.
 */
function sourcesBlock(sources) {
    const lines = [];
    for (const source of sources) {
        let line = `[${source.n}] `;
        if (source.title) {
            line += `${source.title} — `;
        }
        line += source.url;
        if (source.date) {
            line += ` (${source.date})`;
        }
        if (source.note) {
            line += ` — ${source.note}`;
        }
        line += ` [cited by ${source.taskIds.join(",")}]`;
        lines.push(line);
    }
    return lines.join("\n");
}
|
|
16
53
|
function esc(s) {
|
|
17
54
|
return s
|
|
18
55
|
.replace(/&/g, "&")
|
package/dist/run.js
CHANGED
|
@@ -198,6 +198,14 @@ function listRuns(pricing) {
|
|
|
198
198
|
s.pid = readPid(id);
|
|
199
199
|
out.push(applyLiveness(s));
|
|
200
200
|
}
|
|
201
|
+
// Deleted runs must not pin their reduced state in a long-lived hub forever.
|
|
202
|
+
const live = new Set(ids);
|
|
203
|
+
for (const key of summaryCache.keys())
|
|
204
|
+
if (!live.has(key))
|
|
205
|
+
summaryCache.delete(key);
|
|
206
|
+
for (const key of liveCache.keys())
|
|
207
|
+
if (!live.has(key))
|
|
208
|
+
liveCache.delete(key);
|
|
201
209
|
out.sort((a, b) => b.createdAt - a.createdAt);
|
|
202
210
|
return out;
|
|
203
211
|
}
|
package/dist/sandbox.js
CHANGED
|
@@ -302,7 +302,17 @@ class RemoteRuntime {
|
|
|
302
302
|
throw new Error(`${what} failed (exit ${r.code}): ${r.out.slice(0, 300)}`);
|
|
303
303
|
return r.out;
|
|
304
304
|
}
|
|
305
|
+
/** base64-over-shell transfers buffer the whole file — refuse the huge ones. */
|
|
306
|
+
async checkSize(abs, capBytes, what) {
|
|
307
|
+
const out = await this.execOk(`wc -c < ${shq(abs)}`, `stat ${abs}`);
|
|
308
|
+
const size = Number(out.trim());
|
|
309
|
+
if (Number.isFinite(size) && size > capBytes) {
|
|
310
|
+
throw new Error(`${what}: file is ${Math.round(size / 1e6)}MB (cap ${Math.round(capBytes / 1e6)}MB) — ` +
|
|
311
|
+
`compress it or extract the relevant part in the sandbox first`);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
305
314
|
async readFile(abs) {
|
|
315
|
+
await this.checkSize(abs, 4_000_000, `read ${abs}`);
|
|
306
316
|
const out = await this.execOk(`base64 < ${shq(abs)}`, `read ${abs}`);
|
|
307
317
|
return Buffer.from(out.replace(/\s+/g, ""), "base64").toString("utf8");
|
|
308
318
|
}
|
|
@@ -319,6 +329,7 @@ class RemoteRuntime {
|
|
|
319
329
|
}
|
|
320
330
|
}
|
|
321
331
|
async pull(remoteAbs, localAbs) {
|
|
332
|
+
await this.checkSize(remoteAbs, 32_000_000, `pull ${remoteAbs}`);
|
|
322
333
|
const out = await this.execOk(`base64 < ${shq(remoteAbs)}`, `pull ${remoteAbs}`);
|
|
323
334
|
(0, util_1.ensureDir)(path.dirname(localAbs));
|
|
324
335
|
fs.writeFileSync(localAbs, Buffer.from(out.replace(/\s+/g, ""), "base64"));
|
package/dist/searchcore.js
CHANGED
|
@@ -9,8 +9,11 @@
|
|
|
9
9
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
10
|
exports.queryTerms = queryTerms;
|
|
11
11
|
exports.expandQueries = expandQueries;
|
|
12
|
+
exports.reformulate = reformulate;
|
|
13
|
+
exports.looksAcademic = looksAcademic;
|
|
12
14
|
exports.canonicalizeUrl = canonicalizeUrl;
|
|
13
15
|
exports.classifySource = classifySource;
|
|
16
|
+
exports.freshnessBoost = freshnessBoost;
|
|
14
17
|
exports.detectDate = detectDate;
|
|
15
18
|
exports.selectPassages = selectPassages;
|
|
16
19
|
exports.scorePage = scorePage;
|
|
@@ -42,6 +45,22 @@ function expandQueries(query, max = 3) {
|
|
|
42
45
|
const seen = new Set();
|
|
43
46
|
return out.map((q) => q.trim()).filter((q) => q && !seen.has(q.toLowerCase()) && seen.add(q.toLowerCase())).slice(0, max);
|
|
44
47
|
}
|
|
48
|
+
/**
 * Build a simpler fallback phrasing for a query that returned nothing:
 * quote characters and search operators (site:, intitle:, inurl:, filetype:)
 * are removed and the first six keyword terms are kept. Yields "" when that
 * leaves nothing, or nothing different from the lower-cased original.
 */
function reformulate(query) {
    const withoutQuotes = query.replace(/["'""'']/g, " ");
    const withoutOperators = withoutQuotes.replace(/\b(site|intitle|inurl|filetype):\S+/gi, " ");
    const simplified = queryTerms(withoutOperators).slice(0, 6).join(" ");
    if (!simplified) {
        return "";
    }
    const original = query.toLowerCase().trim();
    return simplified === original ? "" : simplified;
}
|
|
60
|
+
/**
 * True when the query contains a scholarly cue word (paper, study, arxiv,
 * doi, peer-review, dataset, "et al", …), matched case-insensitively on
 * word boundaries. Used to trigger the scholarly engines in deep mode.
 */
function looksAcademic(query) {
    const scholarlyCue = /\b(paper|papers|study|studies|research|arxiv|doi|journal|peer.?review(ed)?|preprint|dataset|benchmark|survey|meta.?analysis|citations?|et al)\b/i;
    return scholarlyCue.test(query);
}
|
|
45
64
|
const TRACKING_KEYS = new Set(["fbclid", "gclid", "mc_cid", "mc_eid"]);
|
|
46
65
|
/** Stable canonical form for dedup: strip tracking params, www, trailing slash; sort the query. */
|
|
47
66
|
function canonicalizeUrl(url) {
|
|
@@ -61,18 +80,50 @@ function canonicalizeUrl(url) {
|
|
|
61
80
|
path = path.replace(/\/+$/, "");
|
|
62
81
|
return `${u.protocol.toLowerCase()}//${host}${path}${query}`;
|
|
63
82
|
}
|
|
83
|
+
/** Hosts treated as scholarly sources (the host itself or any subdomain of it). */
const ACADEMIC_HOSTS = [
    "arxiv.org",
    "doi.org",
    "semanticscholar.org",
    "ncbi.nlm.nih.gov",
    "nature.com",
    "sciencedirect.com",
    "springer.com",
    "link.springer.com",
    "scholar.google.com",
    "acm.org",
    "ieee.org",
];
/**
 * Classify a domain by source type.
 *
 * @param {string} domain - Host name, any case.
 * @returns {"government" | "academic" | "social" | "news" | "secondary"}
 *   The first matching category, checked in that order; "secondary" when
 *   nothing matches.
 */
function classifySource(domain) {
    const d = domain.toLowerCase();
    // Exact host or a true subdomain — a bare substring test would file
    // netflix.com or dropbox.com under "x.com".
    const isHostOrSubdomain = (hosts) => hosts.some((h) => d === h || d.endsWith(`.${h}`));
    if (d.endsWith(".gov") || d.endsWith(".mil"))
        return "government";
    if (d.endsWith(".edu"))
        return "academic";
    if (isHostOrSubdomain(ACADEMIC_HOSTS))
        return "academic";
    if (isHostOrSubdomain(["twitter.com", "x.com", "reddit.com", "facebook.com"]))
        return "social";
    // News detection stays a loose substring heuristic.
    if (d.includes("news") || d.includes("reuters.com") || d.includes("apnews.com") || d.includes("bbc."))
        return "news";
    return "secondary";
}
|
|
110
|
+
/**
 * Score bonus for recent material. Accepts an ISO-style date ("YYYY",
 * "YYYY-MM" or "YYYY-MM-DD" prefix); a missing month defaults to July and a
 * missing day to the 15th. Returns 3 when the date is under a year before
 * `now`, 2 under two years, 1 under five years, and 0 for anything older,
 * undated, or unparseable.
 */
function freshnessBoost(date, now = Date.now()) {
    const parts = date ? /^(\d{4})(?:-(\d{1,2})(?:-(\d{1,2}))?)?/.exec(date.trim()) : null;
    if (!parts) {
        return 0;
    }
    const [, year, month, day] = parts;
    const monthIndex = month ? Number(month) - 1 : 6;
    const dayOfMonth = day ? Number(day) : 15;
    const published = Date.UTC(Number(year), monthIndex, dayOfMonth);
    // 31_557_600_000 ms = 365.25 days.
    const ageYears = (now - published) / 31_557_600_000;
    const tiers = [
        [1, 3],
        [2, 2],
        [5, 1],
    ];
    for (const [maxAge, boost] of tiers) {
        if (ageYears < maxAge) {
            return boost;
        }
    }
    return 0;
}
|
|
76
127
|
/** ISO date if present, else a bare year. */
|
|
77
128
|
function detectDate(text) {
|
|
78
129
|
const iso = /\b(20\d{2}-\d{2}-\d{2})\b/.exec(text);
|
|
@@ -136,8 +187,7 @@ function scorePage(page, terms) {
|
|
|
136
187
|
score += 4;
|
|
137
188
|
if (["pypi.org", "npmjs.com", "rubygems.org"].includes(domain))
|
|
138
189
|
score -= 2;
|
|
139
|
-
|
|
140
|
-
score += 1;
|
|
190
|
+
score += freshnessBoost(page.date);
|
|
141
191
|
const lowered = page.text.toLowerCase();
|
|
142
192
|
for (const t of terms)
|
|
143
193
|
if (lowered.includes(t))
|
|
@@ -158,6 +208,9 @@ function resultQualityScore(c) {
|
|
|
158
208
|
score += 4;
|
|
159
209
|
if (url.includes("github.com") || url.includes("gitlab.com"))
|
|
160
210
|
score += 3;
|
|
211
|
+
if (c.engine === "arxiv" || c.engine === "crossref")
|
|
212
|
+
score += 3;
|
|
213
|
+
score += Math.min(2, freshnessBoost(c.date));
|
|
161
214
|
if (LOW_VALUE_SNIPPET.some((t) => snippet.includes(t)))
|
|
162
215
|
score -= 10;
|
|
163
216
|
return score;
|
package/dist/state.js
CHANGED
|
@@ -21,6 +21,8 @@ class RunState {
|
|
|
21
21
|
usageByModel = new Map();
|
|
22
22
|
totalUsage = { ...types_1.ZERO_USAGE };
|
|
23
23
|
cost = 0;
|
|
24
|
+
/** Sampled cumulative token spend over time (budget sparkline). */
|
|
25
|
+
budgetSeries = [];
|
|
24
26
|
finalSummary;
|
|
25
27
|
finalReportPath;
|
|
26
28
|
lastSeq = 0;
|
|
@@ -55,6 +57,7 @@ class RunState {
|
|
|
55
57
|
this.usageByModel.set(model, (0, types_1.addUsage)(this.usageByModel.get(model) ?? { ...types_1.ZERO_USAGE }, u));
|
|
56
58
|
this.totalUsage = (0, types_1.addUsage)(this.totalUsage, u);
|
|
57
59
|
this.cost += (0, types_1.usageCost)(u, this.pricing[model]);
|
|
60
|
+
this.pushBudgetPoint(ev.t);
|
|
58
61
|
}
|
|
59
62
|
return;
|
|
60
63
|
}
|
|
@@ -127,6 +130,8 @@ class RunState {
|
|
|
127
130
|
t.openQuestions = ev.openQuestions;
|
|
128
131
|
if (Array.isArray(ev.filesTouched))
|
|
129
132
|
t.filesTouched = ev.filesTouched;
|
|
133
|
+
if (Array.isArray(ev.sources))
|
|
134
|
+
t.sources = ev.sources;
|
|
130
135
|
}
|
|
131
136
|
break;
|
|
132
137
|
}
|
|
@@ -202,15 +207,17 @@ class RunState {
|
|
|
202
207
|
key: ev.key,
|
|
203
208
|
kind: ev.kind,
|
|
204
209
|
text: ev.text,
|
|
210
|
+
url: typeof ev.url === "string" ? ev.url : undefined,
|
|
205
211
|
});
|
|
206
212
|
// Reduced state is held live by the hub and the resume seed — keep
|
|
207
|
-
// only the tail that digests/views actually use. Decisions
|
|
208
|
-
// dropped: they anchor
|
|
213
|
+
// only the tail that digests/views actually use. Decisions and
|
|
214
|
+
// conflicts are never dropped: they anchor long-horizon coherence.
|
|
209
215
|
if (this.notes.length > 1000) {
|
|
210
|
-
const
|
|
211
|
-
const
|
|
212
|
-
|
|
213
|
-
|
|
216
|
+
const keep = (n) => n.kind === "decision" || n.kind === "conflict";
|
|
217
|
+
const pinned = this.notes.filter(keep);
|
|
218
|
+
const rest = this.notes.filter((n) => !keep(n));
|
|
219
|
+
rest.splice(0, rest.length - Math.max(0, 1000 - pinned.length));
|
|
220
|
+
this.notes = [...pinned, ...rest].sort((a, b) => a.t - b.t);
|
|
214
221
|
}
|
|
215
222
|
break;
|
|
216
223
|
case "conductor.say":
|
|
@@ -233,6 +240,7 @@ class RunState {
|
|
|
233
240
|
this.usageByModel.set(model, (0, types_1.addUsage)(this.usageByModel.get(model) ?? { ...types_1.ZERO_USAGE }, u));
|
|
234
241
|
this.totalUsage = (0, types_1.addUsage)(this.totalUsage, u);
|
|
235
242
|
this.cost += (0, types_1.usageCost)(u, this.pricing[model]);
|
|
243
|
+
this.pushBudgetPoint(ev.t);
|
|
236
244
|
break;
|
|
237
245
|
}
|
|
238
246
|
case "run.final":
|
|
@@ -241,6 +249,26 @@ class RunState {
|
|
|
241
249
|
break;
|
|
242
250
|
}
|
|
243
251
|
}
|
|
252
|
+
/**
|
|
253
|
+
* Sample the cumulative spend: a point per meaningful jump (≥0.5% of the
|
|
254
|
+
* budget cap, or 2k tokens unbounded), halving resolution past 600 points.
|
|
255
|
+
*/
|
|
256
|
+
pushBudgetPoint(t) {
|
|
257
|
+
const tokens = this.totalUsage.promptTokens + this.totalUsage.completionTokens;
|
|
258
|
+
const cap = this.meta?.options?.maxTokens ?? 0;
|
|
259
|
+
const minStep = cap > 0 ? Math.max(2000, cap * 0.005) : 2000;
|
|
260
|
+
const last = this.budgetSeries[this.budgetSeries.length - 1];
|
|
261
|
+
if (last && tokens - last.tokens < minStep) {
|
|
262
|
+
last.t = t;
|
|
263
|
+
last.tokens = tokens;
|
|
264
|
+
last.cost = this.cost;
|
|
265
|
+
return;
|
|
266
|
+
}
|
|
267
|
+
this.budgetSeries.push({ t, tokens, cost: this.cost });
|
|
268
|
+
if (this.budgetSeries.length > 600) {
|
|
269
|
+
this.budgetSeries = this.budgetSeries.filter((_, i) => i % 2 === 0 || i === this.budgetSeries.length - 1);
|
|
270
|
+
}
|
|
271
|
+
}
|
|
244
272
|
taskList() {
|
|
245
273
|
return this.taskOrder.map((id) => this.tasks.get(id)).filter(Boolean);
|
|
246
274
|
}
|