@robzilla1738/agentswarm 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -11
- package/dist/agent.js +18 -2
- package/dist/cli.js +39 -8
- package/dist/config.js +62 -6
- package/dist/crawltools.js +247 -0
- package/dist/deepseek.js +125 -10
- package/dist/executor.js +993 -144
- package/dist/hub.js +85 -6
- package/dist/journal.js +61 -11
- package/dist/memory.js +84 -0
- package/dist/pdftext.js +211 -0
- package/dist/prompts.js +124 -23
- package/dist/report.js +289 -0
- package/dist/run.js +15 -2
- package/dist/sandbox.js +11 -0
- package/dist/searchcore.js +244 -0
- package/dist/state.js +85 -3
- package/dist/tools.js +392 -25
- package/dist/util.js +85 -0
- package/dist/webtools.js +327 -66
- package/package.json +3 -2
- package/ui/out/404/index.html +1 -1
- package/ui/out/404.html +1 -1
- package/ui/out/_next/static/chunks/532-35122e93f37719b9.js +1 -0
- package/ui/out/_next/static/chunks/677-721ce1c8b7a6a317.js +1 -0
- package/ui/out/_next/static/chunks/app/page-dc9f6744d203e76c.js +1 -0
- package/ui/out/_next/static/chunks/app/run/page-3674e103981703a2.js +1 -0
- package/ui/out/_next/static/chunks/app/settings/page-41a5d8ba43ecfd4a.js +1 -0
- package/ui/out/_next/static/css/d95c2ba395730031.css +3 -0
- package/ui/out/fonts/PlanetKosmos.ttf +0 -0
- package/ui/out/index.html +1 -1
- package/ui/out/index.txt +3 -3
- package/ui/out/run/index.html +1 -1
- package/ui/out/run/index.txt +3 -3
- package/ui/out/settings/index.html +1 -1
- package/ui/out/settings/index.txt +3 -3
- package/ui/out/_next/static/chunks/383-289a866b246b41cc.js +0 -1
- package/ui/out/_next/static/chunks/619-ba102abea3e3d0e4.js +0 -1
- package/ui/out/_next/static/chunks/677-7ab85a6f38c3a235.js +0 -1
- package/ui/out/_next/static/chunks/app/page-0fda5b8e77d90b84.js +0 -1
- package/ui/out/_next/static/chunks/app/run/page-07aab6b1224c3c8c.js +0 -1
- package/ui/out/_next/static/chunks/app/settings/page-528482d468d84cfa.js +0 -1
- package/ui/out/_next/static/css/e2c82b53bf4519e8.css +0 -3
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_buildManifest.js +0 -0
- /package/ui/out/_next/static/{Rm5Fhkds2-wIOnVlME55J → 7_pihFubDGD40BCy2ynlr}/_ssgManifest.js +0 -0
package/dist/hub.js
CHANGED
|
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.startHub = startHub;
|
|
37
|
+
exports.publicConfig = publicConfig;
|
|
37
38
|
const fs = __importStar(require("fs"));
|
|
38
39
|
const http = __importStar(require("http"));
|
|
39
40
|
const os = __importStar(require("os"));
|
|
@@ -41,6 +42,8 @@ const path = __importStar(require("path"));
|
|
|
41
42
|
const url_1 = require("url");
|
|
42
43
|
const config_1 = require("./config");
|
|
43
44
|
const control_1 = require("./control");
|
|
45
|
+
const crawltools_1 = require("./crawltools");
|
|
46
|
+
const webtools_1 = require("./webtools");
|
|
44
47
|
const deepseek_1 = require("./deepseek");
|
|
45
48
|
const providers_1 = require("./providers");
|
|
46
49
|
const journal_1 = require("./journal");
|
|
@@ -81,9 +84,16 @@ function startHub(opts) {
|
|
|
81
84
|
async function handle(req, res, opts) {
|
|
82
85
|
const url = new url_1.URL(req.url || "/", `http://localhost:${opts.port}`);
|
|
83
86
|
const p = url.pathname;
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
+
// Localhost-only CORS. The hub launches runs and reads reports with the
|
|
88
|
+
// operator's keys — a random website's JS must never get a readable
|
|
89
|
+
// response. The dev UI on another localhost port is the one legitimate
|
|
90
|
+
// cross-origin client; everyone else gets no CORS headers at all.
|
|
91
|
+
const origin = String(req.headers.origin || "");
|
|
92
|
+
if (/^https?:\/\/(localhost|127\.0\.0\.1|\[::1\])(:\d+)?$/.test(origin)) {
|
|
93
|
+
res.setHeader("access-control-allow-origin", origin);
|
|
94
|
+
res.setHeader("access-control-allow-methods", "GET, POST, DELETE, OPTIONS");
|
|
95
|
+
res.setHeader("access-control-allow-headers", "content-type");
|
|
96
|
+
}
|
|
87
97
|
if (req.method === "OPTIONS") {
|
|
88
98
|
res.writeHead(204);
|
|
89
99
|
res.end();
|
|
@@ -157,6 +167,51 @@ async function api(req, res, url, opts) {
|
|
|
157
167
|
const r = await (0, sandbox_1.testSandbox)(cfg, kind);
|
|
158
168
|
return sendJson(res, 200, { kind, ...r });
|
|
159
169
|
}
|
|
170
|
+
// Settings diagnostics: prove the search engines / crawl backend actually
|
|
171
|
+
// work with the saved keys before a mission depends on them.
|
|
172
|
+
if (p === "/api/search/test" && method === "POST") {
|
|
173
|
+
const q = "open source vector database";
|
|
174
|
+
const probe = async (engine, fn) => {
|
|
175
|
+
try {
|
|
176
|
+
const hits = await fn();
|
|
177
|
+
return { engine, ok: hits.length > 0, detail: `${hits.length} result(s)` };
|
|
178
|
+
}
|
|
179
|
+
catch (e) {
|
|
180
|
+
return { engine, ok: false, detail: (0, util_1.errMsg)(e) };
|
|
181
|
+
}
|
|
182
|
+
};
|
|
183
|
+
const checks = [probe("duckduckgo", () => (0, webtools_1.ddgSearch)(q, 3)), probe("bing", () => (0, webtools_1.bingSearch)(q, 3))];
|
|
184
|
+
if (cfg.tinyfishApiKey)
|
|
185
|
+
checks.push(probe("tinyfish", () => (0, webtools_1.tinyfishSearch)(cfg, q, 3)));
|
|
186
|
+
const engines = await Promise.all(checks);
|
|
187
|
+
return sendJson(res, 200, { ok: engines.some((e) => e.ok), engines });
|
|
188
|
+
}
|
|
189
|
+
if (p === "/api/crawl/test" && method === "POST") {
|
|
190
|
+
const backend = (0, crawltools_1.resolveCrawlBackend)(cfg);
|
|
191
|
+
if (!backend) {
|
|
192
|
+
return sendJson(res, 200, { ok: false, backend: null, detail: "no crawl backend configured — add a key first" });
|
|
193
|
+
}
|
|
194
|
+
try {
|
|
195
|
+
if ((0, crawltools_1.hasScrapeBackend)(cfg)) {
|
|
196
|
+
const text = await (0, crawltools_1.scrapeUrl)(cfg, "https://example.com/");
|
|
197
|
+
return sendJson(res, 200, {
|
|
198
|
+
ok: Boolean(text && text.length > 50),
|
|
199
|
+
backend,
|
|
200
|
+
detail: text ? `scraped ${text.length} chars` : "empty scrape result",
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
// deepcrawl has no single-page scrape — smoke a 1-page crawl instead.
|
|
204
|
+
const out = await (0, crawltools_1.crawlSite)(cfg, { url: "https://example.com/", maxPages: 1 });
|
|
205
|
+
return sendJson(res, 200, {
|
|
206
|
+
ok: out.pages.length > 0,
|
|
207
|
+
backend,
|
|
208
|
+
detail: out.pages.length ? `crawled ${out.pages.length} page(s)` : out.warnings.join("; ") || "no pages",
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
catch (e) {
|
|
212
|
+
return sendJson(res, 200, { ok: false, backend, detail: (0, util_1.errMsg)(e) });
|
|
213
|
+
}
|
|
214
|
+
}
|
|
160
215
|
if (p === "/api/models" && method === "GET") {
|
|
161
216
|
try {
|
|
162
217
|
const models = await (0, deepseek_1.listModels)(cfg);
|
|
@@ -255,7 +310,7 @@ async function api(req, res, url, opts) {
|
|
|
255
310
|
return sendJson(res, 200, { events, live: (0, run_1.isRunLive)(id) });
|
|
256
311
|
}
|
|
257
312
|
if (sub === "/stream" && method === "GET") {
|
|
258
|
-
return streamEvents(res, id);
|
|
313
|
+
return streamEvents(res, id, url.searchParams.get("quiet") === "1");
|
|
259
314
|
}
|
|
260
315
|
if (sub === "/note" && method === "POST") {
|
|
261
316
|
const body = await readBody(req);
|
|
@@ -295,6 +350,14 @@ async function api(req, res, url, opts) {
|
|
|
295
350
|
res.end(fs.readFileSync(file));
|
|
296
351
|
return;
|
|
297
352
|
}
|
|
353
|
+
if (sub === "/plan" && method === "GET") {
|
|
354
|
+
const file = path.join((0, config_1.runDir)(id), "artifacts", "mission-plan.md");
|
|
355
|
+
if (!fs.existsSync(file))
|
|
356
|
+
return sendJson(res, 404, { error: "no plan yet" });
|
|
357
|
+
res.writeHead(200, { "content-type": "text/markdown; charset=utf-8" });
|
|
358
|
+
res.end(fs.readFileSync(file));
|
|
359
|
+
return;
|
|
360
|
+
}
|
|
298
361
|
if (sub === "/artifacts" && method === "GET") {
|
|
299
362
|
return sendJson(res, 200, { artifacts: listArtifactFiles(id) });
|
|
300
363
|
}
|
|
@@ -316,7 +379,7 @@ async function api(req, res, url, opts) {
|
|
|
316
379
|
}
|
|
317
380
|
sendJson(res, 404, { error: "not found" });
|
|
318
381
|
}
|
|
319
|
-
function streamEvents(res, id) {
|
|
382
|
+
function streamEvents(res, id, quiet = false) {
|
|
320
383
|
res.writeHead(200, {
|
|
321
384
|
"content-type": "text/event-stream",
|
|
322
385
|
"cache-control": "no-cache, no-transform",
|
|
@@ -335,6 +398,9 @@ function streamEvents(res, id) {
|
|
|
335
398
|
return;
|
|
336
399
|
}
|
|
337
400
|
for (const ev of evs) {
|
|
401
|
+
// quiet mode: skip streaming chatter for clients rendering many agents.
|
|
402
|
+
if (quiet && ev.type === "agent.delta")
|
|
403
|
+
continue;
|
|
338
404
|
res.write(`data: ${JSON.stringify(ev)}\n\n`);
|
|
339
405
|
}
|
|
340
406
|
};
|
|
@@ -392,7 +458,15 @@ function publicConfig(cfg) {
|
|
|
392
458
|
tinyfishKeySet: Boolean(cfg.tinyfishApiKey),
|
|
393
459
|
tinyfishKeyMasked: (0, config_1.maskKey)(cfg.tinyfishApiKey),
|
|
394
460
|
searchBackend: cfg.searchBackend,
|
|
395
|
-
|
|
461
|
+
crawlBackend: cfg.crawlBackend,
|
|
462
|
+
crawlResolved: (0, crawltools_1.resolveCrawlBackend)(cfg),
|
|
463
|
+
firecrawlKeySet: Boolean(cfg.firecrawlApiKey),
|
|
464
|
+
firecrawlKeyMasked: (0, config_1.maskKey)(cfg.firecrawlApiKey),
|
|
465
|
+
contextdevKeySet: Boolean(cfg.contextdevApiKey),
|
|
466
|
+
contextdevKeyMasked: (0, config_1.maskKey)(cfg.contextdevApiKey),
|
|
467
|
+
deepcrawlKeySet: Boolean(cfg.deepcrawlApiKey),
|
|
468
|
+
deepcrawlKeyMasked: (0, config_1.maskKey)(cfg.deepcrawlApiKey),
|
|
469
|
+
deepcrawlBaseUrl: cfg.deepcrawlBaseUrl,
|
|
396
470
|
sandboxRuntime: cfg.sandboxRuntime,
|
|
397
471
|
sandboxResolved: (0, sandbox_1.resolveSandboxKind)(cfg),
|
|
398
472
|
sandboxImage: cfg.sandboxImage,
|
|
@@ -416,6 +490,9 @@ function publicConfig(cfg) {
|
|
|
416
490
|
reasoningEffort: cfg.reasoningEffort,
|
|
417
491
|
safeMode: cfg.safeMode,
|
|
418
492
|
contextTokenLimit: cfg.contextTokenLimit,
|
|
493
|
+
contextWindows: cfg.contextWindows,
|
|
494
|
+
cheapModel: cfg.cheapModel,
|
|
495
|
+
strongModel: cfg.strongModel,
|
|
419
496
|
knownModels,
|
|
420
497
|
pricing: cfg.pricing,
|
|
421
498
|
};
|
|
@@ -469,6 +546,8 @@ function snapshot(state, id) {
|
|
|
469
546
|
operatorNotes: state.operatorNotes,
|
|
470
547
|
usageByModel: Object.fromEntries(state.usageByModel),
|
|
471
548
|
cost: state.cost,
|
|
549
|
+
budgetSeries: state.budgetSeries,
|
|
550
|
+
planExcerpt: state.planExcerpt,
|
|
472
551
|
finalSummary: state.finalSummary,
|
|
473
552
|
finalReportPath: state.finalReportPath,
|
|
474
553
|
live: (0, run_1.isRunLive)(id),
|
package/dist/journal.js
CHANGED
|
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.Journal = void 0;
|
|
36
|
+
exports.Journal = exports.TeamJournal = void 0;
|
|
37
37
|
exports.eventsFile = eventsFile;
|
|
38
38
|
exports.readEvents = readEvents;
|
|
39
39
|
exports.lastSeq = lastSeq;
|
|
@@ -41,14 +41,36 @@ exports.readNewEvents = readNewEvents;
|
|
|
41
41
|
const fs = __importStar(require("fs"));
|
|
42
42
|
const path = __importStar(require("path"));
|
|
43
43
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
*
|
|
44
|
+
* A child swarm's view of its parent's journal: same file, same sequence,
|
|
45
|
+
* every event stamped with the owning team's task id so the reducer can
|
|
46
|
+
* partition team activity away from the root run.
|
|
47
47
|
*/
|
|
48
|
+
class TeamJournal {
    // Journal-like object that every call is forwarded to.
    inner;
    // Team task id stamped onto each event appended through this view.
    teamId;
    /**
     * @param inner  Object exposing append(type, payload), flush() and `degraded`.
     * @param teamId Id added to every appended payload as `teamId`.
     */
    constructor(inner, teamId) {
        Object.assign(this, { inner, teamId });
    }
    /**
     * Forward an event to the wrapped journal with `teamId` attached.
     * A `teamId` already present in `payload` takes precedence over this view's id.
     */
    append(type, payload = {}) {
        const stamped = { teamId: this.teamId, ...payload };
        return this.inner.append(type, stamped);
    }
    /** Delegate to the wrapped journal's flush() and return its result. */
    flush() {
        const { inner } = this;
        return inner.flush();
    }
    /** Mirrors the wrapped journal's `degraded` flag. */
    get degraded() {
        const { inner } = this;
        return inner.degraded;
    }
}
|
|
65
|
+
exports.TeamJournal = TeamJournal;
|
|
48
66
|
class Journal {
|
|
49
67
|
file;
|
|
50
68
|
seq;
|
|
51
69
|
chain = Promise.resolve();
|
|
70
|
+
buf = "";
|
|
71
|
+
failures = 0;
|
|
72
|
+
/** Set after repeated append failures: the source of truth is no longer being persisted. */
|
|
73
|
+
degraded = false;
|
|
52
74
|
onEvent;
|
|
53
75
|
constructor(runDirPath, startSeq) {
|
|
54
76
|
this.file = path.join(runDirPath, "events.jsonl");
|
|
@@ -56,12 +78,8 @@ class Journal {
|
|
|
56
78
|
}
|
|
57
79
|
append(type, payload = {}) {
|
|
58
80
|
const ev = { seq: this.seq++, t: Date.now(), type, ...payload };
|
|
59
|
-
|
|
60
|
-
this.chain = this.chain
|
|
61
|
-
.then(() => fs.promises.appendFile(this.file, line, "utf8"))
|
|
62
|
-
.catch(() => {
|
|
63
|
-
/* never break the run on journal IO; next append retries the chain */
|
|
64
|
-
});
|
|
81
|
+
this.buf += JSON.stringify(ev) + "\n";
|
|
82
|
+
this.chain = this.chain.then(() => this.drain());
|
|
65
83
|
try {
|
|
66
84
|
this.onEvent?.(ev);
|
|
67
85
|
}
|
|
@@ -70,8 +88,40 @@ class Journal {
|
|
|
70
88
|
}
|
|
71
89
|
return ev;
|
|
72
90
|
}
|
|
91
|
+
async drain() {
|
|
92
|
+
if (!this.buf)
|
|
93
|
+
return;
|
|
94
|
+
const chunk = this.buf;
|
|
95
|
+
this.buf = "";
|
|
96
|
+
try {
|
|
97
|
+
await fs.promises.appendFile(this.file, chunk, "utf8");
|
|
98
|
+
this.failures = 0;
|
|
99
|
+
}
|
|
100
|
+
catch (e) {
|
|
101
|
+
// Keep the unwritten events buffered so the next append/flush retries
|
|
102
|
+
// them in order; after repeated failures, stop pretending it's fine.
|
|
103
|
+
this.buf = chunk + this.buf;
|
|
104
|
+
this.failures++;
|
|
105
|
+
if (this.failures >= 5 && !this.degraded) {
|
|
106
|
+
this.degraded = true;
|
|
107
|
+
process.stderr.write(`agentswarm: journal writes are failing (${String(e)}); run state is no longer durable\n`);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
73
111
|
flush() {
|
|
74
|
-
return this.chain;
|
|
112
|
+
return this.chain.then(() => this.drain());
|
|
113
|
+
}
|
|
114
|
+
/** Last-gasp synchronous flush for signal handlers and exit paths. */
|
|
115
|
+
flushSync() {
|
|
116
|
+
if (!this.buf)
|
|
117
|
+
return;
|
|
118
|
+
try {
|
|
119
|
+
fs.appendFileSync(this.file, this.buf, "utf8");
|
|
120
|
+
this.buf = "";
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
/* nothing left to do */
|
|
124
|
+
}
|
|
75
125
|
}
|
|
76
126
|
}
|
|
77
127
|
exports.Journal = Journal;
|
package/dist/memory.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.memoryFile = memoryFile;
|
|
37
|
+
exports.loadMemory = loadMemory;
|
|
38
|
+
exports.appendMemory = appendMemory;
|
|
39
|
+
exports.memoryBlock = memoryBlock;
|
|
40
|
+
const crypto = __importStar(require("crypto"));
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const config_1 = require("./config");
|
|
44
|
+
const util_1 = require("./util");
|
|
45
|
+
const MAX_ENTRIES = 20;
|
|
46
|
+
/**
 * Location of the memory file for a workspace:
 * `<home>/memory/<hash>.json`, where <hash> is the first 12 hex characters of
 * the SHA-1 of the resolved (absolute) working directory.
 */
function memoryFile(cwd) {
    const workspace = path.resolve(cwd);
    const digest = crypto.createHash("sha1").update(workspace).digest("hex");
    const fileName = `${digest.slice(0, 12)}.json`;
    return path.join((0, config_1.home)(), "memory", fileName);
}
|
|
50
|
+
/**
 * Read the stored entries for a workspace.
 * Returns [] when the file is missing, unreadable, not valid JSON, or does not
 * hold an `entries` array.
 */
function loadMemory(cwd) {
    let parsed;
    try {
        const contents = fs.readFileSync(memoryFile(cwd), "utf8");
        parsed = JSON.parse(contents);
    }
    catch {
        return [];
    }
    const stored = parsed?.entries;
    if (!Array.isArray(stored))
        return [];
    return stored;
}
|
|
59
|
+
/**
 * Record `entry` in the workspace memory file, best-effort.
 * An existing entry with the same (truthy) runId is replaced, so an interim
 * record can later be overwritten by the final one. Only the newest
 * MAX_ENTRIES entries are kept. Any failure is swallowed.
 */
function appendMemory(cwd, entry) {
    try {
        const fromSameRun = (e) => Boolean(entry.runId) && e.runId === entry.runId;
        const kept = loadMemory(cwd).filter((e) => !fromSameRun(e));
        kept.push(entry);
        const entries = kept.slice(-MAX_ENTRIES);
        const target = memoryFile(cwd);
        const record = { cwd: path.resolve(cwd), entries };
        (0, util_1.writeJson)(target, record);
    }
    catch {
        /* memory is best-effort */
    }
}
|
|
71
|
+
/**
 * Prompt block for the conductor, or "" when there's no history.
 *
 * Renders up to the eight most recent entries, one bullet per run, and clips
 * the whole block to 4000 characters.
 *
 * The memory file is read back from disk without validation, so this function
 * tolerates malformed entries instead of throwing: non-object entries are
 * skipped, an unparseable `finishedAt` is shown as "unknown date", a missing
 * `keyDecisions` is treated as empty, and missing `mission`/`summary` render
 * as empty strings.
 *
 * @param {string} cwd Workspace directory whose history should be rendered.
 * @returns {string} The prompt block, or "" when no usable entries exist.
 */
function memoryBlock(cwd) {
    const entries = loadMemory(cwd).filter((e) => e !== null && typeof e === "object");
    if (!entries.length)
        return "";
    const lines = entries.slice(-8).map((e) => {
        // toISOString() throws a RangeError on an invalid date, so check first.
        const finished = new Date(e.finishedAt);
        const when = Number.isNaN(finished.getTime()) ? "unknown date" : finished.toISOString().slice(0, 10);
        const keyDecisions = Array.isArray(e.keyDecisions) ? e.keyDecisions : [];
        const decisions = keyDecisions.length
            ? ` Decisions: ${keyDecisions.map((d) => (0, util_1.oneLine)(d, 100)).join("; ")}`
            : "";
        const mission = (0, util_1.oneLine)(String(e.mission ?? ""), 100);
        const summary = (0, util_1.oneLine)(String(e.summary ?? ""), 200);
        return `- [${when}, ${e.status}] "${mission}" — ${summary}${decisions}`;
    });
    return (0, util_1.clip)(`PRIOR RUNS IN THIS WORKSPACE (build on them; don't redo settled decisions without reason):\n${lines.join("\n")}`, 4000);
}
|
package/dist/pdftext.js
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.extractPdfText = extractPdfText;
|
|
37
|
+
const zlib = __importStar(require("zlib"));
|
|
38
|
+
/**
 * Minimal zero-dependency PDF text extraction: inflate FlateDecode content
 * streams (Node's built-in zlib) and interpret the text-showing operators
 * (Tj / TJ / ' / "). Good enough for most digitally-produced text PDFs;
 * returns null for scanned, encrypted, or exotic-encoding documents so the
 * caller can tell the agent to find an HTML source instead.
 *
 * @param {Buffer} buf Raw bytes of the document.
 * @returns {{ text: string, pages: number } | null} Extracted text and a page
 *   count estimated from `/Type /Page` markers (at least 1), or null when the
 *   input is not a PDF or fewer than 40 printable non-space characters result.
 */
function extractPdfText(buf) {
    if (buf.subarray(0, 5).toString("latin1") !== "%PDF-")
        return null;
    // latin1 preserves bytes 1:1, so stream offsets in the string match the buffer.
    const raw = buf.toString("latin1");
    // Count "/Type /Page" objects but not the "/Type /Pages" tree nodes.
    const pages = (raw.match(/\/Type\s*\/Pages?\b/g) || []).filter((m) => !/Pages/.test(m)).length || 1;
    let text = "";
    const streamRe = /<<([\s\S]{0,2000}?)>>\s*stream\r?\n/g;
    let m;
    while ((m = streamRe.exec(raw))) {
        const dict = m[1];
        const start = m.index + m[0].length;
        const end = raw.indexOf("endstream", start);
        if (end < 0)
            continue;
        // Resume scanning after this stream's payload, not inside it.
        streamRe.lastIndex = end;
        const isFlate = /FlateDecode/.test(dict);
        // Only plain or Flate-compressed streams are supported.
        if (/\/Filter/.test(dict) && !isFlate)
            continue;
        // Drop the EOL marker(s) that precede "endstream".
        let len = end;
        while (len > start && (raw[len - 1] === "\n" || raw[len - 1] === "\r"))
            len--;
        let data;
        if (isFlate) {
            data = inflateStream(buf, start, len, end);
            if (!data)
                continue;
        }
        else {
            data = buf.subarray(start, len);
        }
        const content = data.toString("latin1");
        if (!/\bBT\b/.test(content))
            continue; // not a text content stream
        const extracted = extractFromContent(content);
        if (extracted.trim())
            text += extracted + "\n";
    }
    const cleaned = text
        .replace(/[^\S\n]+/g, " ")
        .replace(/ ?\n ?/g, "\n")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
    // CID/Type0 fonts yield glyph-index garbage; require a body of real characters.
    // Keep printable ASCII, newlines and non-control characters from U+00A0 up
    // (written as escapes so the range cannot be lost to invisible characters);
    // this preserves accented Latin-1 text and UTF-16 text from hex strings.
    const printable = cleaned.replace(/[^\x20-\x7E\n\u00A0-\uFFFF]/g, "");
    if (printable.replace(/\s/g, "").length < 40)
        return null;
    return { text: printable, pages };
}
/**
 * Inflate the stream payload that begins at `start`.
 *
 * `trimmedEnd` is the payload end after stripping every trailing CR/LF and
 * `end` is the offset of "endstream". Compressed data may itself end in bytes
 * 0x0A/0x0D, which the EOL trim cannot tell apart from the EOL marker, so when
 * the trimmed slice does not inflate the stripped bytes are restored one at a
 * time. Returns null when no candidate inflates.
 */
function inflateStream(buf, start, trimmedEnd, end) {
    // Bound the retries so a corrupt stream followed by many newlines stays cheap.
    const MAX_RESTORED_BYTES = 8;
    const last = Math.min(end, trimmedEnd + MAX_RESTORED_BYTES);
    for (let stop = trimmedEnd; stop <= last; stop++) {
        try {
            return zlib.inflateSync(buf.subarray(start, stop));
        }
        catch {
            /* truncated or not zlib data: try with one more byte restored */
        }
    }
    return null;
}
|
|
94
|
+
/** Walk a content stream, collecting strings shown by Tj/TJ/'/" with newline heuristics. */
|
|
95
|
+
function extractFromContent(src) {
|
|
96
|
+
let out = "";
|
|
97
|
+
let pending = [];
|
|
98
|
+
const n = src.length;
|
|
99
|
+
let i = 0;
|
|
100
|
+
while (i < n) {
|
|
101
|
+
const ch = src[i];
|
|
102
|
+
if (ch === "(") {
|
|
103
|
+
const [s, next] = parseLiteralString(src, i);
|
|
104
|
+
pending.push(s);
|
|
105
|
+
i = next;
|
|
106
|
+
}
|
|
107
|
+
else if (ch === "<" && src[i + 1] !== "<") {
|
|
108
|
+
const close = src.indexOf(">", i + 1);
|
|
109
|
+
if (close < 0)
|
|
110
|
+
break;
|
|
111
|
+
pending.push(decodeHexString(src.slice(i + 1, close)));
|
|
112
|
+
i = close + 1;
|
|
113
|
+
}
|
|
114
|
+
else if (ch === "%") {
|
|
115
|
+
// comment to end of line
|
|
116
|
+
while (i < n && src[i] !== "\n" && src[i] !== "\r")
|
|
117
|
+
i++;
|
|
118
|
+
}
|
|
119
|
+
else if (/[A-Za-z'"*]/.test(ch)) {
|
|
120
|
+
let j = i;
|
|
121
|
+
while (j < n && /[A-Za-z'"*]/.test(src[j]))
|
|
122
|
+
j++;
|
|
123
|
+
const op = src.slice(i, j);
|
|
124
|
+
if (op === "Tj" || op === "TJ") {
|
|
125
|
+
out += pending.join("");
|
|
126
|
+
}
|
|
127
|
+
else if (op === "'" || op === '"') {
|
|
128
|
+
out += "\n" + pending.join("");
|
|
129
|
+
}
|
|
130
|
+
else if (op === "Td" || op === "TD" || op === "T*" || op === "Tm" || op === "ET") {
|
|
131
|
+
if (pending.length)
|
|
132
|
+
out += pending.join("");
|
|
133
|
+
if (!out.endsWith("\n"))
|
|
134
|
+
out += "\n";
|
|
135
|
+
}
|
|
136
|
+
pending = [];
|
|
137
|
+
i = j;
|
|
138
|
+
}
|
|
139
|
+
else if (ch === "-" || (ch >= "0" && ch <= "9") || ch === ".") {
|
|
140
|
+
let j = i + 1;
|
|
141
|
+
while (j < n && /[0-9.]/.test(src[j]))
|
|
142
|
+
j++;
|
|
143
|
+
// Large negative kerning inside a TJ array is a word gap.
|
|
144
|
+
const num = parseFloat(src.slice(i, j));
|
|
145
|
+
if (num <= -180 && pending.length && !pending[pending.length - 1].endsWith(" "))
|
|
146
|
+
pending.push(" ");
|
|
147
|
+
i = j;
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
i++;
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
return out;
|
|
154
|
+
}
|
|
155
|
+
/** PDF literal string: balanced parens, backslash escapes, octal codes. */
|
|
156
|
+
function parseLiteralString(src, start) {
|
|
157
|
+
let out = "";
|
|
158
|
+
let depth = 0;
|
|
159
|
+
let i = start;
|
|
160
|
+
for (; i < src.length; i++) {
|
|
161
|
+
const ch = src[i];
|
|
162
|
+
if (ch === "\\") {
|
|
163
|
+
const next = src[i + 1];
|
|
164
|
+
if (next >= "0" && next <= "7") {
|
|
165
|
+
let oct = "";
|
|
166
|
+
for (let k = 1; k <= 3 && src[i + k] >= "0" && src[i + k] <= "7"; k++)
|
|
167
|
+
oct += src[i + k];
|
|
168
|
+
out += String.fromCharCode(parseInt(oct, 8));
|
|
169
|
+
i += oct.length;
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
const map = { n: "\n", r: "\r", t: "\t", b: "\b", f: "\f", "(": "(", ")": ")", "\\": "\\" };
|
|
173
|
+
out += map[next] ?? next ?? "";
|
|
174
|
+
i++;
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
else if (ch === "(") {
|
|
178
|
+
depth++;
|
|
179
|
+
if (depth > 1)
|
|
180
|
+
out += ch;
|
|
181
|
+
}
|
|
182
|
+
else if (ch === ")") {
|
|
183
|
+
depth--;
|
|
184
|
+
if (depth === 0) {
|
|
185
|
+
i++;
|
|
186
|
+
break;
|
|
187
|
+
}
|
|
188
|
+
out += ch;
|
|
189
|
+
}
|
|
190
|
+
else {
|
|
191
|
+
out += ch;
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
return [out, i];
|
|
195
|
+
}
|
|
196
|
+
/**
 * Decode the body of a PDF hex string (the text between "<" and ">").
 * Non-hex characters are ignored and an odd trailing digit is padded with 0.
 * When the bytes start with the UTF-16BE byte-order mark FE FF, the rest is
 * read as big-endian two-byte code units (a dangling final byte is dropped);
 * otherwise each byte becomes one character.
 */
function decodeHexString(hex) {
    const digits = hex.replace(/[^0-9a-fA-F]/g, "");
    const padded = digits.length % 2 ? `${digits}0` : digits;
    const bytes = (padded.match(/../g) || []).map((pair) => parseInt(pair, 16));
    const isUtf16 = bytes.length >= 2 && bytes[0] === 0xfe && bytes[1] === 0xff;
    if (!isUtf16)
        return bytes.reduce((acc, b) => acc + String.fromCharCode(b), "");
    let decoded = "";
    let at = 2;
    while (at + 1 < bytes.length) {
        decoded += String.fromCharCode((bytes[at] << 8) | bytes[at + 1]);
        at += 2;
    }
    return decoded;
}
|