imperium-crawl 2.4.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -9
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +23 -3
- package/dist/cli.js.map +1 -1
- package/dist/constants.d.ts +1 -1
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +31 -1
- package/dist/constants.js.map +1 -1
- package/dist/flows/engine.d.ts +7 -0
- package/dist/flows/engine.d.ts.map +1 -0
- package/dist/flows/engine.js +183 -0
- package/dist/flows/engine.js.map +1 -0
- package/dist/flows/index.d.ts +6 -0
- package/dist/flows/index.d.ts.map +1 -0
- package/dist/flows/index.js +6 -0
- package/dist/flows/index.js.map +1 -0
- package/dist/flows/server.d.ts +11 -0
- package/dist/flows/server.d.ts.map +1 -0
- package/dist/flows/server.js +81 -0
- package/dist/flows/server.js.map +1 -0
- package/dist/flows/smart-target.d.ts +9 -0
- package/dist/flows/smart-target.d.ts.map +1 -0
- package/dist/flows/smart-target.js +84 -0
- package/dist/flows/smart-target.js.map +1 -0
- package/dist/flows/storage.d.ts +26 -0
- package/dist/flows/storage.d.ts.map +1 -0
- package/dist/flows/storage.js +118 -0
- package/dist/flows/storage.js.map +1 -0
- package/dist/flows/templates.d.ts +4 -0
- package/dist/flows/templates.d.ts.map +1 -0
- package/dist/flows/templates.js +35 -0
- package/dist/flows/templates.js.map +1 -0
- package/dist/flows/types.d.ts +3356 -0
- package/dist/flows/types.d.ts.map +1 -0
- package/dist/flows/types.js +133 -0
- package/dist/flows/types.js.map +1 -0
- package/dist/knowledge/store.d.ts +19 -0
- package/dist/knowledge/store.d.ts.map +1 -1
- package/dist/knowledge/store.js +63 -4
- package/dist/knowledge/store.js.map +1 -1
- package/dist/sessions/browser-connect.d.ts +30 -0
- package/dist/sessions/browser-connect.d.ts.map +1 -0
- package/dist/sessions/browser-connect.js +68 -0
- package/dist/sessions/browser-connect.js.map +1 -0
- package/dist/sessions/browser-state.d.ts +35 -0
- package/dist/sessions/browser-state.d.ts.map +1 -0
- package/dist/sessions/browser-state.js +74 -0
- package/dist/sessions/browser-state.js.map +1 -0
- package/dist/sessions/inject-cookies.d.ts +20 -0
- package/dist/sessions/inject-cookies.d.ts.map +1 -0
- package/dist/sessions/inject-cookies.js +57 -0
- package/dist/sessions/inject-cookies.js.map +1 -0
- package/dist/sessions/manager.d.ts +11 -1
- package/dist/sessions/manager.d.ts.map +1 -1
- package/dist/sessions/manager.js +40 -6
- package/dist/sessions/manager.js.map +1 -1
- package/dist/snapshot/store.d.ts +8 -0
- package/dist/snapshot/store.d.ts.map +1 -1
- package/dist/snapshot/store.js +48 -0
- package/dist/snapshot/store.js.map +1 -1
- package/dist/stealth/antibot-detector.d.ts +1 -1
- package/dist/stealth/antibot-detector.d.ts.map +1 -1
- package/dist/stealth/antibot-detector.js +56 -0
- package/dist/stealth/antibot-detector.js.map +1 -1
- package/dist/stealth/browser-image-extract.d.ts +43 -0
- package/dist/stealth/browser-image-extract.d.ts.map +1 -0
- package/dist/stealth/browser-image-extract.js +268 -0
- package/dist/stealth/browser-image-extract.js.map +1 -0
- package/dist/stealth/browser.d.ts +5 -0
- package/dist/stealth/browser.d.ts.map +1 -1
- package/dist/stealth/browser.js +82 -1
- package/dist/stealth/browser.js.map +1 -1
- package/dist/stealth/chrome-profile.d.ts +1 -0
- package/dist/stealth/chrome-profile.d.ts.map +1 -1
- package/dist/stealth/chrome-profile.js +28 -5
- package/dist/stealth/chrome-profile.js.map +1 -1
- package/dist/stealth/detector.d.ts +10 -1
- package/dist/stealth/detector.d.ts.map +1 -1
- package/dist/stealth/detector.js +117 -25
- package/dist/stealth/detector.js.map +1 -1
- package/dist/stealth/headers.d.ts +1 -1
- package/dist/stealth/headers.d.ts.map +1 -1
- package/dist/stealth/headers.js +94 -2
- package/dist/stealth/headers.js.map +1 -1
- package/dist/stealth/index.d.ts +4 -0
- package/dist/stealth/index.d.ts.map +1 -1
- package/dist/stealth/index.js +207 -25
- package/dist/stealth/index.js.map +1 -1
- package/dist/stealth/proxy.d.ts +40 -1
- package/dist/stealth/proxy.d.ts.map +1 -1
- package/dist/stealth/proxy.js +90 -6
- package/dist/stealth/proxy.js.map +1 -1
- package/dist/tools/action-executor.d.ts +2 -0
- package/dist/tools/action-executor.d.ts.map +1 -1
- package/dist/tools/action-executor.js +38 -0
- package/dist/tools/action-executor.js.map +1 -1
- package/dist/tools/batch-download.d.ts +33 -0
- package/dist/tools/batch-download.d.ts.map +1 -0
- package/dist/tools/batch-download.js +208 -0
- package/dist/tools/batch-download.js.map +1 -0
- package/dist/tools/browser.d.ts +100 -0
- package/dist/tools/browser.d.ts.map +1 -0
- package/dist/tools/browser.js +448 -0
- package/dist/tools/browser.js.map +1 -0
- package/dist/tools/download.d.ts +35 -2
- package/dist/tools/download.d.ts.map +1 -1
- package/dist/tools/download.js +245 -44
- package/dist/tools/download.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +23 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/inspect-flow.d.ts +24 -0
- package/dist/tools/inspect-flow.d.ts.map +1 -0
- package/dist/tools/inspect-flow.js +23 -0
- package/dist/tools/inspect-flow.js.map +1 -0
- package/dist/tools/interact.d.ts +28 -15
- package/dist/tools/interact.d.ts.map +1 -1
- package/dist/tools/interact.js +48 -1
- package/dist/tools/interact.js.map +1 -1
- package/dist/tools/list-flows.d.ts +21 -0
- package/dist/tools/list-flows.d.ts.map +1 -0
- package/dist/tools/list-flows.js +18 -0
- package/dist/tools/list-flows.js.map +1 -0
- package/dist/tools/manifest.d.ts.map +1 -1
- package/dist/tools/manifest.js +43 -0
- package/dist/tools/manifest.js.map +1 -1
- package/dist/tools/monitor.d.ts +46 -0
- package/dist/tools/monitor.d.ts.map +1 -0
- package/dist/tools/monitor.js +213 -0
- package/dist/tools/monitor.js.map +1 -0
- package/dist/tools/pdf-extract.d.ts +38 -0
- package/dist/tools/pdf-extract.d.ts.map +1 -0
- package/dist/tools/pdf-extract.js +244 -0
- package/dist/tools/pdf-extract.js.map +1 -0
- package/dist/tools/record-flow.d.ts +39 -0
- package/dist/tools/record-flow.d.ts.map +1 -0
- package/dist/tools/record-flow.js +406 -0
- package/dist/tools/record-flow.js.map +1 -0
- package/dist/tools/run-flow.d.ts +54 -0
- package/dist/tools/run-flow.d.ts.map +1 -0
- package/dist/tools/run-flow.js +47 -0
- package/dist/tools/run-flow.js.map +1 -0
- package/dist/tools/run-skill.d.ts +2 -2
- package/dist/tools/run-skill.d.ts.map +1 -1
- package/dist/tools/run-skill.js +1 -0
- package/dist/tools/run-skill.js.map +1 -1
- package/dist/tools/scrape.d.ts.map +1 -1
- package/dist/tools/scrape.js +17 -1
- package/dist/tools/scrape.js.map +1 -1
- package/dist/tools/serve-flow.d.ts +36 -0
- package/dist/tools/serve-flow.d.ts.map +1 -0
- package/dist/tools/serve-flow.js +42 -0
- package/dist/tools/serve-flow.js.map +1 -0
- package/dist/tools/validate-flow.d.ts +24 -0
- package/dist/tools/validate-flow.d.ts.map +1 -0
- package/dist/tools/validate-flow.js +23 -0
- package/dist/tools/validate-flow.js.map +1 -0
- package/dist/tools/watch.d.ts +68 -0
- package/dist/tools/watch.d.ts.map +1 -0
- package/dist/tools/watch.js +224 -0
- package/dist/tools/watch.js.map +1 -0
- package/dist/utils/fetcher.d.ts +13 -4
- package/dist/utils/fetcher.d.ts.map +1 -1
- package/dist/utils/fetcher.js +121 -24
- package/dist/utils/fetcher.js.map +1 -1
- package/package.json +15 -4
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* monitor — portfolio-level change tracker over many URLs, grouped by topic.
|
|
3
|
+
*
|
|
4
|
+
* v2.5.0: minimal scope — JSON config parser + markdown digest generator.
|
|
5
|
+
* Internally reuses the watch tool's runWatchOnce() for per-URL change detection.
|
|
6
|
+
* Filters out sub-threshold churn (min_change_pct) and emits a single digest
|
|
7
|
+
* file per run, listing the top changes per topic.
|
|
8
|
+
*
|
|
9
|
+
* YAML config + LLM summarisation are deferred to v2.6.0.
|
|
10
|
+
*/
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
13
|
+
import { existsSync } from "node:fs";
|
|
14
|
+
import { join, resolve as resolvePath } from "node:path";
|
|
15
|
+
import { toolResult, errorResult } from "../utils/tool-response.js";
|
|
16
|
+
import { debugLog } from "../utils/debug.js";
|
|
17
|
+
import { runWatchOnce } from "./watch.js";
|
|
18
|
+
/** Name under which this tool module exposes itself. */
export const name = "monitor";
/** Short human-readable summary of what the tool does. */
export const description = "Portfolio-level change tracker: read a JSON config of topics and URLs, run watch on each, emit a markdown digest of changes.";
/**
 * Input schema for the monitor tool.
 *
 * Either `config` (path to a JSON file) or `urls` (optionally with `topic`)
 * selects what is monitored; the remaining fields all carry defaults.
 */
export const schema = z.object({
    config: z.string().optional().describe("Path to JSON config (topics with URL lists). Mutually exclusive with --urls/--topic."),
    urls: z.array(z.string()).optional().describe("Single-topic shortcut: list of URLs. Repeat --urls."),
    topic: z.string().optional().describe("Topic name when using --urls"),
    output_dir: z.string().default("./data/monitor").describe("Output dir for state, snapshots, and digests"),
    // Percentage in the closed range [0, 100]; defaults to 5.
    min_change_pct: z.number().min(0).max(100).default(5).describe("Minimum % of lines changed to count as a 'meaningful' change"),
    export_format: z.enum(["markdown"]).default("markdown").describe("Digest format (markdown only in v2.5.0)"),
    hash_on: z.enum(["content", "readability", "markdown"]).default("readability").describe("Passed through to watch — what to hash per URL"),
});
|
|
52
|
+
/**
 * Resolve the list of topics to monitor from the tool input.
 *
 * When `input.config` is set it wins: the file is read as JSON and must hold a
 * `topics` array whose entries each carry a string `name` and a `urls` array.
 * Otherwise a non-empty `input.urls` produces a single topic named
 * `input.topic` (falling back to "default").
 *
 * @param input Tool input; only `config`, `urls` and `topic` are read.
 * @returns An object of the form `{ topics: [...] }`.
 * @throws Error if the config file is missing, is not valid JSON, lacks a
 *   well-formed `topics` array, or if neither `config` nor `urls` was given.
 */
async function loadConfig(input) {
    if (input.config) {
        const path = resolvePath(input.config);
        if (!existsSync(path))
            throw new Error(`Config not found: ${path}`);
        const raw = await readFile(path, "utf-8");
        let parsed;
        try {
            parsed = JSON.parse(raw);
        }
        catch (err) {
            // Surface which file was malformed instead of a bare SyntaxError.
            const reason = err instanceof Error ? err.message : String(err);
            throw new Error(`Config is not valid JSON (${path}): ${reason}`);
        }
        // JSON.parse may legitimately yield null or a primitive; guard before property access.
        if (!parsed || !Array.isArray(parsed.topics)) {
            throw new Error("Config missing 'topics' array");
        }
        // Validate the two fields callers rely on, so bad entries fail here with a clear message.
        parsed.topics.forEach((topic, index) => {
            if (!topic || typeof topic.name !== "string") {
                throw new Error(`Config topic #${index + 1} is missing a string 'name'`);
            }
            if (!Array.isArray(topic.urls)) {
                throw new Error(`Config topic '${topic.name}' is missing a 'urls' array`);
            }
        });
        return { topics: parsed.topics };
    }
    if (input.urls && input.urls.length > 0) {
        return {
            topics: [{ name: input.topic || "default", urls: input.urls }],
        };
    }
    throw new Error("Must provide --config OR --urls");
}
|
|
71
|
+
/**
 * Estimate how much of a document changed, as a percentage of its lines.
 *
 * Lines present in only one of the two texts are counted (set membership, so
 * ordering and repetition are ignored) and the sum is scaled by twice the
 * longer line count.
 *
 * @param prev Previous text; a missing or empty value is reported as 100.
 * @param next Current text.
 * @returns A percentage between 0 and 100.
 */
function computeChangePct(prev, next) {
    if (!prev) {
        return 100;
    }
    const before = prev.split("\n");
    const after = next.split("\n");
    const beforeLookup = new Set(before);
    const afterLookup = new Set(after);
    const removed = before.filter((line) => !afterLookup.has(line)).length;
    const added = after.filter((line) => !beforeLookup.has(line)).length;
    // The trailing 1 keeps the divisor non-zero.
    const total = Math.max(before.length, after.length, 1);
    return ((removed + added) / (2 * total)) * 100;
}
|
|
89
|
+
/**
 * Render the per-run digest as a markdown document.
 *
 * @param reports Topic reports; each provides `name`, `urls_checked`,
 *   `meaningful_changes`, `changes` and optionally `min_change_pct`.
 * @param generatedAt ISO timestamp; the part before "T" is used in the title.
 * @param minPct Global threshold, printed in the header and used for every
 *   report that does not carry its own `min_change_pct`.
 * @returns The digest text, lines joined with "\n".
 */
function renderMarkdownDigest(reports, generatedAt, minPct) {
    const lines = [
        `# Monitor digest — ${generatedAt.split("T")[0]}`,
        "",
        `Generated: ${generatedAt}`,
        `Threshold: ${minPct}% line change`,
        "",
    ];
    const totalMeaningful = reports.reduce((sum, r) => sum + r.meaningful_changes, 0);
    lines.push(`**${totalMeaningful}** meaningful change(s) across **${reports.length}** topic(s).`);
    lines.push("");
    for (const report of reports) {
        lines.push(`## ${report.name}`);
        lines.push(`- URLs checked: ${report.urls_checked}`);
        lines.push(`- Meaningful changes: ${report.meaningful_changes}`);
        lines.push("");
        // A report may carry its own threshold; using it keeps the entries listed
        // below consistent with the report's `meaningful_changes` count.
        const threshold = report.min_change_pct ?? minPct;
        const meaningful = report.changes.filter((c) => c.changed && c.change_pct >= threshold);
        if (meaningful.length === 0) {
            lines.push("_No meaningful changes._");
            lines.push("");
            continue;
        }
        for (const c of meaningful) {
            lines.push(`### ${c.url}`);
            lines.push(`- Change: ${c.change_pct.toFixed(1)}% of lines`);
            // Hashes are shortened to their first 12 characters for readability.
            lines.push(`- Previous: \`${c.previous_hash?.slice(0, 12) ?? "(none)"}\``);
            lines.push(`- Current: \`${c.current_hash.slice(0, 12)}\``);
            lines.push("");
        }
    }
    return lines.join("\n");
}
|
|
120
|
+
/** Read a snapshot file as UTF-8 text; resolves to null when it is missing or unreadable. */
async function readSnapshotText(path) {
    try {
        return await readFile(path, "utf-8");
    }
    catch {
        return null;
    }
}
/**
 * Run one monitoring pass over every configured topic and write a digest file.
 *
 * For each URL, `runWatchOnce` is invoked with the topic's directory as its
 * output dir. When it reports a change (and it is not the first run), the
 * change percentage is derived from the current snapshot file and a sibling
 * file with the `.previous.txt` suffix. A URL whose check throws is logged and
 * recorded as unchanged, with the failure message in `error`.
 *
 * @param input Parsed tool input (see `schema`).
 * @returns `toolResult({ generated_at, topics, digest_file, format })` on
 *   success; `errorResult(message)` if configuration loading, directory
 *   creation or digest writing fails.
 */
export async function execute(input) {
    try {
        const config = await loadConfig(input);
        const outDir = resolvePath(input.output_dir);
        await mkdir(outDir, { recursive: true });
        const reports = [];
        for (const topic of config.topics) {
            const topicDir = join(outDir, slug(topic.name));
            await mkdir(topicDir, { recursive: true });
            // A topic may override the global threshold.
            const threshold = topic.min_change_pct ?? input.min_change_pct;
            const changes = [];
            // URLs are checked one at a time, in config order.
            for (const url of topic.urls) {
                try {
                    const wr = await runWatchOnce({
                        url,
                        output_dir: topicDir,
                        hash_on: input.hash_on,
                        diff_format: "unified",
                        one_shot: true,
                    });
                    // First runs and unchanged pages report 0%.
                    let pct = 0;
                    if (!wr.first_run && wr.changed) {
                        const prevPath = wr.snapshot_file.replace(/\.snapshot\.txt$/, ".previous.txt");
                        const prevSig = await readSnapshotText(prevPath);
                        // An unreadable current snapshot is compared as empty text.
                        const currSig = (await readSnapshotText(wr.snapshot_file)) ?? "";
                        pct = computeChangePct(prevSig, currSig);
                    }
                    changes.push({
                        url,
                        changed: wr.changed,
                        first_run: wr.first_run,
                        change_pct: pct,
                        previous_hash: wr.previous_hash,
                        current_hash: wr.current_hash,
                    });
                }
                catch (urlErr) {
                    debugLog("monitor", `failed for ${url}`, urlErr);
                    // One failing URL must not abort the run; keep the reason so
                    // consumers can tell "unchanged" from "could not be checked".
                    changes.push({
                        url,
                        changed: false,
                        first_run: false,
                        change_pct: 0,
                        previous_hash: null,
                        current_hash: "",
                        error: urlErr instanceof Error ? urlErr.message : String(urlErr),
                    });
                }
            }
            const meaningful = changes.filter((c) => c.changed && c.change_pct >= threshold).length;
            reports.push({
                name: topic.name,
                urls_checked: topic.urls.length,
                changes,
                meaningful_changes: meaningful,
                // Record the threshold actually applied to this topic.
                min_change_pct: threshold,
            });
        }
        const generatedAt = new Date().toISOString();
        const digestBody = renderMarkdownDigest(reports, generatedAt, input.min_change_pct);
        // ":" and "." are replaced so the timestamp is safe as a file name.
        const digestFile = join(outDir, `digest-${generatedAt.replace(/[:.]/g, "-")}.md`);
        await writeFile(digestFile, digestBody, "utf-8");
        return toolResult({
            generated_at: generatedAt,
            topics: reports,
            digest_file: digestFile,
            format: input.export_format,
        });
    }
    catch (err) {
        debugLog("monitor", "failed", err);
        return errorResult(err instanceof Error ? err.message : String(err));
    }
}
|
|
210
|
+
/**
 * Turn a topic name into a directory-safe identifier.
 *
 * Lower-cases the name, replaces every run of characters outside `a-z0-9`
 * with a single "-", strips leading and trailing dashes, keeps at most 40
 * characters, and falls back to "topic" when nothing is left.
 *
 * @param s Topic name.
 * @returns The slug.
 */
function slug(s) {
    const dashed = s.toLowerCase().replace(/[^a-z0-9]+/g, "-");
    const stripped = dashed.replace(/^-+|-+$/g, "");
    const shortened = stripped.slice(0, 40);
    return shortened || "topic";
}
|
|
213
|
+
//# sourceMappingURL=monitor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"monitor.js","sourceRoot":"","sources":["../../src/tools/monitor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAC7C,OAAO,EAAE,YAAY,EAAmB,MAAM,YAAY,CAAC;AAE3D,MAAM,CAAC,MAAM,IAAI,GAAG,SAAS,CAAC;AAE9B,MAAM,CAAC,MAAM,WAAW,GACtB,8HAA8H,CAAC;AAEjI,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7B,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,sFAAsF,CAAC;IACnG,IAAI,EAAE,CAAC;SACJ,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;SACjB,QAAQ,EAAE;SACV,QAAQ,CAAC,qDAAqD,CAAC;IAClE,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,QAAQ,EAAE;SACV,QAAQ,CAAC,8BAA8B,CAAC;IAC3C,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,OAAO,CAAC,gBAAgB,CAAC;SACzB,QAAQ,CAAC,8CAA8C,CAAC;IAC3D,cAAc,EAAE,CAAC;SACd,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,GAAG,CAAC;SACR,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,8DAA8D,CAAC;IAC3E,aAAa,EAAE,CAAC;SACb,IAAI,CAAC,CAAC,UAAU,CAAC,CAAC;SAClB,OAAO,CAAC,UAAU,CAAC;SACnB,QAAQ,CAAC,yCAAyC,CAAC;IACtD,OAAO,EAAE,CAAC;SACP,IAAI,CAAC,CAAC,SAAS,EAAE,aAAa,EAAE,UAAU,CAAC,CAAC;SAC5C,OAAO,CAAC,aAAa,CAAC;SACtB,QAAQ,CAAC,gDAAgD,CAAC;CAC9D,CAAC,CAAC;AA8BH,KAAK,UAAU,UAAU,CAAC,KAAmB;IAC3C,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACjB,MAAM,IAAI,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACvC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,qBAAqB,IAAI,EAAE,CAAC,CAAC;QACpE,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA2B,CAAC;QACzD,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;YACpD,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;QACnD,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,MAAuB,EAAE,CAAC;IACpD,CAAC;IAED,IAAI,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxC,OAAO;YACL,MAAM,EAAE,CAAC,EA
AE,IAAI,EAAE,KAAK,CAAC,KAAK,IAAI,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC;SAC/D,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAmB,EAAE,IAAY;IACzD,IAAI,CAAC,IAAI;QAAE,OAAO,GAAG,CAAC;IACtB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;IACnC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,SAAS;QAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,OAAO,EAAE,CAAC;IAC1D,KAAK,MAAM,CAAC,IAAI,SAAS;QAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,KAAK,EAAE,CAAC;IACxD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,EAAE,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC9D,OAAO,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC;AACjD,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAsB,EACtB,WAAmB,EACnB,MAAc;IAEd,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,sBAAsB,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC9D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,cAAc,WAAW,EAAE,CAAC,CAAC;IACxC,KAAK,CAAC,IAAI,CAAC,cAAc,MAAM,eAAe,CAAC,CAAC;IAChD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC,CAAC;IAC9E,KAAK,CAAC,IAAI,CAAC,KAAK,eAAe,oCAAoC,OAAO,CAAC,MAAM,cAAc,CAAC,CAAC;IACjG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEf,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,yBAAyB,MAAM,CAAC,kBAAkB,EAAE,CAAC,CAAC;QACjE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,UAAU,IAAI,MAAM,CAAC,CAAC;QACrF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;YACvC,KAAK,CAAC,IAAI,
CAAC,EAAE,CAAC,CAAC;YACf,SAAS;QACX,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;YAC3B,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC;YAC7D,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,QAAQ,IAAI,CAAC,CAAC;YAC3E,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;YAC5D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,KAAmB;IAC/C,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAC7C,MAAM,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAEzC,MAAM,OAAO,GAAkB,EAAE,CAAC;QAElC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;YAChD,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC3C,MAAM,SAAS,GAAG,KAAK,CAAC,cAAc,IAAI,KAAK,CAAC,cAAc,CAAC;YAE/D,MAAM,OAAO,GAAkB,EAAE,CAAC;YAClC,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,UAAU,GAAe;wBAC7B,GAAG;wBACH,UAAU,EAAE,QAAQ;wBACpB,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,WAAW,EAAE,SAAS;wBACtB,QAAQ,EAAE,IAAI;qBACf,CAAC;oBACF,MAAM,EAAE,GAAG,MAAM,YAAY,CAAC,UAAU,CAAC,CAAC;oBAE1C,wEAAwE;oBACxE,IAAI,GAAG,GAAG,CAAC,CAAC;oBACZ,IAAI,EAAE,CAAC,SAAS,EAAE,CAAC;wBACjB,GAAG,GAAG,CAAC,CAAC;oBACV,CAAC;yBAAM,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;wBACtB,wDAAwD;wBACxD,MAAM,QAAQ,GAAG,EAAE,CAAC,aAAa,CAAC,OAAO,CAAC,kBAAkB,EAAE,eAAe,CAAC,CAAC;wBAC/E,IAAI,OAAO,GAAkB,IAAI,CAAC;wBAClC,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;4BACzB,IAAI,CAAC;gCAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;4BAAC,CAAC;4BAAC,MAAM,CAAC;gCAAC,OAAO,GAAG,IAAI,CAAC;4BAAC,CAAC;wBAChF,CAAC;wBACD,IAAI,OAAO,GAAG,EAAE,CAAC;wBACjB,IAAI,CAAC;4BAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,EAAE,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;wBAAC,CAAC;wBAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;wBA
CjF,GAAG,GAAG,gBAAgB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;oBAC3C,CAAC;oBAED,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG;wBACH,OAAO,EAAE,EAAE,CAAC,OAAO;wBACnB,SAAS,EAAE,EAAE,CAAC,SAAS;wBACvB,UAAU,EAAE,GAAG;wBACf,aAAa,EAAE,EAAE,CAAC,aAAa;wBAC/B,YAAY,EAAE,EAAE,CAAC,YAAY;qBAC9B,CAAC,CAAC;gBACL,CAAC;gBAAC,OAAO,MAAM,EAAE,CAAC;oBAChB,QAAQ,CAAC,SAAS,EAAE,cAAc,GAAG,EAAE,EAAE,MAAM,CAAC,CAAC;oBACjD,OAAO,CAAC,IAAI,CAAC;wBACX,GAAG;wBACH,OAAO,EAAE,KAAK;wBACd,SAAS,EAAE,KAAK;wBAChB,UAAU,EAAE,CAAC;wBACb,aAAa,EAAE,IAAI;wBACnB,YAAY,EAAE,EAAE;qBACjB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAED,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,UAAU,IAAI,SAAS,CAAC,CAAC,MAAM,CAAC;YACxF,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM;gBAC/B,OAAO;gBACP,kBAAkB,EAAE,UAAU;aAC/B,CAAC,CAAC;QACL,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC7C,MAAM,UAAU,GAAG,oBAAoB,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC,cAAc,CAAC,CAAC;QACpF,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,EAAE,UAAU,WAAW,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;QAClF,MAAM,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,OAAO,CAAC,CAAC;QAEjD,OAAO,UAAU,CAAC;YAChB,YAAY,EAAE,WAAW;YACzB,MAAM,EAAE,OAAO;YACf,WAAW,EAAE,UAAU;YACvB,MAAM,EAAE,KAAK,CAAC,aAAa;SAC5B,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;QACnC,OAAO,WAAW,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,CAAC;AACH,CAAC;AAED,SAAS,IAAI,CAAC,CAAS;IACrB,OAAO,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,OAAO,CAAC;AACrG,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pdf-extract — extract text, pages, tables, and metadata from a PDF.
|
|
3
|
+
*
|
|
4
|
+
* v2.5.0: native strategy only (pdfjs-dist text layer).
|
|
5
|
+
* OCR + Claude Vision fallbacks are deferred to v2.6.0.
|
|
6
|
+
*
|
|
7
|
+
* Accepts a local path or a remote URL (auto-download to tmp).
|
|
8
|
+
*/
|
|
9
|
+
import { z } from "zod";
|
|
10
|
+
/** Name under which this tool module exposes itself. */
export declare const name = "pdf_extract";
/** Short human-readable summary of what the tool does. */
export declare const description = "Extract text, pages, tables, and metadata from a local or remote PDF. Native text-layer strategy (pdfjs-dist). OCR/Vision fallbacks deferred to v2.6.0.";
/**
 * Zod schema for the tool input. Only `input` is required; every other field
 * has a default, so it is optional on the way in and always present once parsed.
 */
export declare const schema: z.ZodObject<
    {
        input: z.ZodString;
        output: z.ZodDefault<z.ZodString>;
        preserve_layout: z.ZodDefault<z.ZodBoolean>;
        extract_tables: z.ZodDefault<z.ZodBoolean>;
        max_pages: z.ZodDefault<z.ZodNumber>;
    },
    "strip",
    z.ZodTypeAny,
    {
        input: string;
        output: string;
        max_pages: number;
        preserve_layout: boolean;
        extract_tables: boolean;
    },
    {
        input: string;
        output?: string | undefined;
        max_pages?: number | undefined;
        preserve_layout?: boolean | undefined;
        extract_tables?: boolean | undefined;
    }
>;
/** Parsed (post-default) input accepted by `execute`. */
export type PdfExtractInput = z.infer<typeof schema>;
/**
 * Run the extraction for the given input.
 *
 * @returns A promise of an object whose `content` is a list of text entries.
 */
export declare function execute(input: PdfExtractInput): Promise<{
    content: Array<{
        type: "text";
        text: string;
    }>;
}>;
|
|
38
|
+
//# sourceMappingURL=pdf-extract.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.d.ts","sourceRoot":"","sources":["../../src/tools/pdf-extract.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAQxB,eAAO,MAAM,IAAI,gBAAgB,CAAC;AAElC,eAAO,MAAM,WAAW,4JACmI,CAAC;AAE5J,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;EAsBjB,CAAC;AAEH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,MAAM,CAAC,CAAC;AA8JrD,wBAAsB,OAAO,CAAC,KAAK,EAAE,eAAe;;;;;GAyGnD"}
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pdf-extract — extract text, pages, tables, and metadata from a PDF.
|
|
3
|
+
*
|
|
4
|
+
* v2.5.0: native strategy only (pdfjs-dist text layer).
|
|
5
|
+
* OCR + Claude Vision fallbacks are deferred to v2.6.0.
|
|
6
|
+
*
|
|
7
|
+
* Accepts a local path or a remote URL (auto-download to tmp).
|
|
8
|
+
*/
|
|
9
|
+
import { z } from "zod";
|
|
10
|
+
import { readFile, writeFile, mkdtemp } from "node:fs/promises";
|
|
11
|
+
import { existsSync } from "node:fs";
|
|
12
|
+
import { tmpdir } from "node:os";
|
|
13
|
+
import { join, resolve as resolvePath } from "node:path";
|
|
14
|
+
import { toolResult, errorResult } from "../utils/tool-response.js";
|
|
15
|
+
import { debugLog } from "../utils/debug.js";
|
|
16
|
+
/** Name under which this tool module exposes itself. */
export const name = "pdf_extract";
/** Short human-readable summary of what the tool does. */
export const description = "Extract text, pages, tables, and metadata from a local or remote PDF. Native text-layer strategy (pdfjs-dist). OCR/Vision fallbacks deferred to v2.6.0.";
/**
 * Input schema for the pdf_extract tool. Only `input` is required; the other
 * fields fall back to the defaults given below.
 */
export const schema = z.object({
    // Must be a non-empty string.
    input: z.string().min(1).describe("Local PDF path or remote URL (http/https). URL inputs are downloaded to a temp file."),
    output: z.string().default("./extracted.json").describe("Output JSON path"),
    preserve_layout: z.boolean().default(true).describe("Preserve line breaks and approximate layout when assembling text"),
    extract_tables: z.boolean().default(true).describe("Run basic regex-based table extraction"),
    // Non-negative; 0 is the "no limit" sentinel.
    max_pages: z.number().min(0).default(0).describe("Limit pages to extract (0 = all)"),
});
|
|
41
|
+
/**
 * Fetch `url` and store the response body as `input.pdf` inside a freshly
 * created temporary directory (prefix "imperium-pdf-").
 *
 * @param url Location to download from.
 * @returns Path of the written file.
 * @throws Error when the response status is not OK.
 */
async function downloadToTmp(url) {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Failed to download PDF (${response.status} ${response.statusText}): ${url}`);
    }
    // The body is fully buffered before any temp directory is created.
    const bytes = Buffer.from(await response.arrayBuffer());
    const scratchDir = await mkdtemp(join(tmpdir(), "imperium-pdf-"));
    const target = join(scratchDir, "input.pdf");
    await writeFile(target, bytes);
    return target;
}
|
|
51
|
+
/**
 * Assemble the text of one page from pdfjs text-content items.
 *
 * Without layout preservation all item strings are joined with spaces and
 * whitespace runs are collapsed. With it, a new line starts whenever an item
 * has `hasEOL` set or its y-coordinate (`transform[5]`) differs from the
 * previous item's by more than 2 units; whitespace-only lines are dropped.
 *
 * Items without a string `str` (presumably pdfjs marked-content entries, which
 * carry no text) contribute an empty string instead of the literal "undefined".
 *
 * @param items Text-content items; `str`, `transform` and `hasEOL` are read.
 * @param preserveLayout Whether to keep approximate line structure.
 * @returns The assembled page text.
 */
function assemblePageText(items, preserveLayout) {
    const textOf = (item) => (typeof item.str === "string" ? item.str : "");
    if (!preserveLayout) {
        return items.map((item) => textOf(item)).join(" ").replace(/\s+/g, " ").trim();
    }
    const lines = [];
    let current = "";
    let lastY = null;
    // Push the pending line (if it has visible content) and start a new one.
    const breakLine = () => {
        if (current.trim())
            lines.push(current.trimEnd());
        current = "";
    };
    for (const it of items) {
        // transform[5] is the item's vertical offset; null when no usable transform is present.
        const y = Array.isArray(it.transform) && it.transform.length >= 6 ? it.transform[5] : null;
        // A 2-unit tolerance absorbs small baseline jitter within a line.
        if (y !== null && lastY !== null && Math.abs(y - lastY) > 2) {
            breakLine();
        }
        current += textOf(it);
        if (it.hasEOL) {
            breakLine();
        }
        if (y !== null)
            lastY = y;
    }
    breakLine();
    return lines.join("\n");
}
|
|
83
|
+
/**
 * Heuristic table detection over plain page text.
 *
 * Each line is trimmed and split on runs of two or more whitespace characters
 * or on tabs. Consecutive lines that yield the same number (at least 2) of
 * non-empty cells form a candidate; candidates with two or more rows are
 * reported as tables. A blank line, a line with fewer than two cells, or a
 * change in cell count ends the current candidate.
 *
 * @param text Page text, lines separated by "\n".
 * @param pageNum Page number stored on every table found.
 * @returns Array of `{ page, rows }`, where `rows` is an array of cell arrays.
 */
function extractTablesFromText(text, pageNum) {
    const found = [];
    let rows = [];
    // Emit the pending candidate if it is long enough, then start afresh.
    const closeRun = () => {
        if (rows.length >= 2) {
            found.push({ page: pageNum, rows });
        }
        rows = [];
    };
    for (const rawLine of text.split("\n")) {
        // Split on 2+ whitespace or tab; a blank line yields no cells at all.
        const cells = rawLine.trim().split(/\s{2,}|\t+/).map((cell) => cell.trim()).filter(Boolean);
        if (cells.length < 2) {
            closeRun();
            continue;
        }
        // The first row of a candidate fixes its column count.
        if (rows.length > 0 && rows[0].length !== cells.length) {
            closeRun();
        }
        rows.push(cells);
    }
    closeRun();
    return found;
}
|
|
129
|
+
/**
 * Heuristic confidence score for text extracted from a page.
 *
 * The score is the share of characters that look like real text (printable
 * ASCII, or any Unicode letter, number, punctuation or whitespace) multiplied
 * by a length factor that reaches 1 at 50 characters.
 *
 * Characters are counted as Unicode code points for both the numerator and the
 * denominator, so text containing astral-plane characters (two UTF-16 code
 * units each) is not under-scored.
 *
 * @param text Extracted text; empty or whitespace-only input scores 0.
 * @returns A value between 0 and 1.
 */
function computeConfidence(text) {
    if (!text)
        return 0;
    const trimmed = text.trim();
    if (trimmed.length === 0)
        return 0;
    // Built once rather than on every loop iteration.
    const textLike = /\p{L}|\p{N}|\p{P}|\s/u;
    let total = 0;
    let printable = 0;
    // for..of walks the string by code point.
    for (const ch of trimmed) {
        total++;
        const code = ch.codePointAt(0);
        if ((code >= 32 && code < 127) || textLike.test(ch))
            printable++;
    }
    const ratio = printable / total;
    // Length factor: at least 50 chars = full confidence contribution
    const lengthFactor = Math.min(1, total / 50);
    return Math.min(1, ratio * lengthFactor);
}
|
|
149
|
+
/**
 * Extract text, per-page confidence scores, heuristic tables and document
 * metadata from a PDF using its native text layer, write the result as JSON
 * to `input.output`, and return it as a tool result.
 *
 * Failures on individual pages, on metadata, or on writing the output file are
 * recorded in `warnings` and do not abort the run. Any other error is logged
 * and returned through `errorResult`.
 *
 * @param input Tool input. Fields read here: `input` (http(s) URL or local
 *   path), `output`, `preserve_layout`, `extract_tables`, `max_pages`.
 * @returns The value produced by `toolResult` on success or `errorResult` on failure.
 */
export async function execute(input) {
    const warnings = [];
    // Declared outside the try so the finally block can release pdf.js
    // resources on every exit path (success, early return, or error).
    let loadingTask;
    try {
        // Resolve input (URL or local path)
        let pdfPath;
        const isUrl = /^https?:\/\//i.test(input.input);
        if (isUrl) {
            debugLog("pdf-extract", "downloading url", input.input);
            // NOTE(review): nothing in this function removes the downloaded
            // file afterwards — confirm whether cleanup happens elsewhere.
            pdfPath = await downloadToTmp(input.input);
        }
        else {
            pdfPath = resolvePath(input.input);
            if (!existsSync(pdfPath)) {
                return errorResult(`PDF not found at path: ${pdfPath}`);
            }
        }
        // Load pdfjs-dist (legacy build works in Node w/o DOMMatrix)
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
        const data = new Uint8Array(await readFile(pdfPath));
        loadingTask = pdfjs.getDocument({
            data,
            useSystemFonts: true,
            // Disable font loading / canvas / workers for Node
            isEvalSupported: false,
            disableFontFace: true,
        });
        const doc = await loadingTask.promise;
        const totalPages = doc.numPages;
        // A non-positive max_pages means "no limit".
        const pageLimit = input.max_pages > 0 ? Math.min(input.max_pages, totalPages) : totalPages;
        const pages = [];
        const tables = [];
        const textChunks = [];
        for (let p = 1; p <= pageLimit; p++) {
            try {
                const page = await doc.getPage(p);
                const content = await page.getTextContent();
                const pageText = assemblePageText(content.items, input.preserve_layout);
                const conf = computeConfidence(pageText);
                pages.push({ num: p, text: pageText, confidence: conf });
                textChunks.push(pageText);
                if (input.extract_tables) {
                    const pageTables = extractTablesFromText(pageText, p);
                    tables.push(...pageTables);
                }
            }
            catch (pageErr) {
                // A failed page is kept as an empty entry so page numbering stays
                // contiguous. It also counts as confidence 0 in the average below.
                warnings.push(`Page ${p}: ${pageErr instanceof Error ? pageErr.message : String(pageErr)}`);
                pages.push({ num: p, text: "", confidence: 0 });
            }
        }
        let metadataInfo = {};
        try {
            const md = await doc.getMetadata();
            metadataInfo = (md.info || {});
        }
        catch (mdErr) {
            warnings.push(`Metadata: ${mdErr instanceof Error ? mdErr.message : String(mdErr)}`);
        }
        // Mean of the per-page confidences (0 when no page was processed).
        const overall = pages.length > 0
            ? pages.reduce((s, p) => s + p.confidence, 0) / pages.length
            : 0;
        const fullText = textChunks.join("\n\n");
        if (!fullText.trim()) {
            warnings.push("No text extracted via native text layer. PDF may be a scanned image — OCR fallback will land in v2.6.0.");
        }
        const result = {
            text: fullText,
            pages,
            tables,
            metadata: {
                title: typeof metadataInfo.Title === "string" ? metadataInfo.Title : undefined,
                author: typeof metadataInfo.Author === "string" ? metadataInfo.Author : undefined,
                pages: totalPages,
                extracted_at: new Date().toISOString(),
                source: input.input,
            },
            strategy_used: "native",
            overall_confidence: overall,
            warnings,
        };
        // Always write output JSON to disk
        const outputPath = resolvePath(input.output);
        try {
            await writeFile(outputPath, JSON.stringify(result, null, 2), "utf-8");
        }
        catch (wErr) {
            // `warnings` is the same array referenced by `result`, so this entry
            // still reaches the returned payload even though the file write failed.
            warnings.push(`Failed to write output file: ${wErr instanceof Error ? wErr.message : String(wErr)}`);
        }
        return toolResult({ ...result, output_file: outputPath });
    }
    catch (err) {
        debugLog("pdf-extract", "failed", err);
        return errorResult(err instanceof Error ? err.message : String(err));
    }
    finally {
        // Release the pdf.js document and worker. Without this every call
        // leaves the parsed document alive for the life of the process.
        if (loadingTask) {
            try {
                await loadingTask.destroy();
            }
            catch (destroyErr) {
                // Cleanup is best-effort and must not mask the real result.
                debugLog("pdf-extract", "destroy failed", destroyErr);
            }
        }
    }
}
|
|
244
|
+
//# sourceMappingURL=pdf-extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.js","sourceRoot":"","sources":["../../src/tools/pdf-extract.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAChE,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,IAAI,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACpE,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAE7C,MAAM,CAAC,MAAM,IAAI,GAAG,aAAa,CAAC;AAElC,MAAM,CAAC,MAAM,WAAW,GACtB,yJAAyJ,CAAC;AAE5J,MAAM,CAAC,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC;IAC7B,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CAAC,sFAAsF,CAAC;IACnG,MAAM,EAAE,CAAC;SACN,MAAM,EAAE;SACR,OAAO,CAAC,kBAAkB,CAAC;SAC3B,QAAQ,CAAC,kBAAkB,CAAC;IAC/B,eAAe,EAAE,CAAC;SACf,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,kEAAkE,CAAC;IAC/E,cAAc,EAAE,CAAC;SACd,OAAO,EAAE;SACT,OAAO,CAAC,IAAI,CAAC;SACb,QAAQ,CAAC,wCAAwC,CAAC;IACrD,SAAS,EAAE,CAAC;SACT,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,kCAAkC,CAAC;CAChD,CAAC,CAAC;AAiDH,KAAK,UAAU,aAAa,CAAC,GAAW;IACtC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,CAAC,GAAG,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,2BAA2B,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,MAAM,GAAG,EAAE,CAAC,CAAC;IACjG,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;IACjD,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,eAAe,CAAC,CAAC,CAAC;IAC3D,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IACpC,MAAM,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAC3B,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,KAAqB,EAAE,cAAuB;IACtE,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACvE,CAAC;IAED,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,KAAK,GAAkB,IAAI,CAAC;IAEhC,KAAK,MAAM,EAAE,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,KA
AK,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC3F,MAAM,QAAQ,GAAG,CAAC,KAAK,IAAI,IAAI,KAAK,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;QAEzE,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,OAAO,CAAC,IAAI,EAAE;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;YAClD,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QAED,OAAO,IAAI,EAAE,CAAC,GAAG,CAAC;QAElB,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC;YACd,IAAI,OAAO,CAAC,IAAI,EAAE;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;YAClD,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;QAED,IAAI,CAAC,KAAK,IAAI;YAAE,KAAK,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,EAAE;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAClD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;GAIG;AACH,SAAS,qBAAqB,CAAC,IAAY,EAAE,OAAe;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,MAAM,GAAe,EAAE,CAAC;IAC5B,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/C,CAAC;QACD,MAAM,GAAG,EAAE,CAAC;QACZ,QAAQ,GAAG,CAAC,CAAC;IACf,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,KAAK,EAAE,CAAC;YACR,SAAS;QACX,CAAC;QACD,gCAAgC;QAChC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAC/E,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACtB,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;gBACnB,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBACrC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,KAAK,EAAE,CAAC;gBACR,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC;gBACxB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,KAAK,EAAE,CAAC;QACV,CAAC;I
ACH,CAAC;IACD,KAAK,EAAE,CAAC;IAER,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IACpB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEnC,8EAA8E;IAC9E,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC9B,IAAI,IAAI,IAAI,EAAE,IAAI,IAAI,GAAG,GAAG;YAAE,SAAS,EAAE,CAAC;aACrC,IAAI,uBAAuB,CAAC,IAAI,CAAC,EAAE,CAAC;YAAE,SAAS,EAAE,CAAC;IACzD,CAAC;IACD,MAAM,KAAK,GAAG,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC;IACzC,kEAAkE;IAClE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;IACtD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,YAAY,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,KAAsB;IAClD,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,oCAAoC;QACpC,IAAI,OAAe,CAAC;QACpB,MAAM,KAAK,GAAG,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAChD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,aAAa,EAAE,iBAAiB,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACnC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzB,OAAO,WAAW,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAED,6DAA6D;QAC7D,8DAA8D;QAC9D,MAAM,KAAK,GAAQ,MAAM,MAAM,CAAC,iCAAiC,CAAC,CAAC;QAEnE,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACrD,MAAM,WAAW,GAAG,KAAK,CAAC,WAAW,CAAC;YACpC,IAAI;YACJ,cAAc,EAAE,IAAI;YACpB,mDAAmD;YACnD,eAAe,EAAE,KAAK;YACtB,eAAe,EAAE,IAAI;SACtB,CAAC,CAAC;QACH,MAAM,GAAG,GAAoB,MAAM,WAAW,CAAC,OAAO,CAAC;QAEvD,MAAM,UAAU,GAAG,GAAG,CAAC,QAAQ,CAAC;QAChC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;QAE3F,MAAM,KAAK,GAAiB,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,MAAM,UAAU,GAAa,EAAE,CAAC;QAEhC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,OAAO,CA
AC,CAAC,CAAC,CAAC;gBAClC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,cAAc,EAAE,CAAC;gBAC5C,MAAM,QAAQ,GAAG,gBAAgB,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;gBACxE,MAAM,IAAI,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;gBACzC,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;gBACzD,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC1B,IAAI,KAAK,CAAC,cAAc,EAAE,CAAC;oBACzB,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;oBACtD,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;YAAC,OAAO,OAAO,EAAE,CAAC;gBACjB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,OAAO,YAAY,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;gBAC5F,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,IAAI,YAAY,GAA4B,EAAE,CAAC;QAC/C,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC;YACnC,YAAY,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,EAAE,CAA4B,CAAC;QAC5D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,IAAI,CAAC,aAAa,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACvF,CAAC;QAED,MAAM,OAAO,GACX,KAAK,CAAC,MAAM,GAAG,CAAC;YACd,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM;YAC5D,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;YACrB,QAAQ,CAAC,IAAI,CACX,yGAAyG,CAC1G,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAqB;YAC/B,IAAI,EAAE,QAAQ;YACd,KAAK;YACL,MAAM;YACN,QAAQ,EAAE;gBACR,KAAK,EAAE,OAAO,YAAY,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAE,YAAY,CAAC,KAAgB,CAAC,CAAC,CAAC,SAAS;gBAC1F,MAAM,EAAE,OAAO,YAAY,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAE,YAAY,CAAC,MAAiB,CAAC,CAAC,CAAC,SAAS;gBAC7F,KAAK,EAAE,UAAU;gBACjB,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACtC,MAAM,EAAE,KAAK,CAAC,KAAK;aACpB;YACD,aAAa,EAAE,QAAQ;YACvB,kBAAkB,EAAE,OAAO;YAC3B,QAAQ;SACT,CAAC;QAEF,mCAAmC;QACnC,IAAI,CAAC;YACH,MAAM,SAAS,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,
IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACvF,CAAC;QAAC,OAAO,IAAI,EAAE,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,gCAAgC,IAAI,YAAY,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvG,CAAC;QAED,OAAO,UAAU,CAAC,EAAE,GAAG,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC3E,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,QAAQ,CAAC,aAAa,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;QACvC,OAAO,WAAW,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;IACvE,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
/** Literal identifier exported by this module; presumably the name the tool is registered under — confirm against the tool registry. */
export declare const name = "record_flow";
|
|
3
|
+
/** Human-readable summary of the tool, exported as a string literal. */
export declare const description = "Record a headed browser workflow and save it as a generic Imperium Flow family/variant.";
|
|
4
|
+
/**
 * Zod object schema for the `record_flow` tool input.
 *
 * - `family`, `variant`, `url`: required strings.
 * - `description`, `flows_dir`, `session_id`: optional strings.
 * - `global`: boolean with a schema-level default.
 * - `captcha`: one of "auto" | "manual" | "off" | "fail", with a schema-level default.
 *
 * The last two type arguments are the parsed (output) shape, where defaulted
 * fields are always present, and the accepted (input) shape, where they may
 * be omitted.
 */
export declare const schema: z.ZodObject<
    {
        family: z.ZodString;
        variant: z.ZodString;
        url: z.ZodString;
        description: z.ZodOptional<z.ZodString>;
        flows_dir: z.ZodOptional<z.ZodString>;
        global: z.ZodDefault<z.ZodBoolean>;
        session_id: z.ZodOptional<z.ZodString>;
        captcha: z.ZodDefault<z.ZodEnum<["auto", "manual", "off", "fail"]>>;
    },
    "strip",
    z.ZodTypeAny,
    // Output shape: defaults have been applied.
    {
        url: string;
        captcha: "auto" | "off" | "manual" | "fail";
        family: string;
        variant: string;
        global: boolean;
        description?: string | undefined;
        session_id?: string | undefined;
        flows_dir?: string | undefined;
    },
    // Input shape: defaulted fields are optional.
    {
        url: string;
        family: string;
        variant: string;
        description?: string | undefined;
        captcha?: "auto" | "off" | "manual" | "fail" | undefined;
        session_id?: string | undefined;
        flows_dir?: string | undefined;
        global?: boolean | undefined;
    }
>;
|
|
32
|
+
/** Input type accepted by `execute`, inferred from `schema` (the parsed shape, with defaulted fields present). */
export type RecordFlowInput = z.infer<typeof schema>;
|
|
33
|
+
/**
 * Entry point of the `record_flow` tool.
 *
 * @param input Tool input matching `RecordFlowInput`.
 * @returns A promise resolving to an object whose `content` is a list of
 *   text items (`type: "text"` plus the `text` payload).
 */
export declare function execute(input: RecordFlowInput): Promise<{
    content: Array<{
        type: "text";
        text: string;
    }>;
}>;
|
|
39
|
+
//# sourceMappingURL=record-flow.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"record-flow.d.ts","sourceRoot":"","sources":["../../src/tools/record-flow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAgBxB,eAAO,MAAM,IAAI,gBAAgB,CAAC;AAClC,eAAO,MAAM,WAAW,4FAA4F,CAAC;AAErH,eAAO,MAAM,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;EASjB,CAAC;AAEH,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,MAAM,CAAC,CAAC;AAmSrD,wBAAsB,OAAO,CAAC,KAAK,EAAE,eAAe;;;;;GAoGnD"}
|