okrapdf 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/dist/browser.d.ts +1 -1
- package/dist/browser.js +1 -1
- package/dist/{chunk-KJMV6TN2.js → chunk-7BAEYVWG.js} +2 -2
- package/dist/{chunk-F3LECDPP.js → chunk-ETARIBOV.js} +293 -35
- package/dist/chunk-ETARIBOV.js.map +1 -0
- package/dist/{chunk-QKII53VN.js → chunk-HPTXRSWK.js} +2 -2
- package/dist/chunk-HPTXRSWK.js.map +1 -0
- package/dist/chunk-MSZQPLMQ.js +497 -0
- package/dist/chunk-MSZQPLMQ.js.map +1 -0
- package/dist/{chunk-2VKGPLAA.js → chunk-YGIBZV5J.js} +174 -92
- package/dist/chunk-YGIBZV5J.js.map +1 -0
- package/dist/cli/bin.d.ts +39 -0
- package/dist/cli/bin.js +440 -56
- package/dist/cli/bin.js.map +1 -1
- package/dist/cli/index.d.ts +2 -2
- package/dist/cli/index.js +2 -1
- package/dist/{client-DMEw0oK3.d.ts → client-D4A0dQ4h.d.ts} +2 -1
- package/dist/index.d.ts +146 -4
- package/dist/index.js +19 -3
- package/dist/index.js.map +1 -1
- package/dist/react/index.d.ts +3 -3
- package/dist/react/index.js +2 -2
- package/dist/{types-D2JaySEg.d.ts → types-SYOi8k1l.d.ts} +12 -4
- package/dist/url.d.ts +1 -1
- package/dist/url.js +1 -1
- package/package.json +4 -2
- package/dist/chunk-2VKGPLAA.js.map +0 -1
- package/dist/chunk-F3LECDPP.js.map +0 -1
- package/dist/chunk-QKII53VN.js.map +0 -1
- /package/dist/{chunk-KJMV6TN2.js.map → chunk-7BAEYVWG.js.map} +0 -0
package/dist/cli/bin.js
CHANGED
|
@@ -6,13 +6,19 @@ import {
|
|
|
6
6
|
authStatus,
|
|
7
7
|
authToken,
|
|
8
8
|
authWhoAmI,
|
|
9
|
+
collectionAddDocs,
|
|
10
|
+
collectionCreate,
|
|
11
|
+
collectionDelete,
|
|
9
12
|
collectionExport,
|
|
10
13
|
collectionList,
|
|
11
14
|
collectionQueryRaw,
|
|
15
|
+
collectionRemoveDocs,
|
|
12
16
|
collectionSetVisibility,
|
|
17
|
+
collectionShow,
|
|
13
18
|
deleteDocument,
|
|
14
19
|
find,
|
|
15
20
|
formatCollectionCsv,
|
|
21
|
+
formatCollectionDetail,
|
|
16
22
|
formatCollectionExportFlat,
|
|
17
23
|
formatCollectionList,
|
|
18
24
|
formatCollectionTable,
|
|
@@ -35,6 +41,13 @@ import {
|
|
|
35
41
|
handleError,
|
|
36
42
|
history,
|
|
37
43
|
listDocuments,
|
|
44
|
+
localDoctor,
|
|
45
|
+
localIngest,
|
|
46
|
+
localPage,
|
|
47
|
+
localSearch,
|
|
48
|
+
localStatus,
|
|
49
|
+
localSummary,
|
|
50
|
+
localTables,
|
|
38
51
|
pageEdit,
|
|
39
52
|
pageGet,
|
|
40
53
|
pageResolve,
|
|
@@ -47,22 +60,210 @@ import {
|
|
|
47
60
|
tree,
|
|
48
61
|
upload,
|
|
49
62
|
writeOutput
|
|
50
|
-
} from "../chunk-
|
|
63
|
+
} from "../chunk-ETARIBOV.js";
|
|
64
|
+
import "../chunk-MSZQPLMQ.js";
|
|
51
65
|
import {
|
|
52
66
|
OkraClient
|
|
53
|
-
} from "../chunk-
|
|
67
|
+
} from "../chunk-YGIBZV5J.js";
|
|
54
68
|
import "../chunk-NIZM2ETT.js";
|
|
55
69
|
|
|
56
70
|
// src/cli/bin.ts
|
|
57
71
|
import { Command } from "commander";
|
|
58
|
-
import { writeFileSync } from "fs";
|
|
72
|
+
import { realpathSync, writeFileSync } from "fs";
|
|
73
|
+
import { resolve } from "path";
|
|
74
|
+
import { pathToFileURL } from "url";
|
|
75
|
+
|
|
76
|
+
// package.json
|
|
77
|
+
var package_default = {
|
|
78
|
+
name: "okrapdf",
|
|
79
|
+
version: "0.14.0",
|
|
80
|
+
okraApi: "^1.14.0",
|
|
81
|
+
description: "OkraPDF \u2014 upload a PDF, get an API. Runtime client, React hooks, and CLI.",
|
|
82
|
+
type: "module",
|
|
83
|
+
exports: {
|
|
84
|
+
".": {
|
|
85
|
+
import: {
|
|
86
|
+
types: "./dist/index.d.ts",
|
|
87
|
+
default: "./dist/index.js"
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"./doc": {
|
|
91
|
+
import: {
|
|
92
|
+
types: "./dist/url.d.ts",
|
|
93
|
+
default: "./dist/url.js"
|
|
94
|
+
}
|
|
95
|
+
},
|
|
96
|
+
"./browser": {
|
|
97
|
+
import: {
|
|
98
|
+
types: "./dist/browser.d.ts",
|
|
99
|
+
default: "./dist/browser.js"
|
|
100
|
+
}
|
|
101
|
+
},
|
|
102
|
+
"./worker": {
|
|
103
|
+
import: {
|
|
104
|
+
types: "./dist/worker.d.ts",
|
|
105
|
+
default: "./dist/worker.js"
|
|
106
|
+
}
|
|
107
|
+
},
|
|
108
|
+
"./react": {
|
|
109
|
+
import: {
|
|
110
|
+
types: "./dist/react/index.d.ts",
|
|
111
|
+
default: "./dist/react/index.js"
|
|
112
|
+
}
|
|
113
|
+
},
|
|
114
|
+
"./cli": {
|
|
115
|
+
import: {
|
|
116
|
+
types: "./dist/cli/index.d.ts",
|
|
117
|
+
default: "./dist/cli/index.js"
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
},
|
|
121
|
+
bin: {
|
|
122
|
+
okra: "./dist/cli/bin.js"
|
|
123
|
+
},
|
|
124
|
+
files: [
|
|
125
|
+
"dist"
|
|
126
|
+
],
|
|
127
|
+
scripts: {
|
|
128
|
+
build: "tsup",
|
|
129
|
+
"docs:cli": "node ./scripts/generate-cli-docs.mjs",
|
|
130
|
+
"docs:cli:check": "node ./scripts/generate-cli-docs.mjs --check",
|
|
131
|
+
test: "vitest run --exclude '**/**.e2e.test.ts'",
|
|
132
|
+
"test:e2e": "vitest run src/client.e2e.test.ts",
|
|
133
|
+
"test:watch": "vitest",
|
|
134
|
+
typecheck: "tsc --noEmit"
|
|
135
|
+
},
|
|
136
|
+
dependencies: {
|
|
137
|
+
commander: "^12.0.0",
|
|
138
|
+
ws: "^8.19.0",
|
|
139
|
+
zod: "^4.3.6"
|
|
140
|
+
},
|
|
141
|
+
peerDependencies: {
|
|
142
|
+
react: ">=18"
|
|
143
|
+
},
|
|
144
|
+
peerDependenciesMeta: {
|
|
145
|
+
react: {
|
|
146
|
+
optional: true
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
devDependencies: {
|
|
150
|
+
"@types/node": "^20.14.0",
|
|
151
|
+
"@types/react": "^18.2.0",
|
|
152
|
+
"@types/ws": "^8.18.1",
|
|
153
|
+
react: "^18.2.0",
|
|
154
|
+
tsup: "^8.0.0",
|
|
155
|
+
typescript: "^5.5.0",
|
|
156
|
+
vitest: "^2.0.0"
|
|
157
|
+
},
|
|
158
|
+
license: "MIT",
|
|
159
|
+
repository: {
|
|
160
|
+
type: "git",
|
|
161
|
+
url: "https://github.com/okrapdf/okrapdf-sdk"
|
|
162
|
+
},
|
|
163
|
+
homepage: "https://okrapdf.com",
|
|
164
|
+
author: "OkraPDF",
|
|
165
|
+
keywords: [
|
|
166
|
+
"pdf",
|
|
167
|
+
"ocr",
|
|
168
|
+
"document",
|
|
169
|
+
"extraction",
|
|
170
|
+
"api",
|
|
171
|
+
"sdk",
|
|
172
|
+
"structured-output"
|
|
173
|
+
],
|
|
174
|
+
publishConfig: {
|
|
175
|
+
access: "public"
|
|
176
|
+
}
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
// src/cli/bin.ts
|
|
180
|
+
var CLI_VERSION = package_default.version;
|
|
181
|
+
var PRIMARY_COMMANDS = ["auth", "upload", "extract", "chat", "list", "read", "delete", "collection"];
|
|
182
|
+
var ADVANCED_COMMANDS = ["status", "tree", "find", "page", "search", "tables", "history", "toc", "local"];
|
|
183
|
+
var PRIMARY_COLLECTION_SUBCOMMANDS = ["list", "query"];
|
|
184
|
+
var ADVANCED_COLLECTION_SUBCOMMANDS = ["create", "show", "delete", "add", "remove", "publish", "unpublish", "export"];
|
|
185
|
+
var ROOT_HELP_FOOTER = [
|
|
186
|
+
"",
|
|
187
|
+
"Primary workflows:",
|
|
188
|
+
" okra auth login",
|
|
189
|
+
" okra upload ./report.pdf",
|
|
190
|
+
' okra chat "Summarize this document" --doc doc-abc123',
|
|
191
|
+
" okra extract ./report.pdf --schema ./schema.json",
|
|
192
|
+
' okra collection query earnings "What changed quarter over quarter?" -o earnings.csv',
|
|
193
|
+
"",
|
|
194
|
+
"Advanced inspection and local-only commands are intentionally hidden from",
|
|
195
|
+
"default help during the v0.14 clean-house release candidate."
|
|
196
|
+
].join("\n");
|
|
197
|
+
var COLLECTION_HELP_FOOTER = [
|
|
198
|
+
"",
|
|
199
|
+
"Stable v0.14 collection workflow:",
|
|
200
|
+
' okra collection query <name> "<question>"',
|
|
201
|
+
"",
|
|
202
|
+
"Experimental structured fan-out remains available via:",
|
|
203
|
+
' okra collection query <name> "<question>" --schema ./schema.json',
|
|
204
|
+
" okra collection extract <name> --schema ./schema.json",
|
|
205
|
+
"",
|
|
206
|
+
"Advanced collection management commands remain available but are",
|
|
207
|
+
"intentionally hidden from default help during the clean-house release",
|
|
208
|
+
"candidate."
|
|
209
|
+
].join("\n");
|
|
59
210
|
var program = new Command();
|
|
60
211
|
program.showHelpAfterError();
|
|
61
212
|
program.showSuggestionAfterError();
|
|
62
|
-
program.name("okra").description("
|
|
213
|
+
program.name("okra").description("Okra CLI \u2014 upload PDFs, chat with documents, and extract structured data").version(CLI_VERSION).option("-j, --json", "Output JSON (structured, machine-readable)").option("-q, --quiet", "Suppress progress and human-readable frills").option("-o, --output <file>", "Write output to file instead of stdout");
|
|
214
|
+
program.addHelpText(
|
|
215
|
+
"after",
|
|
216
|
+
ROOT_HELP_FOOTER
|
|
217
|
+
);
|
|
63
218
|
function globals() {
|
|
64
219
|
return program.opts();
|
|
65
220
|
}
|
|
221
|
+
function getMissingApiKeyMessage() {
|
|
222
|
+
return [
|
|
223
|
+
"No API key found.",
|
|
224
|
+
"",
|
|
225
|
+
"Set one up with:",
|
|
226
|
+
" okra auth login",
|
|
227
|
+
' export OKRA_API_KEY="okra_xxx"',
|
|
228
|
+
"",
|
|
229
|
+
"Get your API key at:",
|
|
230
|
+
" https://app.okrapdf.com/settings",
|
|
231
|
+
"",
|
|
232
|
+
"CLI docs:",
|
|
233
|
+
" https://docs.okrapdf.com/api-reference/cli"
|
|
234
|
+
].join("\n");
|
|
235
|
+
}
|
|
236
|
+
function formatDocumentReadyMessage(docId, pages) {
|
|
237
|
+
return [
|
|
238
|
+
`Ready: ${docId}${typeof pages === "number" ? ` (${pages} pages)` : ""}`,
|
|
239
|
+
"",
|
|
240
|
+
"Next:",
|
|
241
|
+
` okra chat "Summarize this document" --doc ${docId}`,
|
|
242
|
+
` okra read ${docId}`,
|
|
243
|
+
` okra extract ${docId} --schema ./schema.json`
|
|
244
|
+
].join("\n");
|
|
245
|
+
}
|
|
246
|
+
function formatQueuedDocumentMessage(docId) {
|
|
247
|
+
return [
|
|
248
|
+
`Queued: ${docId}`,
|
|
249
|
+
"",
|
|
250
|
+
"Next:",
|
|
251
|
+
` okra status ${docId}`,
|
|
252
|
+
"",
|
|
253
|
+
"Once processing finishes:",
|
|
254
|
+
` okra chat "Summarize this document" --doc ${docId}`,
|
|
255
|
+
` okra read ${docId}`
|
|
256
|
+
].join("\n");
|
|
257
|
+
}
|
|
258
|
+
function isDirectExecution() {
|
|
259
|
+
const entry = process.argv[1];
|
|
260
|
+
if (!entry) return false;
|
|
261
|
+
try {
|
|
262
|
+
return import.meta.url === pathToFileURL(realpathSync(resolve(entry))).href;
|
|
263
|
+
} catch {
|
|
264
|
+
return import.meta.url === pathToFileURL(resolve(entry)).href;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
66
267
|
function getClient() {
|
|
67
268
|
const apiKey = getApiKey();
|
|
68
269
|
const baseUrl = getBaseUrl();
|
|
@@ -71,54 +272,137 @@ function getClient() {
|
|
|
71
272
|
if (g.json) {
|
|
72
273
|
process.stderr.write(JSON.stringify({ error: "No API key found", code: 401 }) + "\n");
|
|
73
274
|
} else {
|
|
74
|
-
process.stderr.write(
|
|
75
|
-
'No API key found.\n\n Get one: https://docs.okrapdf.com/api-keys\n Then: export OKRA_API_KEY="okra_xxx"\n Or: npx okra auth login\n\n Docs: https://docs.okrapdf.com\n Discord: https://discord.gg/BHNmbZVs\n'
|
|
76
|
-
);
|
|
275
|
+
process.stderr.write(getMissingApiKeyMessage() + "\n");
|
|
77
276
|
}
|
|
78
277
|
process.exit(1);
|
|
79
278
|
}
|
|
80
279
|
return new OkraClient({ apiKey, baseUrl });
|
|
81
280
|
}
|
|
281
|
+
function writeLocalResult(result, humanText) {
|
|
282
|
+
const g = globals();
|
|
283
|
+
if (g.json) {
|
|
284
|
+
writeOutput(JSON.stringify(result), g.output);
|
|
285
|
+
} else {
|
|
286
|
+
writeOutput(humanText, g.output);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
var localCmd = program.command("local", { hidden: true }).description("Offline PDF tools for local OpenClaw/opencode harnesses");
|
|
290
|
+
localCmd.command("ingest <source>").description("Ingest a local PDF into the offline document store").option("--data-dir <path>", "Override local document store path").action(async (source, options) => {
|
|
291
|
+
const g = globals();
|
|
292
|
+
try {
|
|
293
|
+
const result = localIngest(source, { dataDir: options.dataDir });
|
|
294
|
+
writeLocalResult(
|
|
295
|
+
result,
|
|
296
|
+
`Indexed ${result.filename} as ${result.documentId} (${result.pageCount} pages, ${result.charCount} chars)`
|
|
297
|
+
);
|
|
298
|
+
} catch (error) {
|
|
299
|
+
handleError(error, g.json);
|
|
300
|
+
}
|
|
301
|
+
});
|
|
302
|
+
localCmd.command("status").description("Get local document status").requiredOption("--doc <id>", "Local document ID").option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
303
|
+
const g = globals();
|
|
304
|
+
try {
|
|
305
|
+
const result = localStatus(options.doc, { dataDir: options.dataDir });
|
|
306
|
+
writeLocalResult(
|
|
307
|
+
result,
|
|
308
|
+
`${result.documentId}: ${result.status} (${result.pagesWithText}/${result.pageCount} pages with text)`
|
|
309
|
+
);
|
|
310
|
+
} catch (error) {
|
|
311
|
+
handleError(error, g.json);
|
|
312
|
+
}
|
|
313
|
+
});
|
|
314
|
+
localCmd.command("summary").description("Produce an extractive summary of a local PDF").requiredOption("--doc <id>", "Local document ID").option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
315
|
+
const g = globals();
|
|
316
|
+
try {
|
|
317
|
+
const result = localSummary(options.doc, { dataDir: options.dataDir });
|
|
318
|
+
writeLocalResult(result, result.summary);
|
|
319
|
+
} catch (error) {
|
|
320
|
+
handleError(error, g.json);
|
|
321
|
+
}
|
|
322
|
+
});
|
|
323
|
+
localCmd.command("search").description("Search within a local PDF").requiredOption("--doc <id>", "Local document ID").requiredOption("--query <text>", "Search query").option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
324
|
+
const g = globals();
|
|
325
|
+
try {
|
|
326
|
+
const result = localSearch(options.doc, options.query, { dataDir: options.dataDir });
|
|
327
|
+
const preview = result.matches.map((match) => `p.${match.page}: ${match.snippet}`).join("\n");
|
|
328
|
+
writeLocalResult(result, preview || `No matches found for "${options.query}"`);
|
|
329
|
+
} catch (error) {
|
|
330
|
+
handleError(error, g.json);
|
|
331
|
+
}
|
|
332
|
+
});
|
|
333
|
+
localCmd.command("page").description("Read one extracted page from a local PDF").requiredOption("--doc <id>", "Local document ID").requiredOption("--page <n>", "1-indexed page number", parseInt).option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
334
|
+
const g = globals();
|
|
335
|
+
try {
|
|
336
|
+
const result = localPage(options.doc, options.page, { dataDir: options.dataDir });
|
|
337
|
+
writeLocalResult(result, result.text);
|
|
338
|
+
} catch (error) {
|
|
339
|
+
handleError(error, g.json);
|
|
340
|
+
}
|
|
341
|
+
});
|
|
342
|
+
localCmd.command("tables").description("Detect table-like layout blocks from a local PDF").requiredOption("--doc <id>", "Local document ID").option("--query <text>", "Optional query to rank table-like blocks").option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
343
|
+
const g = globals();
|
|
344
|
+
try {
|
|
345
|
+
const result = localTables(options.doc, options.query, { dataDir: options.dataDir });
|
|
346
|
+
const preview = result.tables.map((table) => `p.${table.page} (${table.rowCount} rows)
|
|
347
|
+
${table.preview}`).join("\n\n");
|
|
348
|
+
writeLocalResult(result, preview || "No table-like blocks found");
|
|
349
|
+
} catch (error) {
|
|
350
|
+
handleError(error, g.json);
|
|
351
|
+
}
|
|
352
|
+
});
|
|
353
|
+
localCmd.command("doctor").description("Check local offline PDF tool availability").option("--data-dir <path>", "Override local document store path").action(async (options) => {
|
|
354
|
+
const g = globals();
|
|
355
|
+
try {
|
|
356
|
+
const result = localDoctor({ dataDir: options.dataDir });
|
|
357
|
+
const human = [
|
|
358
|
+
`data dir: ${result.dataDir}`,
|
|
359
|
+
`pdftotext: ${result.tools.pdftotext.available ? result.tools.pdftotext.path : "missing"}`,
|
|
360
|
+
`pdfinfo: ${result.tools.pdfinfo.available ? result.tools.pdfinfo.path : "missing"}`,
|
|
361
|
+
`pdftoppm: ${result.tools.pdftoppm.available ? result.tools.pdftoppm.path : "missing"}`,
|
|
362
|
+
`tesseract: ${result.tools.tesseract.available ? result.tools.tesseract.path : "missing"}`
|
|
363
|
+
].join("\n");
|
|
364
|
+
writeLocalResult(result, human);
|
|
365
|
+
} catch (error) {
|
|
366
|
+
handleError(error, g.json);
|
|
367
|
+
}
|
|
368
|
+
});
|
|
82
369
|
async function runUploadCommand(source, options) {
|
|
83
370
|
const g = globals();
|
|
84
371
|
try {
|
|
85
372
|
const client = getClient();
|
|
373
|
+
let vendorOptions;
|
|
374
|
+
if (options.vendorOptions) {
|
|
375
|
+
try {
|
|
376
|
+
vendorOptions = JSON.parse(options.vendorOptions);
|
|
377
|
+
} catch {
|
|
378
|
+
process.stderr.write("Error: --vendor-options must be valid JSON\n");
|
|
379
|
+
process.exit(1);
|
|
380
|
+
}
|
|
381
|
+
}
|
|
86
382
|
const result = await upload(client, source, {
|
|
87
383
|
...g,
|
|
88
|
-
noWait: options.wait === false
|
|
384
|
+
noWait: options.wait === false,
|
|
385
|
+
vendorOptions
|
|
89
386
|
});
|
|
90
387
|
if (g.json) {
|
|
91
388
|
writeOutput(JSON.stringify(result), g.output);
|
|
92
389
|
} else {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
lines.push("");
|
|
98
|
-
lines.push(` Markdown: ${result.urls.full_md.replace(result.id, short)}`);
|
|
99
|
-
lines.push(` Page 1: ${result.urls.page_png.replace(result.id, short).replace("{N}", "1")}`);
|
|
100
|
-
lines.push(` Completion: ${result.urls.completion.replace(result.id, short)}`);
|
|
101
|
-
lines.push("");
|
|
102
|
-
lines.push(" URL patterns:");
|
|
103
|
-
lines.push(" /v1/documents/{id}/pg_{N}.md page markdown");
|
|
104
|
-
lines.push(" /v1/documents/{id}/d_shimmer/pg_{N}.png page image");
|
|
105
|
-
lines.push(" /v1/documents/{id}/full.md full document");
|
|
106
|
-
lines.push("");
|
|
107
|
-
lines.push(" Docs: https://docs.okrapdf.com Discord: https://discord.gg/BHNmbZVs");
|
|
108
|
-
}
|
|
109
|
-
writeOutput(lines.join("\n"), g.output);
|
|
390
|
+
writeOutput(
|
|
391
|
+
options.wait === false ? formatQueuedDocumentMessage(result.id) : formatDocumentReadyMessage(result.id, result.pages),
|
|
392
|
+
g.output
|
|
393
|
+
);
|
|
110
394
|
}
|
|
111
395
|
} catch (error) {
|
|
112
396
|
handleError(error, g.json);
|
|
113
397
|
}
|
|
114
398
|
}
|
|
115
399
|
function registerUploadCommand(commandName, description) {
|
|
116
|
-
program.command(`${commandName} <source>`).description(description).option("--no-wait", "Fire-and-forget (don't wait for processing)").action(async (source, options) => {
|
|
400
|
+
program.command(`${commandName} <source>`).description(description).summary("Upload a PDF and wait for processing").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--vendor-options <json>", `JSON vendor-specific options (e.g., '{"model":"gemini-3.1-pro","parse_mode":"parse_page_with_agent"}')`).action(async (source, options) => {
|
|
117
401
|
await runUploadCommand(source, options);
|
|
118
402
|
});
|
|
119
403
|
}
|
|
120
404
|
registerUploadCommand("upload", "Upload a PDF (file path or URL), wait for processing");
|
|
121
|
-
program.command("extract <source>").description("Extract structured data from a document (doc ID, URL, or file path)").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--schema <file>", "JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", 'Extraction prompt (default: "Extract all data according to the schema")').action(async (source, options) => {
|
|
405
|
+
program.command("extract <source>").description("Extract structured data from a document (doc ID, URL, or file path)").summary("Upload a PDF and extract structured data").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--schema <file>", "JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", 'Extraction prompt (default: "Extract all data according to the schema")').action(async (source, options) => {
|
|
122
406
|
const g = globals();
|
|
123
407
|
try {
|
|
124
408
|
const client = getClient();
|
|
@@ -137,16 +421,10 @@ program.command("extract <source>").description("Extract structured data from a
|
|
|
137
421
|
if (g.json) {
|
|
138
422
|
writeOutput(JSON.stringify(result), g.output);
|
|
139
423
|
} else {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
lines.push("");
|
|
145
|
-
lines.push(` Markdown: ${result.urls.full_md.replace(docId, short)}`);
|
|
146
|
-
lines.push(` Page 1: ${result.urls.page_png.replace(docId, short).replace("{N}", "1")}`);
|
|
147
|
-
lines.push(` Completion: ${result.urls.completion.replace(docId, short)}`);
|
|
148
|
-
}
|
|
149
|
-
writeOutput(lines.join("\n"), g.output);
|
|
424
|
+
writeOutput(
|
|
425
|
+
options.wait === false ? formatQueuedDocumentMessage(docId) : formatDocumentReadyMessage(docId, result.pages),
|
|
426
|
+
g.output
|
|
427
|
+
);
|
|
150
428
|
}
|
|
151
429
|
return;
|
|
152
430
|
}
|
|
@@ -192,7 +470,7 @@ program.command("extract <source>").description("Extract structured data from a
|
|
|
192
470
|
handleError(error, g.json);
|
|
193
471
|
}
|
|
194
472
|
});
|
|
195
|
-
program.command("status <docId>").description("Get document processing status").action(async (docId) => {
|
|
473
|
+
program.command("status <docId>", { hidden: true }).description("Get document processing status").action(async (docId) => {
|
|
196
474
|
const g = globals();
|
|
197
475
|
try {
|
|
198
476
|
const client = getClient();
|
|
@@ -213,10 +491,32 @@ program.command("status <docId>").description("Get document processing status").
|
|
|
213
491
|
handleError(error, g.json);
|
|
214
492
|
}
|
|
215
493
|
});
|
|
216
|
-
program.command("chat <question>").description("Ask a question about a processed document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").action(async (question, options) => {
|
|
494
|
+
program.command("chat <question>").description("Ask a question about a processed document").summary("Ask a question about one document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").option("--stream", "Stream response tokens as they arrive").action(async (question, options) => {
|
|
217
495
|
const g = globals();
|
|
218
496
|
try {
|
|
219
497
|
const client = getClient();
|
|
498
|
+
if (options.stream) {
|
|
499
|
+
let fullText = "";
|
|
500
|
+
for await (const event of client.stream(options.doc, question, {
|
|
501
|
+
model: options.model
|
|
502
|
+
})) {
|
|
503
|
+
if (event.type === "text_delta") {
|
|
504
|
+
fullText += event.text;
|
|
505
|
+
if (!g.json) process.stdout.write(event.text);
|
|
506
|
+
} else if (event.type === "done") {
|
|
507
|
+
if (!g.json) process.stdout.write("\n");
|
|
508
|
+
if (g.json) {
|
|
509
|
+
writeOutput(
|
|
510
|
+
JSON.stringify({ docId: options.doc, question, answer: fullText }),
|
|
511
|
+
g.output
|
|
512
|
+
);
|
|
513
|
+
}
|
|
514
|
+
} else if (event.type === "error") {
|
|
515
|
+
throw new Error(event.message);
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
return;
|
|
519
|
+
}
|
|
220
520
|
const result = await client.generate(options.doc, question, options.model ? { model: options.model } : void 0);
|
|
221
521
|
if (g.json) {
|
|
222
522
|
writeOutput(JSON.stringify({ docId: options.doc, question, ...result }), g.output);
|
|
@@ -227,7 +527,7 @@ program.command("chat <question>").description("Ask a question about a processed
|
|
|
227
527
|
handleError(error, g.json);
|
|
228
528
|
}
|
|
229
529
|
});
|
|
230
|
-
program.command("list").alias("ls").description("List all documents").action(async () => {
|
|
530
|
+
program.command("list").alias("ls").description("List all documents").summary("List your documents").action(async () => {
|
|
231
531
|
const g = globals();
|
|
232
532
|
try {
|
|
233
533
|
const client = getClient();
|
|
@@ -237,7 +537,7 @@ program.command("list").alias("ls").description("List all documents").action(asy
|
|
|
237
537
|
handleError(error, g.json);
|
|
238
538
|
}
|
|
239
539
|
});
|
|
240
|
-
program.command("delete <docId>").alias("rm").description("Delete a document").action(async (docId) => {
|
|
540
|
+
program.command("delete <docId>").alias("rm").description("Delete a document").summary("Delete one document").action(async (docId) => {
|
|
241
541
|
const g = globals();
|
|
242
542
|
try {
|
|
243
543
|
const client = getClient();
|
|
@@ -251,7 +551,7 @@ program.command("delete <docId>").alias("rm").description("Delete a document").a
|
|
|
251
551
|
handleError(error, g.json);
|
|
252
552
|
}
|
|
253
553
|
});
|
|
254
|
-
program.command("read <docId>").description("Read document as markdown").option("-p, --pages <range>", "Page range (e.g., 1-5, 10-15)").action(async (docId, options) => {
|
|
554
|
+
program.command("read <docId>").description("Read document as markdown").summary("Read document markdown").option("-p, --pages <range>", "Page range (e.g., 1-5, 10-15)").action(async (docId, options) => {
|
|
255
555
|
const g = globals();
|
|
256
556
|
try {
|
|
257
557
|
const client = getClient();
|
|
@@ -266,7 +566,12 @@ program.command("read <docId>").description("Read document as markdown").option(
|
|
|
266
566
|
}
|
|
267
567
|
});
|
|
268
568
|
var collectionCmd = program.command("collection").alias("collections").alias("col").description("Collection operations");
|
|
269
|
-
collectionCmd.
|
|
569
|
+
collectionCmd.summary("Query across collections");
|
|
570
|
+
collectionCmd.addHelpText(
|
|
571
|
+
"after",
|
|
572
|
+
COLLECTION_HELP_FOOTER
|
|
573
|
+
);
|
|
574
|
+
collectionCmd.command("list").alias("ls").description("List available collections").summary("List collections").action(async () => {
|
|
270
575
|
const g = globals();
|
|
271
576
|
try {
|
|
272
577
|
const client = getClient();
|
|
@@ -276,7 +581,73 @@ collectionCmd.command("list").alias("ls").description("List available collection
|
|
|
276
581
|
handleError(error, g.json);
|
|
277
582
|
}
|
|
278
583
|
});
|
|
279
|
-
collectionCmd.command("
|
|
584
|
+
collectionCmd.command("create <name>", { hidden: true }).description("Create a new collection").option("--description <text>", "Collection description").option("--docs <ids>", "Comma-separated document IDs to seed").action(async (name, options) => {
|
|
585
|
+
const g = globals();
|
|
586
|
+
try {
|
|
587
|
+
const client = getClient();
|
|
588
|
+
const result = await collectionCreate(client, name, { ...g, ...options });
|
|
589
|
+
if (g.json) {
|
|
590
|
+
writeOutput(JSON.stringify(result), g.output);
|
|
591
|
+
} else {
|
|
592
|
+
writeOutput(`Created collection "${result.name}" (${result.id})`, g.output);
|
|
593
|
+
}
|
|
594
|
+
} catch (error) {
|
|
595
|
+
handleError(error, g.json);
|
|
596
|
+
}
|
|
597
|
+
});
|
|
598
|
+
collectionCmd.command("show <nameOrId>", { hidden: true }).description("Show collection details and documents").action(async (nameOrId) => {
|
|
599
|
+
const g = globals();
|
|
600
|
+
try {
|
|
601
|
+
const client = getClient();
|
|
602
|
+
const detail = await collectionShow(client, nameOrId);
|
|
603
|
+
writeOutput(formatCollectionDetail(detail, g.json), g.output);
|
|
604
|
+
} catch (error) {
|
|
605
|
+
handleError(error, g.json);
|
|
606
|
+
}
|
|
607
|
+
});
|
|
608
|
+
collectionCmd.command("delete <nameOrId>", { hidden: true }).alias("rm").description("Delete a collection (documents are preserved)").action(async (nameOrId) => {
|
|
609
|
+
const g = globals();
|
|
610
|
+
try {
|
|
611
|
+
const client = getClient();
|
|
612
|
+
await collectionDelete(client, nameOrId);
|
|
613
|
+
if (g.json) {
|
|
614
|
+
writeOutput(JSON.stringify({ ok: true, deleted: nameOrId }), g.output);
|
|
615
|
+
} else {
|
|
616
|
+
writeOutput(`Deleted collection "${nameOrId}"`, g.output);
|
|
617
|
+
}
|
|
618
|
+
} catch (error) {
|
|
619
|
+
handleError(error, g.json);
|
|
620
|
+
}
|
|
621
|
+
});
|
|
622
|
+
collectionCmd.command("add <nameOrId> <docIds...>", { hidden: true }).description("Add documents to a collection").action(async (nameOrId, docIds) => {
|
|
623
|
+
const g = globals();
|
|
624
|
+
try {
|
|
625
|
+
const client = getClient();
|
|
626
|
+
const result = await collectionAddDocs(client, nameOrId, docIds);
|
|
627
|
+
if (g.json) {
|
|
628
|
+
writeOutput(JSON.stringify(result), g.output);
|
|
629
|
+
} else {
|
|
630
|
+
writeOutput(`Added ${docIds.length} document(s) to "${nameOrId}"`, g.output);
|
|
631
|
+
}
|
|
632
|
+
} catch (error) {
|
|
633
|
+
handleError(error, g.json);
|
|
634
|
+
}
|
|
635
|
+
});
|
|
636
|
+
collectionCmd.command("remove <nameOrId> <docIds...>", { hidden: true }).description("Remove documents from a collection").action(async (nameOrId, docIds) => {
|
|
637
|
+
const g = globals();
|
|
638
|
+
try {
|
|
639
|
+
const client = getClient();
|
|
640
|
+
const result = await collectionRemoveDocs(client, nameOrId, docIds);
|
|
641
|
+
if (g.json) {
|
|
642
|
+
writeOutput(JSON.stringify(result), g.output);
|
|
643
|
+
} else {
|
|
644
|
+
writeOutput(`Removed ${docIds.length} document(s) from "${nameOrId}"`, g.output);
|
|
645
|
+
}
|
|
646
|
+
} catch (error) {
|
|
647
|
+
handleError(error, g.json);
|
|
648
|
+
}
|
|
649
|
+
});
|
|
650
|
+
collectionCmd.command("query <nameOrId> <question>").description("Fan-out query across collection documents").summary("Ask the same question across a collection").option("--schema <file>", "Experimental: JSON Schema file for structured extraction").action(async (nameOrId, question, options) => {
|
|
280
651
|
const g = globals();
|
|
281
652
|
try {
|
|
282
653
|
const client = getClient();
|
|
@@ -303,7 +674,7 @@ collectionCmd.command("query <nameOrId> <question>").description("Fan-out query
|
|
|
303
674
|
handleError(error, g.json);
|
|
304
675
|
}
|
|
305
676
|
});
|
|
306
|
-
collectionCmd.command("extract <nameOrId>").description("
|
|
677
|
+
collectionCmd.command("extract <nameOrId>", { hidden: true }).description("Experimental: extract structured data from all documents in a collection").summary("Experimental structured extraction").requiredOption("--schema <file>", "Experimental: JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", "Extraction prompt (default: auto-generated from schema)").action(async (nameOrId, options) => {
|
|
307
678
|
const g = globals();
|
|
308
679
|
try {
|
|
309
680
|
const client = getClient();
|
|
@@ -340,7 +711,7 @@ ${summary.completed} documents \u2014 $${summary.total_cost_usd.toFixed(4)} tota
|
|
|
340
711
|
handleError(error, g.json);
|
|
341
712
|
}
|
|
342
713
|
});
|
|
343
|
-
collectionCmd.command("publish <nameOrId>").description("Make a collection publicly queryable").action(async (nameOrId) => {
|
|
714
|
+
collectionCmd.command("publish <nameOrId>", { hidden: true }).description("Make a collection publicly queryable").action(async (nameOrId) => {
|
|
344
715
|
const g = globals();
|
|
345
716
|
try {
|
|
346
717
|
const client = getClient();
|
|
@@ -358,7 +729,7 @@ Share with: okra collection query ${nameOrId} "your question"`,
|
|
|
358
729
|
handleError(error, g.json);
|
|
359
730
|
}
|
|
360
731
|
});
|
|
361
|
-
collectionCmd.command("unpublish <nameOrId>").description("Make a collection private (owner-only)").action(async (nameOrId) => {
|
|
732
|
+
collectionCmd.command("unpublish <nameOrId>", { hidden: true }).description("Make a collection private (owner-only)").action(async (nameOrId) => {
|
|
362
733
|
const g = globals();
|
|
363
734
|
try {
|
|
364
735
|
const client = getClient();
|
|
@@ -372,7 +743,7 @@ collectionCmd.command("unpublish <nameOrId>").description("Make a collection pri
|
|
|
372
743
|
handleError(error, g.json);
|
|
373
744
|
}
|
|
374
745
|
});
|
|
375
|
-
collectionCmd.command("export <nameOrId>").description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
|
|
746
|
+
collectionCmd.command("export <nameOrId>", { hidden: true }).description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
|
|
376
747
|
const g = globals();
|
|
377
748
|
try {
|
|
378
749
|
const client = getClient();
|
|
@@ -449,7 +820,7 @@ authCmd.command("logout").description("Remove API key from global config").actio
|
|
|
449
820
|
handleError(error, globals().json);
|
|
450
821
|
}
|
|
451
822
|
});
|
|
452
|
-
program.command("tree <jobId>").description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
|
|
823
|
+
program.command("tree <jobId>", { hidden: true }).description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
|
|
453
824
|
const g = globals();
|
|
454
825
|
try {
|
|
455
826
|
const client = getClient();
|
|
@@ -463,7 +834,7 @@ program.command("tree <jobId>").description("Show document verification tree").o
|
|
|
463
834
|
handleError(error, g.json);
|
|
464
835
|
}
|
|
465
836
|
});
|
|
466
|
-
program.command("find <jobId> <selector>").description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
|
|
837
|
+
program.command("find <jobId> <selector>", { hidden: true }).description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
|
|
467
838
|
const g = globals();
|
|
468
839
|
try {
|
|
469
840
|
const client = getClient();
|
|
@@ -484,7 +855,7 @@ program.command("find <jobId> <selector>").description("Find entities using jQue
|
|
|
484
855
|
handleError(error, g.json);
|
|
485
856
|
}
|
|
486
857
|
});
|
|
487
|
-
var pageCmd = program.command("page").description("Page content operations");
|
|
858
|
+
var pageCmd = program.command("page", { hidden: true }).description("Page content operations");
|
|
488
859
|
pageCmd.command("get <jobId> <pageNum>").description("Get page content").option("-v, --version <n>", "Specific version", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "markdown").action(async (jobId, pageNum, options) => {
|
|
489
860
|
const g = globals();
|
|
490
861
|
try {
|
|
@@ -546,7 +917,7 @@ pageCmd.command("versions <jobId> <pageNum>").description("List page versions").
|
|
|
546
917
|
handleError(error, g.json);
|
|
547
918
|
}
|
|
548
919
|
});
|
|
549
|
-
program.command("search <jobId> <query>").description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
|
|
920
|
+
program.command("search <jobId> <query>", { hidden: true }).description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
|
|
550
921
|
const g = globals();
|
|
551
922
|
try {
|
|
552
923
|
const client = getClient();
|
|
@@ -557,7 +928,7 @@ program.command("search <jobId> <query>").description("Search page content").opt
|
|
|
557
928
|
handleError(error, g.json);
|
|
558
929
|
}
|
|
559
930
|
});
|
|
560
|
-
program.command("tables <jobId>").description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
|
|
931
|
+
program.command("tables <jobId>", { hidden: true }).description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
|
|
561
932
|
const g = globals();
|
|
562
933
|
try {
|
|
563
934
|
const client = getClient();
|
|
@@ -571,7 +942,7 @@ program.command("tables <jobId>").description("List extracted tables").option("-
|
|
|
571
942
|
handleError(error, g.json);
|
|
572
943
|
}
|
|
573
944
|
});
|
|
574
|
-
program.command("history <jobId>").description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
|
|
945
|
+
program.command("history <jobId>", { hidden: true }).description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
|
|
575
946
|
const g = globals();
|
|
576
947
|
try {
|
|
577
948
|
const client = getClient();
|
|
@@ -582,7 +953,7 @@ program.command("history <jobId>").description("Show verification history").opti
|
|
|
582
953
|
handleError(error, g.json);
|
|
583
954
|
}
|
|
584
955
|
});
|
|
585
|
-
program.command("toc <jobId>").description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
|
|
956
|
+
program.command("toc <jobId>", { hidden: true }).description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
|
|
586
957
|
const g = globals();
|
|
587
958
|
try {
|
|
588
959
|
const client = getClient();
|
|
@@ -596,5 +967,18 @@ program.command("toc <jobId>").description("Extract table of contents from PDF")
|
|
|
596
967
|
handleError(error, g.json);
|
|
597
968
|
}
|
|
598
969
|
});
|
|
599
|
-
|
|
970
|
+
if (isDirectExecution()) {
|
|
971
|
+
void program.parseAsync();
|
|
972
|
+
}
|
|
973
|
+
export {
|
|
974
|
+
ADVANCED_COLLECTION_SUBCOMMANDS,
|
|
975
|
+
ADVANCED_COMMANDS,
|
|
976
|
+
CLI_VERSION,
|
|
977
|
+
COLLECTION_HELP_FOOTER,
|
|
978
|
+
PRIMARY_COLLECTION_SUBCOMMANDS,
|
|
979
|
+
PRIMARY_COMMANDS,
|
|
980
|
+
ROOT_HELP_FOOTER,
|
|
981
|
+
getMissingApiKeyMessage,
|
|
982
|
+
program
|
|
983
|
+
};
|
|
600
984
|
//# sourceMappingURL=bin.js.map
|