okrapdf 0.12.1 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/bin.js CHANGED
@@ -6,15 +6,26 @@ import {
6
6
  authStatus,
7
7
  authToken,
8
8
  authWhoAmI,
9
+ collectionAddDocs,
10
+ collectionCreate,
11
+ collectionDelete,
9
12
  collectionExport,
10
13
  collectionList,
11
14
  collectionQueryRaw,
15
+ collectionRemoveDocs,
12
16
  collectionSetVisibility,
17
+ collectionShow,
18
+ deleteDocument,
13
19
  find,
14
20
  formatCollectionCsv,
21
+ formatCollectionDetail,
15
22
  formatCollectionExportFlat,
16
23
  formatCollectionList,
17
24
  formatCollectionTable,
25
+ formatDocumentList,
26
+ formatExtractCsv,
27
+ formatExtractJson,
28
+ formatExtractTable,
18
29
  formatFindOutput,
19
30
  formatHistoryOutput,
20
31
  formatPageOutput,
@@ -29,33 +40,230 @@ import {
29
40
  getBaseUrl,
30
41
  handleError,
31
42
  history,
43
+ listDocuments,
44
+ localDoctor,
45
+ localIngest,
46
+ localPage,
47
+ localSearch,
48
+ localStatus,
49
+ localSummary,
50
+ localTables,
32
51
  pageEdit,
33
52
  pageGet,
34
53
  pageResolve,
35
54
  pageVersions,
36
55
  progress,
56
+ readDocument,
37
57
  search,
38
58
  tables,
39
59
  toc,
40
60
  tree,
41
61
  upload,
42
62
  writeOutput
43
- } from "../chunk-XOHPZW3V.js";
63
+ } from "../chunk-ETARIBOV.js";
64
+ import "../chunk-MSZQPLMQ.js";
44
65
  import {
45
66
  OkraClient
46
- } from "../chunk-2HJPTW6S.js";
67
+ } from "../chunk-YGIBZV5J.js";
47
68
  import "../chunk-NIZM2ETT.js";
48
69
 
49
70
  // src/cli/bin.ts
50
71
  import { Command } from "commander";
51
- import { writeFileSync } from "fs";
72
+ import { realpathSync, writeFileSync } from "fs";
73
+ import { resolve } from "path";
74
+ import { pathToFileURL } from "url";
75
+
76
+ // package.json
77
/**
 * Copy of the package manifest embedded in this bundle (it sits directly
 * under the "// package.json" marker). In the code visible in this file only
 * `version` is read, to populate the CLI version string.
 */
var package_default = {
  name: "okrapdf",
  version: "0.14.0",
  okraApi: "^1.14.0",
  description: "OkraPDF \u2014 upload a PDF, get an API. Runtime client, React hooks, and CLI.",
  type: "module",
  exports: {
    ".": { import: { types: "./dist/index.d.ts", default: "./dist/index.js" } },
    "./doc": { import: { types: "./dist/url.d.ts", default: "./dist/url.js" } },
    "./browser": { import: { types: "./dist/browser.d.ts", default: "./dist/browser.js" } },
    "./worker": { import: { types: "./dist/worker.d.ts", default: "./dist/worker.js" } },
    "./react": { import: { types: "./dist/react/index.d.ts", default: "./dist/react/index.js" } },
    "./cli": { import: { types: "./dist/cli/index.d.ts", default: "./dist/cli/index.js" } }
  },
  bin: { okra: "./dist/cli/bin.js" },
  files: ["dist"],
  scripts: {
    build: "tsup",
    "docs:cli": "node ./scripts/generate-cli-docs.mjs",
    "docs:cli:check": "node ./scripts/generate-cli-docs.mjs --check",
    test: "vitest run --exclude '**/**.e2e.test.ts'",
    "test:e2e": "vitest run src/client.e2e.test.ts",
    "test:watch": "vitest",
    typecheck: "tsc --noEmit"
  },
  dependencies: { commander: "^12.0.0", ws: "^8.19.0", zod: "^4.3.6" },
  peerDependencies: { react: ">=18" },
  peerDependenciesMeta: { react: { optional: true } },
  devDependencies: {
    "@types/node": "^20.14.0",
    "@types/react": "^18.2.0",
    "@types/ws": "^8.18.1",
    react: "^18.2.0",
    tsup: "^8.0.0",
    typescript: "^5.5.0",
    vitest: "^2.0.0"
  },
  license: "MIT",
  repository: { type: "git", url: "https://github.com/okrapdf/okrapdf-sdk" },
  homepage: "https://okrapdf.com",
  author: "OkraPDF",
  keywords: ["pdf", "ocr", "document", "extraction", "api", "sdk", "structured-output"],
  publishConfig: { access: "public" }
};
178
+
179
+ // src/cli/bin.ts
180
// Version string reported by the CLI; taken from the embedded manifest.
var CLI_VERSION = package_default.version;

// Command-name groupings (top-level and `collection` subcommands).
var PRIMARY_COMMANDS = ["auth", "upload", "extract", "chat", "list", "read", "delete", "collection"];
var ADVANCED_COMMANDS = ["status", "tree", "find", "page", "search", "tables", "history", "toc", "local"];
var PRIMARY_COLLECTION_SUBCOMMANDS = ["list", "query"];
var ADVANCED_COLLECTION_SUBCOMMANDS = ["create", "show", "delete", "add", "remove", "publish", "unpublish", "export"];

// Text appended after the root command's help output.
var ROOT_HELP_FOOTER =
  "\n" +
  "Primary workflows:\n" +
  " okra auth login\n" +
  " okra upload ./report.pdf\n" +
  ' okra chat "Summarize this document" --doc doc-abc123\n' +
  " okra extract ./report.pdf --schema ./schema.json\n" +
  ' okra collection query earnings "What changed quarter over quarter?" -o earnings.csv\n' +
  "\n" +
  "Advanced inspection and local-only commands are intentionally hidden from\n" +
  "default help during the v0.14 clean-house release candidate.";

// Text appended after the `collection` command's help output.
var COLLECTION_HELP_FOOTER =
  "\n" +
  "Stable v0.14 collection workflow:\n" +
  ' okra collection query <name> "<question>"\n' +
  "\n" +
  "Experimental structured fan-out remains available via:\n" +
  ' okra collection query <name> "<question>" --schema ./schema.json\n' +
  " okra collection extract <name> --schema ./schema.json\n" +
  "\n" +
  "Advanced collection management commands remain available but are\n" +
  "intentionally hidden from default help during the clean-house release\n" +
  "candidate.";
52
210
// Root command. Usage errors re-print help and offer near-miss suggestions;
// -j/-q/-o are global options read later through globals().
var program = new Command()
  .showHelpAfterError()
  .showSuggestionAfterError()
  .name("okra")
  .description("Okra CLI \u2014 upload PDFs, chat with documents, and extract structured data")
  .version(CLI_VERSION)
  .option("-j, --json", "Output JSON (structured, machine-readable)")
  .option("-q, --quiet", "Suppress progress and human-readable frills")
  .option("-o, --output <file>", "Write output to file instead of stdout")
  .addHelpText("after", ROOT_HELP_FOOTER);
56
218
/** Returns the option values currently parsed on the root `program`. */
function globals() {
  const rootOptions = program.opts();
  return rootOptions;
}
221
/**
 * Builds the multi-line help text shown when no API key is configured.
 * @returns {string} Message without a trailing newline.
 */
function getMissingApiKeyMessage() {
  const setup = "Set one up with:\n okra auth login\n" + ' export OKRA_API_KEY="okra_xxx"';
  const whereToGetKey = "Get your API key at:\n https://app.okrapdf.com/settings";
  const docs = "CLI docs:\n https://docs.okrapdf.com/api-reference/cli";
  // Sections are separated by one blank line.
  return `No API key found.\n\n${setup}\n\n${whereToGetKey}\n\n${docs}`;
}
236
/**
 * Builds the human-readable "document is ready" message with follow-up hints.
 *
 * @param {string} docId Document identifier echoed into the suggested commands.
 * @param {number} [pages] Page count; the suffix is omitted unless this is a
 *   finite number.
 * @returns {string} Multi-line message without a trailing newline.
 */
function formatDocumentReadyMessage(docId, pages) {
  // NaN and Infinity are also typeof "number"; don't print them as a count.
  const hasPageCount = typeof pages === "number" && Number.isFinite(pages);
  // Singular for exactly one page ("1 page", not "1 pages").
  const pageSuffix = hasPageCount ? ` (${pages} ${pages === 1 ? "page" : "pages"})` : "";
  return [
    `Ready: ${docId}${pageSuffix}`,
    "",
    "Next:",
    ` okra chat "Summarize this document" --doc ${docId}`,
    ` okra read ${docId}`,
    ` okra extract ${docId} --schema ./schema.json`
  ].join("\n");
}
246
/**
 * Builds the message shown when a document was queued without waiting for
 * processing, including the commands to run next.
 *
 * @param {string} docId Document identifier echoed into the suggested commands.
 * @returns {string} Multi-line message without a trailing newline.
 */
function formatQueuedDocumentMessage(docId) {
  const now = `Next:\n okra status ${docId}`;
  const later =
    "Once processing finishes:\n" +
    ` okra chat "Summarize this document" --doc ${docId}\n` +
    ` okra read ${docId}`;
  return `Queued: ${docId}\n\n${now}\n\n${later}`;
}
258
/**
 * Reports whether this module's URL matches the script path in
 * `process.argv[1]`.
 *
 * The path is compared after `realpathSync`; if that attempt throws, the
 * merely resolved (absolute) path is compared instead.
 *
 * @returns {boolean} `false` when `process.argv[1]` is empty.
 */
function isDirectExecution() {
  const scriptPath = process.argv[1];
  if (!scriptPath) return false;
  const isThisModule = (fsPath) => import.meta.url === pathToFileURL(fsPath).href;
  try {
    return isThisModule(realpathSync(resolve(scriptPath)));
  } catch {
    return isThisModule(resolve(scriptPath));
  }
}
59
267
  function getClient() {
60
268
  const apiKey = getApiKey();
61
269
  const baseUrl = getBaseUrl();
@@ -64,55 +272,205 @@ function getClient() {
64
272
  if (g.json) {
65
273
  process.stderr.write(JSON.stringify({ error: "No API key found", code: 401 }) + "\n");
66
274
  } else {
67
- process.stderr.write(
68
- 'No API key found.\n\n Get one: https://docs.okrapdf.com/api-keys\n Then: export OKRA_API_KEY="okra_xxx"\n Or: npx okra auth login\n\n Docs: https://docs.okrapdf.com\n Discord: https://discord.gg/BHNmbZVs\n'
69
- );
275
+ process.stderr.write(getMissingApiKeyMessage() + "\n");
70
276
  }
71
277
  process.exit(1);
72
278
  }
73
279
  return new OkraClient({ apiKey, baseUrl });
74
280
  }
281
/**
 * Emits the result of a `local` subcommand: the JSON-serialized `result` when
 * the global --json flag is set, otherwise `humanText`. Either form is passed
 * to `writeOutput` together with the global --output value.
 */
function writeLocalResult(result, humanText) {
  const { json, output } = globals();
  const payload = json ? JSON.stringify(result) : humanText;
  writeOutput(payload, output);
}
289
/**
 * Wraps the body of a `local` subcommand so that anything it throws is passed
 * to `handleError` together with the global --json flag.
 *
 * @param {Function} run Receives the arguments commander passes to `.action()`.
 * @returns {Function} Async handler suitable for `.action()`.
 */
function wrapLocalAction(run) {
  return async (...args) => {
    const g = globals();
    try {
      await run(...args);
    } catch (error) {
      handleError(error, g.json);
    }
  };
}

// Hidden command group; each subcommand accepts --data-dir to override the store path.
var localCmd = program
  .command("local", { hidden: true })
  .description("Offline PDF tools for local OpenClaw/opencode harnesses");

localCmd
  .command("ingest <source>")
  .description("Ingest a local PDF into the offline document store")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((source, options) => {
    const result = localIngest(source, { dataDir: options.dataDir });
    writeLocalResult(
      result,
      `Indexed ${result.filename} as ${result.documentId} (${result.pageCount} pages, ${result.charCount} chars)`
    );
  }));

localCmd
  .command("status")
  .description("Get local document status")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    const result = localStatus(options.doc, { dataDir: options.dataDir });
    writeLocalResult(
      result,
      `${result.documentId}: ${result.status} (${result.pagesWithText}/${result.pageCount} pages with text)`
    );
  }));

localCmd
  .command("summary")
  .description("Produce an extractive summary of a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    const result = localSummary(options.doc, { dataDir: options.dataDir });
    writeLocalResult(result, result.summary);
  }));

localCmd
  .command("search")
  .description("Search within a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .requiredOption("--query <text>", "Search query")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    const result = localSearch(options.doc, options.query, { dataDir: options.dataDir });
    const preview = result.matches.map((match) => `p.${match.page}: ${match.snippet}`).join("\n");
    // An empty preview means there were no matches.
    writeLocalResult(result, preview || `No matches found for "${options.query}"`);
  }));

localCmd
  .command("page")
  .description("Read one extracted page from a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  // Explicit radix: commander calls the parser with (value, previous), and a
  // bare parseInt would treat `previous` as the radix.
  .requiredOption("--page <n>", "1-indexed page number", (value) => Number.parseInt(value, 10))
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    // Reject NaN (non-numeric input) and anything below the first page.
    if (!Number.isInteger(options.page) || options.page < 1) {
      throw new Error("--page must be a positive integer (pages are 1-indexed)");
    }
    const result = localPage(options.doc, options.page, { dataDir: options.dataDir });
    writeLocalResult(result, result.text);
  }));

localCmd
  .command("tables")
  .description("Detect table-like layout blocks from a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--query <text>", "Optional query to rank table-like blocks")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    const result = localTables(options.doc, options.query, { dataDir: options.dataDir });
    const preview = result.tables
      .map((table) => `p.${table.page} (${table.rowCount} rows)\n${table.preview}`)
      .join("\n\n");
    writeLocalResult(result, preview || "No table-like blocks found");
  }));

localCmd
  .command("doctor")
  .description("Check local offline PDF tool availability")
  .option("--data-dir <path>", "Override local document store path")
  .action(wrapLocalAction((options) => {
    const result = localDoctor({ dataDir: options.dataDir });
    // One line per external tool: its path when available, otherwise "missing".
    const toolLines = ["pdftotext", "pdfinfo", "pdftoppm", "tesseract"].map((tool) => {
      const info = result.tools[tool];
      return `${tool}: ${info.available ? info.path : "missing"}`;
    });
    writeLocalResult(result, [`data dir: ${result.dataDir}`, ...toolLines].join("\n"));
  }));
75
369
/**
 * Shared action for upload-style commands: uploads `source` and reports the
 * outcome.
 *
 * @param {string} source Value of the `<source>` argument.
 * @param {{ wait?: boolean, vendorOptions?: string }} options Command options;
 *   `wait === false` corresponds to --no-wait, `vendorOptions` is the raw
 *   --vendor-options JSON text.
 *
 * Output: the raw upload result as JSON when --json is set; otherwise the
 * "queued" message for --no-wait or the "ready" message. Errors thrown along
 * the way are passed to `handleError`.
 */
async function runUploadCommand(source, options) {
  const g = globals();
  try {
    const client = getClient();
    let vendorOptions;
    if (options.vendorOptions) {
      let parsed;
      let problem = null;
      try {
        parsed = JSON.parse(options.vendorOptions);
      } catch {
        problem = "--vendor-options must be valid JSON";
      }
      // Valid JSON that is not an object (42, "x", [], null) cannot be an options bag.
      if (problem === null && (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))) {
        problem = "--vendor-options must be a JSON object";
      }
      if (problem !== null) {
        // With --json, keep stderr machine-readable, using the same
        // { error, code } shape as the missing-API-key error in this file.
        const line = g.json ? JSON.stringify({ error: problem, code: 400 }) : `Error: ${problem}`;
        process.stderr.write(line + "\n");
        process.exit(1);
      }
      vendorOptions = parsed;
    }
    const noWait = options.wait === false;
    const result = await upload(client, source, {
      ...g,
      noWait,
      vendorOptions
    });
    if (g.json) {
      writeOutput(JSON.stringify(result), g.output);
    } else {
      const message = noWait
        ? formatQueuedDocumentMessage(result.id)
        : formatDocumentReadyMessage(result.id, result.pages);
      writeOutput(message, g.output);
    }
  } catch (error) {
    handleError(error, g.json);
  }
}
108
399
/**
 * Registers a top-level `<commandName> <source>` command whose action
 * delegates to `runUploadCommand`.
 *
 * @param {string} commandName Name to register on the root program.
 * @param {string} description Long description shown in that command's help.
 */
function registerUploadCommand(commandName, description) {
  const cmd = program.command(`${commandName} <source>`);
  cmd.description(description);
  cmd.summary("Upload a PDF and wait for processing");
  cmd.option("--no-wait", "Fire-and-forget (don't wait for processing)");
  cmd.option(
    "--vendor-options <json>",
    `JSON vendor-specific options (e.g., '{"model":"gemini-3.1-pro","parse_mode":"parse_page_with_agent"}')`
  );
  cmd.action((source, options) => runUploadCommand(source, options));
}

registerUploadCommand("upload", "Upload a PDF (file path or URL), wait for processing");
114
- registerUploadCommand("extract", "Alias for upload (compatibility)");
115
- program.command("status <docId>").description("Get document processing status").action(async (docId) => {
405
// `okra extract <source>`: upload if needed, then run schema-guided extraction.
//
// <source> matching /^(ocr|doc)-…$/ is treated as an existing document id;
// anything else is handed to `upload`. Without --schema, or with --no-wait,
// a new upload is reported and the command stops there.
program
  .command("extract <source>")
  .description("Extract structured data from a document (doc ID, URL, or file path)")
  .summary("Upload a PDF and extract structured data")
  .option("--no-wait", "Fire-and-forget (don't wait for processing)")
  .option("--schema <file>", "JSON Schema file or inline JSON for structured extraction")
  .option("--prompt <query>", 'Extraction prompt (default: "Extract all data according to the schema")')
  .action(async (source, options) => {
    const g = globals();

    // CSV helpers: double embedded quotes; quote whenever a cell would
    // otherwise break the row.
    const quote = (text) => `"${text.replace(/"/g, '""')}"`;
    const quoteIfNeeded = (text) => (/[",\r\n]/.test(text) ? quote(text) : text);
    const csvValue = (value) => {
      if (value == null) return "";
      if (typeof value === "string") return quote(value);
      // Nested objects/arrays are serialized as JSON rather than "[object Object]".
      return quoteIfNeeded(typeof value === "object" ? JSON.stringify(value) : String(value));
    };

    try {
      const isExistingDoc = /^(?:ocr|doc)-[A-Za-z0-9_-]+$/.test(source);
      const noWait = options.wait === false;

      // Usage error: report on stderr so it never lands in stdout or the -o file.
      if (isExistingDoc && !options.schema) {
        const message = "--schema is required when extracting from an existing document.";
        const line = g.json ? JSON.stringify({ error: message, code: 400 }) : `Error: ${message}`;
        process.stderr.write(line + "\n");
        process.exitCode = 1;
        return;
      }

      const client = getClient();
      const { readFileSync } = await import("fs");

      // Extraction runs only with a schema and, for new uploads, only when we
      // wait for processing.
      const willExtract = Boolean(options.schema) && (isExistingDoc || !noWait);

      // Parse the schema BEFORE uploading so a bad schema fails fast instead
      // of after an upload and processing wait.
      let schemaJson;
      if (willExtract) {
        const inline = options.schema.trimStart();
        // Leading "{" (ignoring whitespace) means inline JSON; otherwise a file path.
        const raw = inline.startsWith("{") ? inline : readFileSync(options.schema, "utf8");
        try {
          schemaJson = JSON.parse(raw);
        } catch (cause) {
          const detail = cause instanceof Error ? cause.message : String(cause);
          throw new Error(`--schema is not valid JSON: ${detail}`);
        }
      }

      let docId = source;
      if (!isExistingDoc) {
        const result = await upload(client, source, {
          ...g,
          noWait
        });
        docId = result.id;
        if (!willExtract) {
          if (g.json) {
            writeOutput(JSON.stringify(result), g.output);
          } else {
            writeOutput(
              noWait ? formatQueuedDocumentMessage(docId) : formatDocumentReadyMessage(docId, result.pages),
              g.output
            );
          }
          return;
        }
      }

      const prompt = options.prompt || "Extract all data from this document according to the schema.";
      progress(`Extracting from ${docId}\u2026`, g.quiet);
      const extraction = await client.generate(docId, prompt, { schema: schemaJson });

      if (g.json) {
        writeOutput(JSON.stringify({
          doc_id: docId,
          data: extraction.data ?? extraction.answer
        }), g.output);
      } else if (g.output && g.output.endsWith(".csv")) {
        // One header row plus one data row; columns are the top-level keys of `data`.
        const data = extraction.data ?? {};
        const keys = Object.keys(data);
        const header = ["doc_id", ...keys].map(quoteIfNeeded).join(",");
        const row = [quoteIfNeeded(String(docId)), ...keys.map((k) => csvValue(data[k]))].join(",");
        writeOutput([header, row].join("\n"), g.output);
      } else {
        writeOutput(JSON.stringify(extraction.data ?? extraction.answer, null, 2), g.output);
      }
    } catch (error) {
      handleError(error, g.json);
    }
  });
473
+ program.command("status <docId>", { hidden: true }).description("Get document processing status").action(async (docId) => {
116
474
  const g = globals();
117
475
  try {
118
476
  const client = getClient();
@@ -133,10 +491,32 @@ program.command("status <docId>").description("Get document processing status").
133
491
  handleError(error, g.json);
134
492
  }
135
493
  });
136
- program.command("chat <question>").description("Ask a question about a processed document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").action(async (question, options) => {
494
+ program.command("chat <question>").description("Ask a question about a processed document").summary("Ask a question about one document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").option("--stream", "Stream response tokens as they arrive").action(async (question, options) => {
137
495
  const g = globals();
138
496
  try {
139
497
  const client = getClient();
498
+ if (options.stream) {
499
+ let fullText = "";
500
+ for await (const event of client.stream(options.doc, question, {
501
+ model: options.model
502
+ })) {
503
+ if (event.type === "text_delta") {
504
+ fullText += event.text;
505
+ if (!g.json) process.stdout.write(event.text);
506
+ } else if (event.type === "done") {
507
+ if (!g.json) process.stdout.write("\n");
508
+ if (g.json) {
509
+ writeOutput(
510
+ JSON.stringify({ docId: options.doc, question, answer: fullText }),
511
+ g.output
512
+ );
513
+ }
514
+ } else if (event.type === "error") {
515
+ throw new Error(event.message);
516
+ }
517
+ }
518
+ return;
519
+ }
140
520
  const result = await client.generate(options.doc, question, options.model ? { model: options.model } : void 0);
141
521
  if (g.json) {
142
522
  writeOutput(JSON.stringify({ docId: options.doc, question, ...result }), g.output);
@@ -147,8 +527,51 @@ program.command("chat <question>").description("Ask a question about a processed
147
527
  handleError(error, g.json);
148
528
  }
149
529
  });
530
// `okra list` (alias `ls`): print the documents returned by listDocuments().
program
  .command("list")
  .alias("ls")
  .description("List all documents")
  .summary("List your documents")
  .action(async () => {
    const g = globals();
    try {
      const listing = await listDocuments(getClient(), g);
      writeOutput(formatDocumentList(listing.documents, g.json), g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

// `okra delete <docId>` (alias `rm`): delete one document and confirm.
program
  .command("delete <docId>")
  .alias("rm")
  .description("Delete a document")
  .summary("Delete one document")
  .action(async (docId) => {
    const g = globals();
    try {
      const outcome = await deleteDocument(getClient(), docId, g);
      // JSON mode echoes the raw result; otherwise a one-line confirmation.
      writeOutput(g.json ? JSON.stringify(outcome) : `Deleted ${docId}`, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

// `okra read <docId>`: print the document's markdown, optionally limited by --pages.
program
  .command("read <docId>")
  .description("Read document as markdown")
  .summary("Read document markdown")
  .option("-p, --pages <range>", "Page range (e.g., 1-5, 10-15)")
  .action(async (docId, options) => {
    const g = globals();
    try {
      const doc = await readDocument(getClient(), docId, { ...g, pages: options.pages });
      writeOutput(g.json ? JSON.stringify(doc) : doc.markdown, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
150
568
// Parent command for collection subcommands (aliases: `collections`, `col`).
// The footer text is appended after this command's help output.
var collectionCmd = program
  .command("collection")
  .alias("collections")
  .alias("col")
  .description("Collection operations")
  .summary("Query across collections")
  .addHelpText("after", COLLECTION_HELP_FOOTER);
573
+ );
574
+ collectionCmd.command("list").alias("ls").description("List available collections").summary("List collections").action(async () => {
152
575
  const g = globals();
153
576
  try {
154
577
  const client = getClient();
@@ -158,7 +581,73 @@ collectionCmd.command("list").alias("ls").description("List available collection
158
581
  handleError(error, g.json);
159
582
  }
160
583
  });
161
- collectionCmd.command("query <nameOrId> <question>").description("Fan-out query across collection documents").option("--schema <file>", "JSON Schema file for structured extraction").action(async (nameOrId, question, options) => {
584
// Hidden collection-management subcommands. Each prints the raw result in
// --json mode and a one-line confirmation otherwise; thrown errors are passed
// to handleError.

collectionCmd
  .command("create <name>", { hidden: true })
  .description("Create a new collection")
  .option("--description <text>", "Collection description")
  .option("--docs <ids>", "Comma-separated document IDs to seed")
  .action(async (name, options) => {
    const g = globals();
    try {
      const created = await collectionCreate(getClient(), name, { ...g, ...options });
      const text = g.json
        ? JSON.stringify(created)
        : `Created collection "${created.name}" (${created.id})`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

collectionCmd
  .command("show <nameOrId>", { hidden: true })
  .description("Show collection details and documents")
  .action(async (nameOrId) => {
    const g = globals();
    try {
      const detail = await collectionShow(getClient(), nameOrId);
      writeOutput(formatCollectionDetail(detail, g.json), g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

collectionCmd
  .command("delete <nameOrId>", { hidden: true })
  .alias("rm")
  .description("Delete a collection (documents are preserved)")
  .action(async (nameOrId) => {
    const g = globals();
    try {
      await collectionDelete(getClient(), nameOrId);
      const text = g.json
        ? JSON.stringify({ ok: true, deleted: nameOrId })
        : `Deleted collection "${nameOrId}"`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

collectionCmd
  .command("add <nameOrId> <docIds...>", { hidden: true })
  .description("Add documents to a collection")
  .action(async (nameOrId, docIds) => {
    const g = globals();
    try {
      const outcome = await collectionAddDocs(getClient(), nameOrId, docIds);
      const text = g.json
        ? JSON.stringify(outcome)
        : `Added ${docIds.length} document(s) to "${nameOrId}"`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });

collectionCmd
  .command("remove <nameOrId> <docIds...>", { hidden: true })
  .description("Remove documents from a collection")
  .action(async (nameOrId, docIds) => {
    const g = globals();
    try {
      const outcome = await collectionRemoveDocs(getClient(), nameOrId, docIds);
      const text = g.json
        ? JSON.stringify(outcome)
        : `Removed ${docIds.length} document(s) from "${nameOrId}"`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
650
+ collectionCmd.command("query <nameOrId> <question>").description("Fan-out query across collection documents").summary("Ask the same question across a collection").option("--schema <file>", "Experimental: JSON Schema file for structured extraction").action(async (nameOrId, question, options) => {
162
651
  const g = globals();
163
652
  try {
164
653
  const client = getClient();
@@ -185,7 +674,44 @@ collectionCmd.command("query <nameOrId> <question>").description("Fan-out query
185
674
  handleError(error, g.json);
186
675
  }
187
676
  });
188
- collectionCmd.command("publish <nameOrId>").description("Make a collection publicly queryable").action(async (nameOrId) => {
677
// Hidden, experimental `okra collection extract <nameOrId> --schema …`:
// runs one schema-guided query across the collection via collectionQueryRaw
// and writes the per-document results.
//
// Output format: JSON with --json or a non-.csv -o file, CSV when the -o file
// ends in ".csv", and a table otherwise. A completed-count and total-cost
// line is then sent through progress().
collectionCmd
  .command("extract <nameOrId>", { hidden: true })
  .description("Experimental: extract structured data from all documents in a collection")
  .summary("Experimental structured extraction")
  .requiredOption("--schema <file>", "Experimental: JSON Schema file or inline JSON for structured extraction")
  .option("--prompt <query>", "Extraction prompt (default: auto-generated from schema)")
  .action(async (nameOrId, options) => {
    const g = globals();
    try {
      const client = getClient();
      const { readFileSync } = await import("fs");

      // Leading "{" (ignoring whitespace) means inline JSON; otherwise a file path.
      const inline = options.schema.trimStart();
      const raw = inline.startsWith("{") ? inline : readFileSync(options.schema, "utf8");
      let schemaJson;
      try {
        schemaJson = JSON.parse(raw);
      } catch (cause) {
        // Name the offending flag instead of surfacing a bare SyntaxError.
        const detail = cause instanceof Error ? cause.message : String(cause);
        throw new Error(`--schema is not valid JSON: ${detail}`);
      }

      const prompt = options.prompt || "Extract all data from this document according to the schema.";
      const { results, summary } = await collectionQueryRaw(
        client,
        nameOrId,
        prompt,
        { ...g, schema: schemaJson }
      );

      if (g.json) {
        writeOutput(formatExtractJson(results), g.output);
      } else if (g.output && g.output.endsWith(".csv")) {
        writeOutput(formatExtractCsv(results), g.output);
      } else if (g.output) {
        writeOutput(formatExtractJson(results), g.output);
      } else {
        writeOutput(formatExtractTable(results));
      }

      progress(
        `\n${summary.completed} documents \u2014 $${summary.total_cost_usd.toFixed(4)} total`,
        g.quiet
      );
    } catch (error) {
      handleError(error, g.json);
    }
  });
714
+ collectionCmd.command("publish <nameOrId>", { hidden: true }).description("Make a collection publicly queryable").action(async (nameOrId) => {
189
715
  const g = globals();
190
716
  try {
191
717
  const client = getClient();
@@ -203,7 +729,7 @@ Share with: okra collection query ${nameOrId} "your question"`,
203
729
  handleError(error, g.json);
204
730
  }
205
731
  });
206
- collectionCmd.command("unpublish <nameOrId>").description("Make a collection private (owner-only)").action(async (nameOrId) => {
732
+ collectionCmd.command("unpublish <nameOrId>", { hidden: true }).description("Make a collection private (owner-only)").action(async (nameOrId) => {
207
733
  const g = globals();
208
734
  try {
209
735
  const client = getClient();
@@ -217,7 +743,7 @@ collectionCmd.command("unpublish <nameOrId>").description("Make a collection pri
217
743
  handleError(error, g.json);
218
744
  }
219
745
  });
220
- collectionCmd.command("export <nameOrId>").description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
746
+ collectionCmd.command("export <nameOrId>", { hidden: true }).description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
221
747
  const g = globals();
222
748
  try {
223
749
  const client = getClient();
@@ -294,7 +820,7 @@ authCmd.command("logout").description("Remove API key from global config").actio
294
820
  handleError(error, globals().json);
295
821
  }
296
822
  });
297
- program.command("tree <jobId>").description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
823
+ program.command("tree <jobId>", { hidden: true }).description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
298
824
  const g = globals();
299
825
  try {
300
826
  const client = getClient();
@@ -308,7 +834,7 @@ program.command("tree <jobId>").description("Show document verification tree").o
308
834
  handleError(error, g.json);
309
835
  }
310
836
  });
311
- program.command("find <jobId> <selector>").description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
837
+ program.command("find <jobId> <selector>", { hidden: true }).description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
312
838
  const g = globals();
313
839
  try {
314
840
  const client = getClient();
@@ -329,7 +855,7 @@ program.command("find <jobId> <selector>").description("Find entities using jQue
329
855
  handleError(error, g.json);
330
856
  }
331
857
  });
332
- var pageCmd = program.command("page").description("Page content operations");
858
+ var pageCmd = program.command("page", { hidden: true }).description("Page content operations");
333
859
  pageCmd.command("get <jobId> <pageNum>").description("Get page content").option("-v, --version <n>", "Specific version", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "markdown").action(async (jobId, pageNum, options) => {
334
860
  const g = globals();
335
861
  try {
@@ -391,7 +917,7 @@ pageCmd.command("versions <jobId> <pageNum>").description("List page versions").
391
917
  handleError(error, g.json);
392
918
  }
393
919
  });
394
- program.command("search <jobId> <query>").description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
920
+ program.command("search <jobId> <query>", { hidden: true }).description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
395
921
  const g = globals();
396
922
  try {
397
923
  const client = getClient();
@@ -402,7 +928,7 @@ program.command("search <jobId> <query>").description("Search page content").opt
402
928
  handleError(error, g.json);
403
929
  }
404
930
  });
405
- program.command("tables <jobId>").description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
931
+ program.command("tables <jobId>", { hidden: true }).description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
406
932
  const g = globals();
407
933
  try {
408
934
  const client = getClient();
@@ -416,7 +942,7 @@ program.command("tables <jobId>").description("List extracted tables").option("-
416
942
  handleError(error, g.json);
417
943
  }
418
944
  });
419
- program.command("history <jobId>").description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
945
+ program.command("history <jobId>", { hidden: true }).description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
420
946
  const g = globals();
421
947
  try {
422
948
  const client = getClient();
@@ -427,7 +953,7 @@ program.command("history <jobId>").description("Show verification history").opti
427
953
  handleError(error, g.json);
428
954
  }
429
955
  });
430
- program.command("toc <jobId>").description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
956
+ program.command("toc <jobId>", { hidden: true }).description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
431
957
  const g = globals();
432
958
  try {
433
959
  const client = getClient();
@@ -441,5 +967,18 @@ program.command("toc <jobId>").description("Extract table of contents from PDF")
441
967
  handleError(error, g.json);
442
968
  }
443
969
  });
444
- program.parse();
970
+ if (isDirectExecution()) {
971
+ void program.parseAsync();
972
+ }
973
+ export {
974
+ ADVANCED_COLLECTION_SUBCOMMANDS,
975
+ ADVANCED_COMMANDS,
976
+ CLI_VERSION,
977
+ COLLECTION_HELP_FOOTER,
978
+ PRIMARY_COLLECTION_SUBCOMMANDS,
979
+ PRIMARY_COMMANDS,
980
+ ROOT_HELP_FOOTER,
981
+ getMissingApiKeyMessage,
982
+ program
983
+ };
445
984
  //# sourceMappingURL=bin.js.map