okrapdf 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/bin.js CHANGED
@@ -6,13 +6,19 @@ import {
6
6
  authStatus,
7
7
  authToken,
8
8
  authWhoAmI,
9
+ collectionAddDocs,
10
+ collectionCreate,
11
+ collectionDelete,
9
12
  collectionExport,
10
13
  collectionList,
11
14
  collectionQueryRaw,
15
+ collectionRemoveDocs,
12
16
  collectionSetVisibility,
17
+ collectionShow,
13
18
  deleteDocument,
14
19
  find,
15
20
  formatCollectionCsv,
21
+ formatCollectionDetail,
16
22
  formatCollectionExportFlat,
17
23
  formatCollectionList,
18
24
  formatCollectionTable,
@@ -35,6 +41,13 @@ import {
35
41
  handleError,
36
42
  history,
37
43
  listDocuments,
44
+ localDoctor,
45
+ localIngest,
46
+ localPage,
47
+ localSearch,
48
+ localStatus,
49
+ localSummary,
50
+ localTables,
38
51
  pageEdit,
39
52
  pageGet,
40
53
  pageResolve,
@@ -47,22 +60,210 @@ import {
47
60
  tree,
48
61
  upload,
49
62
  writeOutput
50
- } from "../chunk-F3LECDPP.js";
63
+ } from "../chunk-ETARIBOV.js";
64
+ import "../chunk-MSZQPLMQ.js";
51
65
  import {
52
66
  OkraClient
53
- } from "../chunk-2VKGPLAA.js";
67
+ } from "../chunk-YGIBZV5J.js";
54
68
  import "../chunk-NIZM2ETT.js";
55
69
 
56
70
  // src/cli/bin.ts
57
71
  import { Command } from "commander";
58
- import { writeFileSync } from "fs";
72
+ import { realpathSync, writeFileSync } from "fs";
73
+ import { resolve } from "path";
74
+ import { pathToFileURL } from "url";
75
+
76
+ // package.json
77
+ var package_default = {
78
+ name: "okrapdf",
79
+ version: "0.14.0",
80
+ okraApi: "^1.14.0",
81
+ description: "OkraPDF \u2014 upload a PDF, get an API. Runtime client, React hooks, and CLI.",
82
+ type: "module",
83
+ exports: {
84
+ ".": {
85
+ import: {
86
+ types: "./dist/index.d.ts",
87
+ default: "./dist/index.js"
88
+ }
89
+ },
90
+ "./doc": {
91
+ import: {
92
+ types: "./dist/url.d.ts",
93
+ default: "./dist/url.js"
94
+ }
95
+ },
96
+ "./browser": {
97
+ import: {
98
+ types: "./dist/browser.d.ts",
99
+ default: "./dist/browser.js"
100
+ }
101
+ },
102
+ "./worker": {
103
+ import: {
104
+ types: "./dist/worker.d.ts",
105
+ default: "./dist/worker.js"
106
+ }
107
+ },
108
+ "./react": {
109
+ import: {
110
+ types: "./dist/react/index.d.ts",
111
+ default: "./dist/react/index.js"
112
+ }
113
+ },
114
+ "./cli": {
115
+ import: {
116
+ types: "./dist/cli/index.d.ts",
117
+ default: "./dist/cli/index.js"
118
+ }
119
+ }
120
+ },
121
+ bin: {
122
+ okra: "./dist/cli/bin.js"
123
+ },
124
+ files: [
125
+ "dist"
126
+ ],
127
+ scripts: {
128
+ build: "tsup",
129
+ "docs:cli": "node ./scripts/generate-cli-docs.mjs",
130
+ "docs:cli:check": "node ./scripts/generate-cli-docs.mjs --check",
131
+ test: "vitest run --exclude '**/**.e2e.test.ts'",
132
+ "test:e2e": "vitest run src/client.e2e.test.ts",
133
+ "test:watch": "vitest",
134
+ typecheck: "tsc --noEmit"
135
+ },
136
+ dependencies: {
137
+ commander: "^12.0.0",
138
+ ws: "^8.19.0",
139
+ zod: "^4.3.6"
140
+ },
141
+ peerDependencies: {
142
+ react: ">=18"
143
+ },
144
+ peerDependenciesMeta: {
145
+ react: {
146
+ optional: true
147
+ }
148
+ },
149
+ devDependencies: {
150
+ "@types/node": "^20.14.0",
151
+ "@types/react": "^18.2.0",
152
+ "@types/ws": "^8.18.1",
153
+ react: "^18.2.0",
154
+ tsup: "^8.0.0",
155
+ typescript: "^5.5.0",
156
+ vitest: "^2.0.0"
157
+ },
158
+ license: "MIT",
159
+ repository: {
160
+ type: "git",
161
+ url: "https://github.com/okrapdf/okrapdf-sdk"
162
+ },
163
+ homepage: "https://okrapdf.com",
164
+ author: "OkraPDF",
165
+ keywords: [
166
+ "pdf",
167
+ "ocr",
168
+ "document",
169
+ "extraction",
170
+ "api",
171
+ "sdk",
172
+ "structured-output"
173
+ ],
174
+ publishConfig: {
175
+ access: "public"
176
+ }
177
+ };
178
+
179
+ // src/cli/bin.ts
180
+ var CLI_VERSION = package_default.version;
181
// Command names shown in the default root help output.
const PRIMARY_COMMANDS = [
  "auth",
  "upload",
  "extract",
  "chat",
  "list",
  "read",
  "delete",
  "collection"
];

// Command names registered with `{ hidden: true }` (kept out of default help).
const ADVANCED_COMMANDS = [
  "status",
  "tree",
  "find",
  "page",
  "search",
  "tables",
  "history",
  "toc",
  "local"
];

// `okra collection` subcommands shown in default help.
const PRIMARY_COLLECTION_SUBCOMMANDS = ["list", "query"];

// `okra collection` subcommands hidden from default help.
const ADVANCED_COLLECTION_SUBCOMMANDS = [
  "create",
  "show",
  "delete",
  "add",
  "remove",
  "publish",
  "unpublish",
  "export"
];

// Text appended after the root help. It starts with a newline so a blank line
// separates it from the generated help, and it has no trailing newline.
const ROOT_HELP_FOOTER = `
Primary workflows:
 okra auth login
 okra upload ./report.pdf
 okra chat "Summarize this document" --doc doc-abc123
 okra extract ./report.pdf --schema ./schema.json
 okra collection query earnings "What changed quarter over quarter?" -o earnings.csv

Advanced inspection and local-only commands are intentionally hidden from
default help during the v0.14 clean-house release candidate.`;

// Text appended after the `okra collection` help; same leading-newline layout.
const COLLECTION_HELP_FOOTER = `
Stable v0.14 collection workflow:
 okra collection query <name> "<question>"

Experimental structured fan-out remains available via:
 okra collection query <name> "<question>" --schema ./schema.json
 okra collection extract <name> --schema ./schema.json

Advanced collection management commands remain available but are
intentionally hidden from default help during the clean-house release
candidate.`;
59
210
  var program = new Command();
60
211
  program.showHelpAfterError();
61
212
  program.showSuggestionAfterError();
62
- program.name("okra").description("OkraPDF CLI \u2014 upload PDFs, query collections, extract data").version("0.12.1").option("-j, --json", "Output JSON (structured, machine-readable)").option("-q, --quiet", "Suppress progress and human-readable frills").option("-o, --output <file>", "Write output to file instead of stdout");
213
+ program.name("okra").description("Okra CLI \u2014 upload PDFs, chat with documents, and extract structured data").version(CLI_VERSION).option("-j, --json", "Output JSON (structured, machine-readable)").option("-q, --quiet", "Suppress progress and human-readable frills").option("-o, --output <file>", "Write output to file instead of stdout");
214
+ program.addHelpText(
215
+ "after",
216
+ ROOT_HELP_FOOTER
217
+ );
63
218
  function globals() {
64
219
  return program.opts();
65
220
  }
221
/**
 * Builds the human-readable help text shown when no API key is configured.
 *
 * @returns {string} Multi-paragraph message without a trailing newline.
 */
function getMissingApiKeyMessage() {
  const setupSteps = 'Set one up with:\n okra auth login\n export OKRA_API_KEY="okra_xxx"';
  const keyLocation = "Get your API key at:\n https://app.okrapdf.com/settings";
  const docsPointer = "CLI docs:\n https://docs.okrapdf.com/api-reference/cli";
  // Paragraphs are separated by exactly one blank line.
  return ["No API key found.", setupSteps, keyLocation, docsPointer].join("\n\n");
}
236
/**
 * Builds the "document is ready" message with suggested follow-up commands.
 *
 * @param {string} docId - Document identifier interpolated into the message.
 * @param {number} [pages] - Page count. The suffix is omitted when this is not
 *   a finite number, so "(NaN pages)" is never printed.
 * @returns {string} Multi-line message without a trailing newline.
 */
function formatDocumentReadyMessage(docId, pages) {
  const hasPageCount = typeof pages === "number" && Number.isFinite(pages);
  // Use the singular noun for a one-page document ("1 page", not "1 pages").
  const pageSuffix = hasPageCount ? ` (${pages} ${pages === 1 ? "page" : "pages"})` : "";
  return [
    `Ready: ${docId}${pageSuffix}`,
    "",
    "Next:",
    ` okra chat "Summarize this document" --doc ${docId}`,
    ` okra read ${docId}`,
    ` okra extract ${docId} --schema ./schema.json`
  ].join("\n");
}
246
/**
 * Builds the message shown when a document was queued without waiting for
 * processing, including the commands to run next.
 *
 * @param {string} docId - Document identifier interpolated into the message.
 * @returns {string} Multi-line message without a trailing newline.
 */
function formatQueuedDocumentMessage(docId) {
  const headline = `Queued: ${docId}`;
  const nextStep = `Next:\n okra status ${docId}`;
  const afterProcessing = [
    "Once processing finishes:",
    ` okra chat "Summarize this document" --doc ${docId}`,
    ` okra read ${docId}`
  ].join("\n");
  // Sections are separated by one blank line.
  return [headline, nextStep, afterProcessing].join("\n\n");
}
258
/**
 * Reports whether this module is the script Node was started with, by
 * comparing `import.meta.url` with the file URL of `process.argv[1]`.
 *
 * @returns {boolean} `true` when the entry script resolves to this module.
 */
function isDirectExecution() {
  const [, entryPath] = process.argv;
  if (!entryPath) {
    return false;
  }
  let entryUrl;
  try {
    // Resolve symlinks first so a linked entry path still compares equal.
    entryUrl = pathToFileURL(realpathSync(resolve(entryPath)));
  } catch {
    // If that fails (e.g. the path does not exist), compare the unresolved absolute path.
    entryUrl = pathToFileURL(resolve(entryPath));
  }
  return entryUrl.href === import.meta.url;
}
66
267
  function getClient() {
67
268
  const apiKey = getApiKey();
68
269
  const baseUrl = getBaseUrl();
@@ -71,54 +272,137 @@ function getClient() {
71
272
  if (g.json) {
72
273
  process.stderr.write(JSON.stringify({ error: "No API key found", code: 401 }) + "\n");
73
274
  } else {
74
- process.stderr.write(
75
- 'No API key found.\n\n Get one: https://docs.okrapdf.com/api-keys\n Then: export OKRA_API_KEY="okra_xxx"\n Or: npx okra auth login\n\n Docs: https://docs.okrapdf.com\n Discord: https://discord.gg/BHNmbZVs\n'
76
- );
275
+ process.stderr.write(getMissingApiKeyMessage() + "\n");
77
276
  }
78
277
  process.exit(1);
79
278
  }
80
279
  return new OkraClient({ apiKey, baseUrl });
81
280
  }
281
/**
 * Writes the result of a `local` subcommand through `writeOutput`.
 *
 * @param {*} result - Value serialized with `JSON.stringify` when the global
 *   `--json` flag is set.
 * @param {string} humanText - Text written when `--json` is not set.
 */
function writeLocalResult(result, humanText) {
  const { json, output } = globals();
  const payload = json ? JSON.stringify(result) : humanText;
  writeOutput(payload, output);
}
289
// Hidden `local` command group; its subcommands are registered on `localCmd`.
const localCmd = program
  .command("local", { hidden: true })
  .description("Offline PDF tools for local OpenClaw/opencode harnesses");

// `okra local ingest <source>`: passes the source to localIngest and reports what was indexed.
localCmd
  .command("ingest <source>")
  .description("Ingest a local PDF into the offline document store")
  .option("--data-dir <path>", "Override local document store path")
  .action(async (source, { dataDir }) => {
    const g = globals();
    try {
      const ingested = localIngest(source, { dataDir });
      const { filename, documentId, pageCount, charCount } = ingested;
      const summaryLine = `Indexed ${filename} as ${documentId} (${pageCount} pages, ${charCount} chars)`;
      writeLocalResult(ingested, summaryLine);
    } catch (error) {
      handleError(error, g.json);
    }
  });
302
// `okra local status --doc <id>`: prints the status and text coverage returned by localStatus.
localCmd
  .command("status")
  .description("Get local document status")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--data-dir <path>", "Override local document store path")
  .action(async ({ doc, dataDir }) => {
    const g = globals();
    try {
      const report = localStatus(doc, { dataDir });
      const { documentId, status, pagesWithText, pageCount } = report;
      writeLocalResult(report, `${documentId}: ${status} (${pagesWithText}/${pageCount} pages with text)`);
    } catch (error) {
      handleError(error, g.json);
    }
  });
314
// `okra local summary --doc <id>`: prints the `summary` field returned by localSummary.
localCmd
  .command("summary")
  .description("Produce an extractive summary of a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--data-dir <path>", "Override local document store path")
  .action(async ({ doc, dataDir }) => {
    const g = globals();
    try {
      const summarized = localSummary(doc, { dataDir });
      writeLocalResult(summarized, summarized.summary);
    } catch (error) {
      handleError(error, g.json);
    }
  });
323
// `okra local search --doc <id> --query <text>`: prints one "p.N: snippet" line per match.
localCmd
  .command("search")
  .description("Search within a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .requiredOption("--query <text>", "Search query")
  .option("--data-dir <path>", "Override local document store path")
  .action(async ({ doc, query, dataDir }) => {
    const g = globals();
    try {
      const found = localSearch(doc, query, { dataDir });
      const matchLines = found.matches.map(({ page, snippet }) => `p.${page}: ${snippet}`);
      const humanText = matchLines.join("\n");
      // An empty preview (no matches) falls back to an explicit notice.
      writeLocalResult(found, humanText || `No matches found for "${query}"`);
    } catch (error) {
      handleError(error, g.json);
    }
  });
333
// `okra local page --doc <id> --page <n>`: prints the `text` field returned by localPage.
//
// The page parser wraps Number.parseInt with an explicit radix instead of
// passing `parseInt` directly: Commander calls option parsers with
// (value, previousValue), so a bare `parseInt` would treat the previous value
// as the radix. The action also rejects NaN, non-integer and non-positive page
// numbers before calling localPage, reporting them through handleError.
localCmd
  .command("page")
  .description("Read one extracted page from a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .requiredOption("--page <n>", "1-indexed page number", (value) => Number.parseInt(value, 10))
  .option("--data-dir <path>", "Override local document store path")
  .action(async (options) => {
    const g = globals();
    try {
      if (!Number.isInteger(options.page) || options.page < 1) {
        throw new Error("--page must be a positive integer (pages are 1-indexed)");
      }
      const result = localPage(options.doc, options.page, { dataDir: options.dataDir });
      writeLocalResult(result, result.text);
    } catch (error) {
      handleError(error, g.json);
    }
  });
342
// `okra local tables --doc <id> [--query <text>]`: prints a header line and the
// preview for each block returned by localTables, separated by blank lines.
localCmd
  .command("tables")
  .description("Detect table-like layout blocks from a local PDF")
  .requiredOption("--doc <id>", "Local document ID")
  .option("--query <text>", "Optional query to rank table-like blocks")
  .option("--data-dir <path>", "Override local document store path")
  .action(async ({ doc, query, dataDir }) => {
    const g = globals();
    try {
      const detected = localTables(doc, query, { dataDir });
      const sections = detected.tables.map(
        (table) => `p.${table.page} (${table.rowCount} rows)\n${table.preview}`
      );
      const humanText = sections.join("\n\n");
      writeLocalResult(detected, humanText || "No table-like blocks found");
    } catch (error) {
      handleError(error, g.json);
    }
  });
353
// `okra local doctor`: prints the data directory and, for each external tool
// reported by localDoctor, either its path or "missing".
localCmd
  .command("doctor")
  .description("Check local offline PDF tool availability")
  .option("--data-dir <path>", "Override local document store path")
  .action(async ({ dataDir }) => {
    const g = globals();
    try {
      const report = localDoctor({ dataDir });
      const toolLines = ["pdftotext", "pdfinfo", "pdftoppm", "tesseract"].map((toolName) => {
        const tool = report.tools[toolName];
        return `${toolName}: ${tool.available ? tool.path : "missing"}`;
      });
      const humanText = [`data dir: ${report.dataDir}`, ...toolLines].join("\n");
      writeLocalResult(report, humanText);
    } catch (error) {
      handleError(error, g.json);
    }
  });
82
369
  async function runUploadCommand(source, options) {
83
370
  const g = globals();
84
371
  try {
85
372
  const client = getClient();
373
+ let vendorOptions;
374
+ if (options.vendorOptions) {
375
+ try {
376
+ vendorOptions = JSON.parse(options.vendorOptions);
377
+ } catch {
378
+ process.stderr.write("Error: --vendor-options must be valid JSON\n");
379
+ process.exit(1);
380
+ }
381
+ }
86
382
  const result = await upload(client, source, {
87
383
  ...g,
88
- noWait: options.wait === false
384
+ noWait: options.wait === false,
385
+ vendorOptions
89
386
  });
90
387
  if (g.json) {
91
388
  writeOutput(JSON.stringify(result), g.output);
92
389
  } else {
93
- const lines = [`Done \u2014 ${result.pages ?? "?"} pages`, ""];
94
- lines.push(` ${result.id}`);
95
- if (result.urls) {
96
- const short = result.id.slice(0, 11) + "...";
97
- lines.push("");
98
- lines.push(` Markdown: ${result.urls.full_md.replace(result.id, short)}`);
99
- lines.push(` Page 1: ${result.urls.page_png.replace(result.id, short).replace("{N}", "1")}`);
100
- lines.push(` Completion: ${result.urls.completion.replace(result.id, short)}`);
101
- lines.push("");
102
- lines.push(" URL patterns:");
103
- lines.push(" /v1/documents/{id}/pg_{N}.md page markdown");
104
- lines.push(" /v1/documents/{id}/d_shimmer/pg_{N}.png page image");
105
- lines.push(" /v1/documents/{id}/full.md full document");
106
- lines.push("");
107
- lines.push(" Docs: https://docs.okrapdf.com Discord: https://discord.gg/BHNmbZVs");
108
- }
109
- writeOutput(lines.join("\n"), g.output);
390
+ writeOutput(
391
+ options.wait === false ? formatQueuedDocumentMessage(result.id) : formatDocumentReadyMessage(result.id, result.pages),
392
+ g.output
393
+ );
110
394
  }
111
395
  } catch (error) {
112
396
  handleError(error, g.json);
113
397
  }
114
398
  }
115
399
  function registerUploadCommand(commandName, description) {
116
- program.command(`${commandName} <source>`).description(description).option("--no-wait", "Fire-and-forget (don't wait for processing)").action(async (source, options) => {
400
+ program.command(`${commandName} <source>`).description(description).summary("Upload a PDF and wait for processing").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--vendor-options <json>", `JSON vendor-specific options (e.g., '{"model":"gemini-3.1-pro","parse_mode":"parse_page_with_agent"}')`).action(async (source, options) => {
117
401
  await runUploadCommand(source, options);
118
402
  });
119
403
  }
120
404
  registerUploadCommand("upload", "Upload a PDF (file path or URL), wait for processing");
121
- program.command("extract <source>").description("Extract structured data from a document (doc ID, URL, or file path)").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--schema <file>", "JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", 'Extraction prompt (default: "Extract all data according to the schema")').action(async (source, options) => {
405
+ program.command("extract <source>").description("Extract structured data from a document (doc ID, URL, or file path)").summary("Upload a PDF and extract structured data").option("--no-wait", "Fire-and-forget (don't wait for processing)").option("--schema <file>", "JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", 'Extraction prompt (default: "Extract all data according to the schema")').action(async (source, options) => {
122
406
  const g = globals();
123
407
  try {
124
408
  const client = getClient();
@@ -137,16 +421,10 @@ program.command("extract <source>").description("Extract structured data from a
137
421
  if (g.json) {
138
422
  writeOutput(JSON.stringify(result), g.output);
139
423
  } else {
140
- const lines = [`Done \u2014 ${result.pages ?? "?"} pages`, ""];
141
- lines.push(` ${docId}`);
142
- if (result.urls) {
143
- const short = docId.slice(0, 11) + "...";
144
- lines.push("");
145
- lines.push(` Markdown: ${result.urls.full_md.replace(docId, short)}`);
146
- lines.push(` Page 1: ${result.urls.page_png.replace(docId, short).replace("{N}", "1")}`);
147
- lines.push(` Completion: ${result.urls.completion.replace(docId, short)}`);
148
- }
149
- writeOutput(lines.join("\n"), g.output);
424
+ writeOutput(
425
+ options.wait === false ? formatQueuedDocumentMessage(docId) : formatDocumentReadyMessage(docId, result.pages),
426
+ g.output
427
+ );
150
428
  }
151
429
  return;
152
430
  }
@@ -192,7 +470,7 @@ program.command("extract <source>").description("Extract structured data from a
192
470
  handleError(error, g.json);
193
471
  }
194
472
  });
195
- program.command("status <docId>").description("Get document processing status").action(async (docId) => {
473
+ program.command("status <docId>", { hidden: true }).description("Get document processing status").action(async (docId) => {
196
474
  const g = globals();
197
475
  try {
198
476
  const client = getClient();
@@ -213,10 +491,32 @@ program.command("status <docId>").description("Get document processing status").
213
491
  handleError(error, g.json);
214
492
  }
215
493
  });
216
- program.command("chat <question>").description("Ask a question about a processed document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").action(async (question, options) => {
494
+ program.command("chat <question>").description("Ask a question about a processed document").summary("Ask a question about one document").requiredOption("--doc <id>", "Document ID").option("--model <name>", "Override model").option("--stream", "Stream response tokens as they arrive").action(async (question, options) => {
217
495
  const g = globals();
218
496
  try {
219
497
  const client = getClient();
498
+ if (options.stream) {
499
+ let fullText = "";
500
+ for await (const event of client.stream(options.doc, question, {
501
+ model: options.model
502
+ })) {
503
+ if (event.type === "text_delta") {
504
+ fullText += event.text;
505
+ if (!g.json) process.stdout.write(event.text);
506
+ } else if (event.type === "done") {
507
+ if (!g.json) process.stdout.write("\n");
508
+ if (g.json) {
509
+ writeOutput(
510
+ JSON.stringify({ docId: options.doc, question, answer: fullText }),
511
+ g.output
512
+ );
513
+ }
514
+ } else if (event.type === "error") {
515
+ throw new Error(event.message);
516
+ }
517
+ }
518
+ return;
519
+ }
220
520
  const result = await client.generate(options.doc, question, options.model ? { model: options.model } : void 0);
221
521
  if (g.json) {
222
522
  writeOutput(JSON.stringify({ docId: options.doc, question, ...result }), g.output);
@@ -227,7 +527,7 @@ program.command("chat <question>").description("Ask a question about a processed
227
527
  handleError(error, g.json);
228
528
  }
229
529
  });
230
- program.command("list").alias("ls").description("List all documents").action(async () => {
530
+ program.command("list").alias("ls").description("List all documents").summary("List your documents").action(async () => {
231
531
  const g = globals();
232
532
  try {
233
533
  const client = getClient();
@@ -237,7 +537,7 @@ program.command("list").alias("ls").description("List all documents").action(asy
237
537
  handleError(error, g.json);
238
538
  }
239
539
  });
240
- program.command("delete <docId>").alias("rm").description("Delete a document").action(async (docId) => {
540
+ program.command("delete <docId>").alias("rm").description("Delete a document").summary("Delete one document").action(async (docId) => {
241
541
  const g = globals();
242
542
  try {
243
543
  const client = getClient();
@@ -251,7 +551,7 @@ program.command("delete <docId>").alias("rm").description("Delete a document").a
251
551
  handleError(error, g.json);
252
552
  }
253
553
  });
254
- program.command("read <docId>").description("Read document as markdown").option("-p, --pages <range>", "Page range (e.g., 1-5, 10-15)").action(async (docId, options) => {
554
+ program.command("read <docId>").description("Read document as markdown").summary("Read document markdown").option("-p, --pages <range>", "Page range (e.g., 1-5, 10-15)").action(async (docId, options) => {
255
555
  const g = globals();
256
556
  try {
257
557
  const client = getClient();
@@ -266,7 +566,12 @@ program.command("read <docId>").description("Read document as markdown").option(
266
566
  }
267
567
  });
268
568
  var collectionCmd = program.command("collection").alias("collections").alias("col").description("Collection operations");
269
- collectionCmd.command("list").alias("ls").description("List available collections").action(async () => {
569
+ collectionCmd.summary("Query across collections");
570
+ collectionCmd.addHelpText(
571
+ "after",
572
+ COLLECTION_HELP_FOOTER
573
+ );
574
+ collectionCmd.command("list").alias("ls").description("List available collections").summary("List collections").action(async () => {
270
575
  const g = globals();
271
576
  try {
272
577
  const client = getClient();
@@ -276,7 +581,73 @@ collectionCmd.command("list").alias("ls").description("List available collection
276
581
  handleError(error, g.json);
277
582
  }
278
583
  });
279
- collectionCmd.command("query <nameOrId> <question>").description("Fan-out query across collection documents").option("--schema <file>", "JSON Schema file for structured extraction").action(async (nameOrId, question, options) => {
584
// `okra collection create <name>` (hidden): creates a collection via
// collectionCreate, passing the global flags merged with this command's options.
collectionCmd
  .command("create <name>", { hidden: true })
  .description("Create a new collection")
  .option("--description <text>", "Collection description")
  .option("--docs <ids>", "Comma-separated document IDs to seed")
  .action(async (name, options) => {
    const g = globals();
    try {
      const client = getClient();
      const created = await collectionCreate(client, name, { ...g, ...options });
      const text = g.json
        ? JSON.stringify(created)
        : `Created collection "${created.name}" (${created.id})`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
598
// `okra collection show <nameOrId>` (hidden): fetches the collection via
// collectionShow and prints it through formatCollectionDetail.
collectionCmd
  .command("show <nameOrId>", { hidden: true })
  .description("Show collection details and documents")
  .action(async (nameOrId) => {
    const g = globals();
    try {
      const client = getClient();
      const detail = await collectionShow(client, nameOrId);
      const rendered = formatCollectionDetail(detail, g.json);
      writeOutput(rendered, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
608
// `okra collection delete <nameOrId>` (hidden, alias `rm`): deletes the
// collection via collectionDelete and confirms using the identifier as given.
collectionCmd
  .command("delete <nameOrId>", { hidden: true })
  .alias("rm")
  .description("Delete a collection (documents are preserved)")
  .action(async (nameOrId) => {
    const g = globals();
    try {
      const client = getClient();
      await collectionDelete(client, nameOrId);
      const confirmation = g.json
        ? JSON.stringify({ ok: true, deleted: nameOrId })
        : `Deleted collection "${nameOrId}"`;
      writeOutput(confirmation, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
622
// `okra collection add <nameOrId> <docIds...>` (hidden): adds the documents via
// collectionAddDocs. The human message counts the IDs given on the command line.
collectionCmd
  .command("add <nameOrId> <docIds...>", { hidden: true })
  .description("Add documents to a collection")
  .action(async (nameOrId, docIds) => {
    const g = globals();
    try {
      const client = getClient();
      const outcome = await collectionAddDocs(client, nameOrId, docIds);
      const text = g.json
        ? JSON.stringify(outcome)
        : `Added ${docIds.length} document(s) to "${nameOrId}"`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
636
// `okra collection remove <nameOrId> <docIds...>` (hidden): removes the documents
// via collectionRemoveDocs. The human message counts the IDs given on the command line.
collectionCmd
  .command("remove <nameOrId> <docIds...>", { hidden: true })
  .description("Remove documents from a collection")
  .action(async (nameOrId, docIds) => {
    const g = globals();
    try {
      const client = getClient();
      const outcome = await collectionRemoveDocs(client, nameOrId, docIds);
      const text = g.json
        ? JSON.stringify(outcome)
        : `Removed ${docIds.length} document(s) from "${nameOrId}"`;
      writeOutput(text, g.output);
    } catch (error) {
      handleError(error, g.json);
    }
  });
650
+ collectionCmd.command("query <nameOrId> <question>").description("Fan-out query across collection documents").summary("Ask the same question across a collection").option("--schema <file>", "Experimental: JSON Schema file for structured extraction").action(async (nameOrId, question, options) => {
280
651
  const g = globals();
281
652
  try {
282
653
  const client = getClient();
@@ -303,7 +674,7 @@ collectionCmd.command("query <nameOrId> <question>").description("Fan-out query
303
674
  handleError(error, g.json);
304
675
  }
305
676
  });
306
- collectionCmd.command("extract <nameOrId>").description("Extract structured data from all documents in a collection").requiredOption("--schema <file>", "JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", "Extraction prompt (default: auto-generated from schema)").action(async (nameOrId, options) => {
677
+ collectionCmd.command("extract <nameOrId>", { hidden: true }).description("Experimental: extract structured data from all documents in a collection").summary("Experimental structured extraction").requiredOption("--schema <file>", "Experimental: JSON Schema file or inline JSON for structured extraction").option("--prompt <query>", "Extraction prompt (default: auto-generated from schema)").action(async (nameOrId, options) => {
307
678
  const g = globals();
308
679
  try {
309
680
  const client = getClient();
@@ -340,7 +711,7 @@ ${summary.completed} documents \u2014 $${summary.total_cost_usd.toFixed(4)} tota
340
711
  handleError(error, g.json);
341
712
  }
342
713
  });
343
- collectionCmd.command("publish <nameOrId>").description("Make a collection publicly queryable").action(async (nameOrId) => {
714
+ collectionCmd.command("publish <nameOrId>", { hidden: true }).description("Make a collection publicly queryable").action(async (nameOrId) => {
344
715
  const g = globals();
345
716
  try {
346
717
  const client = getClient();
@@ -358,7 +729,7 @@ Share with: okra collection query ${nameOrId} "your question"`,
358
729
  handleError(error, g.json);
359
730
  }
360
731
  });
361
- collectionCmd.command("unpublish <nameOrId>").description("Make a collection private (owner-only)").action(async (nameOrId) => {
732
+ collectionCmd.command("unpublish <nameOrId>", { hidden: true }).description("Make a collection private (owner-only)").action(async (nameOrId) => {
362
733
  const g = globals();
363
734
  try {
364
735
  const client = getClient();
@@ -372,7 +743,7 @@ collectionCmd.command("unpublish <nameOrId>").description("Make a collection pri
372
743
  handleError(error, g.json);
373
744
  }
374
745
  });
375
- collectionCmd.command("export <nameOrId>").description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
746
+ collectionCmd.command("export <nameOrId>", { hidden: true }).description("Export pre-computed markdown for all documents in a collection").option("--flat", "Concatenated markdown with # headers + === separators").option("--zip", "One markdown file per document in a .zip archive").action(async (nameOrId, options) => {
376
747
  const g = globals();
377
748
  try {
378
749
  const client = getClient();
@@ -449,7 +820,7 @@ authCmd.command("logout").description("Remove API key from global config").actio
449
820
  handleError(error, globals().json);
450
821
  }
451
822
  });
452
- program.command("tree <jobId>").description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
823
+ program.command("tree <jobId>", { hidden: true }).description("Show document verification tree").option("-s, --status <status>", "Filter by status (complete|partial|pending|flagged|empty|gap)").option("-e, --entity <type>", "Filter by entity type (table|figure|footnote)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
453
824
  const g = globals();
454
825
  try {
455
826
  const client = getClient();
@@ -463,7 +834,7 @@ program.command("tree <jobId>").description("Show document verification tree").o
463
834
  handleError(error, g.json);
464
835
  }
465
836
  });
466
- program.command("find <jobId> <selector>").description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
837
+ program.command("find <jobId> <selector>", { hidden: true }).description("Find entities using jQuery-like selectors").option("-k, --top-k <n>", "Limit results", parseInt).option("-c, --min-confidence <n>", "Minimum confidence (0-1)", parseFloat).option("-p, --pages <range>", "Page range (e.g., 1-10)").option("--sort <by>", "Sort by (confidence|page|type)").option("--stats", "Show aggregate statistics").option("-f, --format <format>", "Output format (text|json|entities|ids)", "text").action(async (jobId, selector, options) => {
467
838
  const g = globals();
468
839
  try {
469
840
  const client = getClient();
@@ -484,7 +855,7 @@ program.command("find <jobId> <selector>").description("Find entities using jQue
484
855
  handleError(error, g.json);
485
856
  }
486
857
  });
487
- var pageCmd = program.command("page").description("Page content operations");
858
+ var pageCmd = program.command("page", { hidden: true }).description("Page content operations");
488
859
  pageCmd.command("get <jobId> <pageNum>").description("Get page content").option("-v, --version <n>", "Specific version", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "markdown").action(async (jobId, pageNum, options) => {
489
860
  const g = globals();
490
861
  try {
@@ -546,7 +917,7 @@ pageCmd.command("versions <jobId> <pageNum>").description("List page versions").
546
917
  handleError(error, g.json);
547
918
  }
548
919
  });
549
- program.command("search <jobId> <query>").description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
920
+ program.command("search <jobId> <query>", { hidden: true }).description("Search page content").option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, query, options) => {
550
921
  const g = globals();
551
922
  try {
552
923
  const client = getClient();
@@ -557,7 +928,7 @@ program.command("search <jobId> <query>").description("Search page content").opt
557
928
  handleError(error, g.json);
558
929
  }
559
930
  });
560
- program.command("tables <jobId>").description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
931
+ program.command("tables <jobId>", { hidden: true }).description("List extracted tables").option("-p, --page <n>", "Filter by page", parseInt).option("-s, --status <status>", "Filter by status (pending|verified|flagged|rejected)").option("-f, --format <format>", "Output format (text|json|markdown)", "text").action(async (jobId, options) => {
561
932
  const g = globals();
562
933
  try {
563
934
  const client = getClient();
@@ -571,7 +942,7 @@ program.command("tables <jobId>").description("List extracted tables").option("-
571
942
  handleError(error, g.json);
572
943
  }
573
944
  });
574
- program.command("history <jobId>").description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
945
+ program.command("history <jobId>", { hidden: true }).description("Show verification history").option("-l, --limit <n>", "Limit entries", parseInt, 50).option("-f, --format <format>", "Output format (text|json)", "text").action(async (jobId, options) => {
575
946
  const g = globals();
576
947
  try {
577
948
  const client = getClient();
@@ -582,7 +953,7 @@ program.command("history <jobId>").description("Show verification history").opti
582
953
  handleError(error, g.json);
583
954
  }
584
955
  });
585
- program.command("toc <jobId>").description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
956
+ program.command("toc <jobId>", { hidden: true }).description("Extract table of contents from PDF").option("--max-depth <n>", "Maximum TOC depth", parseInt).option("-f, --format <format>", "Output format (text|json|markdown)", "text").option("--watch", "Watch live extraction events via WebSocket").action(async (jobId, options) => {
586
957
  const g = globals();
587
958
  try {
588
959
  const client = getClient();
@@ -596,5 +967,18 @@ program.command("toc <jobId>").description("Extract table of contents from PDF")
596
967
  handleError(error, g.json);
597
968
  }
598
969
  });
599
- program.parse();
970
+ if (isDirectExecution()) {
971
+ void program.parseAsync();
972
+ }
973
+ export {
974
+ ADVANCED_COLLECTION_SUBCOMMANDS,
975
+ ADVANCED_COMMANDS,
976
+ CLI_VERSION,
977
+ COLLECTION_HELP_FOOTER,
978
+ PRIMARY_COLLECTION_SUBCOMMANDS,
979
+ PRIMARY_COMMANDS,
980
+ ROOT_HELP_FOOTER,
981
+ getMissingApiKeyMessage,
982
+ program
983
+ };
600
984
  //# sourceMappingURL=bin.js.map