okrapdf 0.11.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +283 -0
- package/dist/browser.d.ts +1 -1
- package/dist/{chunk-OVRTURGN.js → chunk-2HJPTW6S.js} +334 -5
- package/dist/chunk-2HJPTW6S.js.map +1 -0
- package/dist/{chunk-4IHOG655.js → chunk-XOHPZW3V.js} +101 -38
- package/dist/{chunk-4IHOG655.js.map → chunk-XOHPZW3V.js.map} +1 -1
- package/dist/chunk-YVUL6ZLA.js +155 -0
- package/dist/chunk-YVUL6ZLA.js.map +1 -0
- package/dist/cli/bin.js +117 -17
- package/dist/cli/bin.js.map +1 -1
- package/dist/cli/index.d.ts +16 -4
- package/dist/cli/index.js +7 -1
- package/dist/{client-CGA6CP3H.d.ts → client-p82YcAs3.d.ts} +11 -1
- package/dist/index.d.ts +20 -4
- package/dist/index.js +10 -4
- package/dist/index.js.map +1 -1
- package/dist/react/index.d.ts +3 -3
- package/dist/react/index.js +2 -2
- package/dist/{types-ZJxBNLTJ.d.ts → types-DDm2eEL0.d.ts} +114 -2
- package/dist/url.d.ts +1 -1
- package/package.json +18 -1
- package/dist/chunk-ME6F2MXQ.js +0 -84
- package/dist/chunk-ME6F2MXQ.js.map +0 -1
- package/dist/chunk-OVRTURGN.js.map +0 -1
package/README.md
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# okrapdf
|
|
2
|
+
|
|
3
|
+
Upload a PDF, get an OpenAI-compatible endpoint.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
npm install okrapdf
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Get your API key at [app.okrapdf.com/settings](https://app.okrapdf.com/settings).
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```ts
|
|
14
|
+
import { OkraClient } from 'okrapdf';
|
|
15
|
+
|
|
16
|
+
const okra = new OkraClient({ apiKey: process.env.OKRA_API_KEY });
|
|
17
|
+
const session = await okra.sessions.create('./invoice.pdf');
|
|
18
|
+
|
|
19
|
+
// Every document gets its own chat/completions URL
|
|
20
|
+
console.log(session.modelEndpoint);
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
That prints a URL like:
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
https://api.okrapdf.com/v1/documents/doc-441a8a0be0e94914b982
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
This is a full OpenAI-compatible base URL. Plug it into any client.
|
|
30
|
+
|
|
31
|
+
## What You Get
|
|
32
|
+
|
|
33
|
+
Upload a PDF and OkraPDF gives you predictable URLs for everything:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
Document: doc-441a8a0be0e94914b982
|
|
37
|
+
|
|
38
|
+
Completion: https://api.okrapdf.com/document/doc-441a8a0be0e94914b982/chat/completions
|
|
39
|
+
Status: https://api.okrapdf.com/document/doc-441a8a0be0e94914b982/status
|
|
40
|
+
Pages: https://api.okrapdf.com/document/doc-441a8a0be0e94914b982/pages
|
|
41
|
+
Entities: https://api.okrapdf.com/document/doc-441a8a0be0e94914b982/nodes
|
|
42
|
+
Download: https://api.okrapdf.com/document/doc-441a8a0be0e94914b982/download
|
|
43
|
+
|
|
44
|
+
Page images:
|
|
45
|
+
pg 1: https://api.okrapdf.com/v1/documents/doc-441a8a0be0e94914b982/pg_1.png
|
|
46
|
+
resized: https://api.okrapdf.com/v1/documents/doc-441a8a0be0e94914b982/w_200,h_300/pg_1.png
|
|
47
|
+
shimmer: https://api.okrapdf.com/v1/documents/doc-441a8a0be0e94914b982/d_shimmer/pg_1.png
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
All URLs are deterministic. Build them from the document ID without calling the API first.
|
|
51
|
+
|
|
52
|
+
## Use with OpenAI SDK
|
|
53
|
+
|
|
54
|
+
```ts
|
|
55
|
+
import OpenAI from 'openai';
|
|
56
|
+
|
|
57
|
+
const openai = new OpenAI({
|
|
58
|
+
apiKey: process.env.OKRA_API_KEY,
|
|
59
|
+
baseURL: session.modelEndpoint, // https://api.okrapdf.com/v1/documents/doc-...
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const res = await openai.chat.completions.create({
|
|
63
|
+
model: 'okra',
|
|
64
|
+
messages: [{ role: 'user', content: 'What form is this?' }],
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
console.log(res.choices[0].message.content);
|
|
68
|
+
// → "This is Form W-9 (Request for Taxpayer Identification Number and
|
|
69
|
+
// Certification), used by entities to collect a taxpayer's TIN..."
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Use with AI SDK
|
|
73
|
+
|
|
74
|
+
```ts
|
|
75
|
+
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
|
76
|
+
import { generateText } from 'ai';
|
|
77
|
+
|
|
78
|
+
const provider = createOpenAICompatible({
|
|
79
|
+
name: 'okra',
|
|
80
|
+
apiKey: process.env.OKRA_API_KEY,
|
|
81
|
+
baseURL: session.modelEndpoint,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
const { text } = await generateText({
|
|
85
|
+
model: provider('okra'),
|
|
86
|
+
prompt: 'Summarize this document in 3 bullet points',
|
|
87
|
+
});
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Use with curl
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Upload
|
|
94
|
+
curl -X POST https://api.okrapdf.com/document/doc-my-w9/upload-url \
|
|
95
|
+
-H "Authorization: Bearer $OKRA_API_KEY" \
|
|
96
|
+
-H "Content-Type: application/json" \
|
|
97
|
+
-d '{"url": "https://www.irs.gov/pub/irs-pdf/fw9.pdf"}'
|
|
98
|
+
|
|
99
|
+
# Ask a question
|
|
100
|
+
curl https://api.okrapdf.com/document/doc-my-w9/chat/completions \
|
|
101
|
+
-H "Authorization: Bearer $OKRA_API_KEY" \
|
|
102
|
+
-H "Content-Type: application/json" \
|
|
103
|
+
-d '{"messages": [{"role": "user", "content": "List all parts of this form."}]}'
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Response:
|
|
107
|
+
|
|
108
|
+
```json
|
|
109
|
+
{
|
|
110
|
+
"id": "chatcmpl-18g5qhmmrm",
|
|
111
|
+
"object": "chat.completion",
|
|
112
|
+
"model": "accounts/fireworks/models/kimi-k2p5",
|
|
113
|
+
"choices": [{
|
|
114
|
+
"message": {
|
|
115
|
+
"role": "assistant",
|
|
116
|
+
"content": "Based on the Form W-9 document, there are two numbered parts:\n\n| Part | Title |\n|------|-------|\n| Part I | Taxpayer Identification Number (TIN) |\n| Part II | Certification |"
|
|
117
|
+
},
|
|
118
|
+
"finish_reason": "stop"
|
|
119
|
+
}],
|
|
120
|
+
"usage": { "prompt_tokens": 227, "completion_tokens": 404, "total_tokens": 631 }
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## SDK Methods
|
|
125
|
+
|
|
126
|
+
The SDK wraps all of this so you don't need a separate client:
|
|
127
|
+
|
|
128
|
+
```ts
|
|
129
|
+
// Ask a question (non-streaming)
|
|
130
|
+
const { answer } = await session.prompt('What is the total amount due?');
|
|
131
|
+
|
|
132
|
+
// Stream
|
|
133
|
+
for await (const event of session.stream('Summarize this document')) {
|
|
134
|
+
if (event.type === 'text_delta') process.stdout.write(event.text);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Structured output with Zod
|
|
138
|
+
import { z } from 'zod';
|
|
139
|
+
|
|
140
|
+
const Invoice = z.object({
|
|
141
|
+
vendor: z.string(),
|
|
142
|
+
total: z.number(),
|
|
143
|
+
lineItems: z.array(z.object({
|
|
144
|
+
description: z.string(),
|
|
145
|
+
amount: z.number(),
|
|
146
|
+
})),
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
const { data } = await session.prompt('Extract the invoice', { schema: Invoice });
|
|
150
|
+
// data: { vendor: "Acme Corp", total: 1250.00, lineItems: [...] }
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Pages & Entities
|
|
154
|
+
|
|
155
|
+
```ts
|
|
156
|
+
const pages = await session.pages(); // { pageCount: 6, pages: [...] }
|
|
157
|
+
const { nodes } = await session.entities(); // extracted text, tables, etc.
|
|
158
|
+
const { nodes } = await session.entities({ type: 'table' });
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Upload
|
|
162
|
+
|
|
163
|
+
Accepts file paths, URLs, `Blob`, `ArrayBuffer`, or `Uint8Array`:
|
|
164
|
+
|
|
165
|
+
```ts
|
|
166
|
+
// URL
|
|
167
|
+
const session = await okra.sessions.create('https://example.com/report.pdf');
|
|
168
|
+
|
|
169
|
+
// Bytes
|
|
170
|
+
const session = await okra.sessions.create(pdfBytes, {
|
|
171
|
+
upload: { fileName: 'report.pdf' },
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
// Attach to existing document (no upload, no wait)
|
|
175
|
+
const session = okra.sessions.from('doc-441a8a0be0e94914b982');
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Deterministic URLs
|
|
179
|
+
|
|
180
|
+
Build page image and export URLs from a document ID — no API call needed:
|
|
181
|
+
|
|
182
|
+
```ts
|
|
183
|
+
import { doc } from 'okrapdf/doc';
|
|
184
|
+
|
|
185
|
+
const d = doc('doc-441a8a0be0e94914b982');
|
|
186
|
+
|
|
187
|
+
d.pages(1).image(); // .../pg_1.png
|
|
188
|
+
d.pages(1).image({ w: 200, h: 300 }); // .../w_200,h_300/pg_1.png
|
|
189
|
+
d.export('markdown'); // .../export.md
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## Collections — Fan-Out Query to CSV
|
|
193
|
+
|
|
194
|
+
Ask the same question across every document in a collection. Each doc answers independently in parallel, and results stream back as NDJSON.
|
|
195
|
+
|
|
196
|
+
```ts
|
|
197
|
+
import { OkraClient } from 'okrapdf';
|
|
198
|
+
import { z } from 'zod';
|
|
199
|
+
import { writeFileSync } from 'fs';
|
|
200
|
+
|
|
201
|
+
const okra = new OkraClient({ apiKey: process.env.OKRA_API_KEY });
|
|
202
|
+
|
|
203
|
+
// Fan-out: ask every doc in the collection the same question
|
|
204
|
+
const stream = okra.collections.query(
|
|
205
|
+
'col-40da068481cf4f248853507cba6be611',
|
|
206
|
+
'Who are the top 3 people mentioned in this document?',
|
|
207
|
+
);
|
|
208
|
+
|
|
209
|
+
// Gather all results
|
|
210
|
+
const result = await stream.gather();
|
|
211
|
+
|
|
212
|
+
// Write CSV
|
|
213
|
+
const header = 'doc_id,answer,cost_usd';
|
|
214
|
+
const rows = [...result.answers.values()].map(a =>
|
|
215
|
+
`"${a.docId}","${a.answer.slice(0, 200)}",${a.costUsd}`
|
|
216
|
+
);
|
|
217
|
+
writeFileSync('results.csv', [header, ...rows].join('\n'));
|
|
218
|
+
|
|
219
|
+
console.log(`${result.completed} docs, $${result.totalCostUsd.toFixed(4)} total`);
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Real output from a 10-K earnings collection:
|
|
223
|
+
|
|
224
|
+
```csv
|
|
225
|
+
doc_id,file_name,answer,cost_usd
|
|
226
|
+
"doc-9a3f21...","NVDA-10K-2025.pdf","Revenue: $130.5B, Net Income: $72.9B, YoY Growth: 114%",0.0048
|
|
227
|
+
"doc-b7e810...","AAPL-10K-2025.pdf","Revenue: $391.0B, Net Income: $101.2B, YoY Growth: 5%",0.0039
|
|
228
|
+
"doc-c4d562...","MSFT-10K-2025.pdf","Revenue: $254.2B, Net Income: $97.1B, YoY Growth: 16%",0.0051
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Works with structured output too — pass a Zod schema and each doc extracts typed data:
|
|
232
|
+
|
|
233
|
+
```ts
|
|
234
|
+
const FinancialReport = z.object({
|
|
235
|
+
company: z.string(),
|
|
236
|
+
revenue: z.number(),
|
|
237
|
+
netIncome: z.number(),
|
|
238
|
+
quarter: z.string(),
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
const stream = okra.collections.query(
|
|
242
|
+
'col-financials',
|
|
243
|
+
'Extract the financial summary',
|
|
244
|
+
{ schema: FinancialReport },
|
|
245
|
+
);
|
|
246
|
+
|
|
247
|
+
const result = await stream.gather();
|
|
248
|
+
for (const [docId, answer] of result.answers) {
|
|
249
|
+
console.log(answer.data); // { company: "NVIDIA", revenue: 35082, ... }
|
|
250
|
+
}
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Or stream per-doc events in real time:
|
|
254
|
+
|
|
255
|
+
```ts
|
|
256
|
+
for await (const event of stream) {
|
|
257
|
+
if (event.type === 'result') {
|
|
258
|
+
console.log(`${event.doc_id}: ${event.answer.slice(0, 80)}...`);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Sub-path Exports
|
|
264
|
+
|
|
265
|
+
| Import | Use |
|
|
266
|
+
|--------|-----|
|
|
267
|
+
| `okrapdf` | `OkraClient`, types, errors |
|
|
268
|
+
| `okrapdf/doc` | `doc()` URL builder |
|
|
269
|
+
| `okrapdf/browser` | Browser-safe client (no Node.js deps) |
|
|
270
|
+
| `okrapdf/worker` | Cloudflare Worker adapter |
|
|
271
|
+
| `okrapdf/react` | React hooks (`useSession`, `usePages`) |
|
|
272
|
+
|
|
273
|
+
## CLI
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
npx okrapdf upload ./invoice.pdf
|
|
277
|
+
npx okrapdf pages doc-abc123
|
|
278
|
+
npx okrapdf chat doc-abc123 "What is the total?"
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## License
|
|
282
|
+
|
|
283
|
+
MIT
|
package/dist/browser.d.ts
CHANGED
|
@@ -52,6 +52,18 @@ function inferBlobName(input, fallback) {
|
|
|
52
52
|
}
|
|
53
53
|
return fallback;
|
|
54
54
|
}
|
|
55
|
+
/**
 * Reduce a file name or path to a printable-ASCII name that can be sent as an
 * HTTP header value (it is used for the `X-File-Name` upload header).
 *
 * Steps:
 *  1. Keep only the last non-empty path segment (`/` and `\` both count as
 *     separators), so a trailing separator such as "invoices/" yields
 *     "invoices" instead of leaking the whole path into the header.
 *  2. Replace CR/LF with spaces and trim.
 *  3. If the result is non-empty printable ASCII, return it unchanged.
 *  4. Otherwise transliterate: NFKD-normalize the base name, drop everything
 *     outside printable ASCII, replace remaining unsafe characters and
 *     whitespace with "_", and strip leading/trailing ".", "_" and "-".
 *     An ASCII extension of 1-10 alphanumerics is kept (lower-cased).
 *
 * @param {string} fileName - File name or path supplied by the caller.
 * @returns {string} A non-empty printable-ASCII file name. Falls back to
 *   "document" for the base and ".pdf" for the extension when nothing usable
 *   survives sanitising.
 */
function toHeaderSafeFileName(fileName) {
  // Last non-empty segment: "dir/sub/" -> "sub", "/" -> "".
  const segments = fileName.split(/[\\/]/).filter((segment) => segment !== "");
  const leaf = segments.length > 0 ? segments[segments.length - 1] : "";
  const cleaned = leaf.replace(/[\r\n]/g, " ").trim();
  if (/^[\x20-\x7E]+$/.test(cleaned)) {
    return cleaned;
  }
  const extMatch = cleaned.match(/(\.[A-Za-z0-9]{1,10})$/);
  const extension = extMatch?.[1].toLowerCase() ?? "";
  const base = extension ? cleaned.slice(0, -extension.length) : cleaned;
  const asciiBase = base
    .normalize("NFKD") // split accented letters into base letter + combining mark
    .replace(/[^\x20-\x7E]/g, "") // drop combining marks and any other non-ASCII
    .replace(/[^A-Za-z0-9._ -]/g, "_")
    .replace(/\s+/g, "_")
    .replace(/_+/g, "_")
    .replace(/^[._-]+|[._-]+$/g, "");
  return `${asciiBase || "document"}${extension || ".pdf"}`;
}
|
|
55
67
|
async function readLocalFileFromNode(inputPath) {
|
|
56
68
|
try {
|
|
57
69
|
const [fsModule, pathModule] = await Promise.all([
|
|
@@ -64,9 +76,35 @@ async function readLocalFileFromNode(inputPath) {
|
|
|
64
76
|
fileName: pathModule.basename(inputPath)
|
|
65
77
|
};
|
|
66
78
|
} catch (error) {
|
|
79
|
+
const code = typeof error === "object" && error && "code" in error ? String(error.code) : void 0;
|
|
80
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
81
|
+
if (code === "ENOENT") {
|
|
82
|
+
throw new OkraRuntimeError(
|
|
83
|
+
"INVALID_REQUEST",
|
|
84
|
+
`Local file not found: ${inputPath}`,
|
|
85
|
+
400,
|
|
86
|
+
error
|
|
87
|
+
);
|
|
88
|
+
}
|
|
89
|
+
if (code === "EACCES" || code === "EPERM") {
|
|
90
|
+
throw new OkraRuntimeError(
|
|
91
|
+
"INVALID_REQUEST",
|
|
92
|
+
`Cannot read local file (${code}): ${inputPath}`,
|
|
93
|
+
400,
|
|
94
|
+
error
|
|
95
|
+
);
|
|
96
|
+
}
|
|
97
|
+
if (code === "ERR_MODULE_NOT_FOUND" || code === "ERR_UNKNOWN_BUILTIN_MODULE") {
|
|
98
|
+
throw new OkraRuntimeError(
|
|
99
|
+
"INVALID_REQUEST",
|
|
100
|
+
"Local file path uploads are only supported in Node.js. In browser runtimes, pass File/Blob, ArrayBuffer, Uint8Array, or URL.",
|
|
101
|
+
400,
|
|
102
|
+
error
|
|
103
|
+
);
|
|
104
|
+
}
|
|
67
105
|
throw new OkraRuntimeError(
|
|
68
106
|
"INVALID_REQUEST",
|
|
69
|
-
|
|
107
|
+
`Failed to read local file "${inputPath}": ${msg}`,
|
|
70
108
|
400,
|
|
71
109
|
error
|
|
72
110
|
);
|
|
@@ -187,7 +225,7 @@ var OkraClient = class {
|
|
|
187
225
|
}
|
|
188
226
|
if (typeof globalThis !== "undefined" && "window" in globalThis && this.apiKey && !this.apiKey.startsWith("okra_pk_")) {
|
|
189
227
|
console.warn(
|
|
190
|
-
"[OkraPDF] Secret API key detected in browser. Use a publishable key (okra_pk_...) for client-side usage. See https://docs.okrapdf.
|
|
228
|
+
"[OkraPDF] Secret API key detected in browser. Use a publishable key (okra_pk_...) for client-side usage. See https://docs.okrapdf.com/api-keys#publishable-keys"
|
|
191
229
|
);
|
|
192
230
|
}
|
|
193
231
|
this.sessions = {
|
|
@@ -223,7 +261,9 @@ var OkraClient = class {
|
|
|
223
261
|
};
|
|
224
262
|
this.collections = {
|
|
225
263
|
list: (signal) => this.collectionList(signal),
|
|
226
|
-
get: (collectionId, signal) => this.collectionGet(collectionId, signal)
|
|
264
|
+
get: (collectionId, signal) => this.collectionGet(collectionId, signal),
|
|
265
|
+
query: (collectionId, prompt, options2) => this.collectionQuery(collectionId, prompt, options2),
|
|
266
|
+
exportMarkdown: (collectionId, options2) => this.collectionExportMarkdown(collectionId, options2)
|
|
227
267
|
};
|
|
228
268
|
}
|
|
229
269
|
// ─── Collections ────────────────────────────────────────────────────────
|
|
@@ -240,6 +280,235 @@ var OkraClient = class {
|
|
|
240
280
|
{ method: "GET", signal }
|
|
241
281
|
);
|
|
242
282
|
}
|
|
283
|
+
collectionQuery(collectionId, prompt, options) {
|
|
284
|
+
const ac = new AbortController();
|
|
285
|
+
if (options?.signal) {
|
|
286
|
+
options.signal.addEventListener("abort", () => ac.abort(), { once: true });
|
|
287
|
+
}
|
|
288
|
+
const body = { prompt, stream: true };
|
|
289
|
+
if (options?.schema) {
|
|
290
|
+
const normalized = normalizeSchema(options.schema);
|
|
291
|
+
body.response_format = {
|
|
292
|
+
type: "json_schema",
|
|
293
|
+
json_schema: { name: "result", schema: normalized.jsonSchema }
|
|
294
|
+
};
|
|
295
|
+
}
|
|
296
|
+
if (options?.docIds) {
|
|
297
|
+
body.doc_ids = options.docIds;
|
|
298
|
+
}
|
|
299
|
+
const responsePromise = this.rawRequest(
|
|
300
|
+
`/v1/collections/${encodeURIComponent(collectionId)}/query`,
|
|
301
|
+
{
|
|
302
|
+
method: "POST",
|
|
303
|
+
headers: { "Content-Type": "application/json" },
|
|
304
|
+
body: JSON.stringify(body),
|
|
305
|
+
signal: ac.signal
|
|
306
|
+
}
|
|
307
|
+
);
|
|
308
|
+
async function* iterateNdjson() {
|
|
309
|
+
const response = await responsePromise;
|
|
310
|
+
if (!response.ok) {
|
|
311
|
+
const text = await response.text();
|
|
312
|
+
throw new OkraRuntimeError("HTTP_ERROR", `Collection query failed: ${text}`, response.status);
|
|
313
|
+
}
|
|
314
|
+
if (!response.body) {
|
|
315
|
+
throw new OkraRuntimeError("INVALID_RESPONSE", "No response body for collection query", 500);
|
|
316
|
+
}
|
|
317
|
+
const reader = response.body.getReader();
|
|
318
|
+
const decoder = new TextDecoder();
|
|
319
|
+
let buffer = "";
|
|
320
|
+
try {
|
|
321
|
+
while (true) {
|
|
322
|
+
const { done, value } = await reader.read();
|
|
323
|
+
if (done) break;
|
|
324
|
+
buffer += decoder.decode(value, { stream: true });
|
|
325
|
+
const lines = buffer.split("\n");
|
|
326
|
+
buffer = lines.pop() || "";
|
|
327
|
+
for (const line of lines) {
|
|
328
|
+
const trimmed = line.trim();
|
|
329
|
+
if (!trimmed) continue;
|
|
330
|
+
try {
|
|
331
|
+
yield JSON.parse(trimmed);
|
|
332
|
+
} catch {
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
const remaining = buffer.trim();
|
|
337
|
+
if (remaining) {
|
|
338
|
+
try {
|
|
339
|
+
yield JSON.parse(remaining);
|
|
340
|
+
} catch {
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
} finally {
|
|
344
|
+
reader.releaseLock();
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
let iteratorInstance = null;
|
|
348
|
+
function getIterator() {
|
|
349
|
+
if (!iteratorInstance) iteratorInstance = iterateNdjson();
|
|
350
|
+
return iteratorInstance;
|
|
351
|
+
}
|
|
352
|
+
const stream = {
|
|
353
|
+
[Symbol.asyncIterator]() {
|
|
354
|
+
return getIterator();
|
|
355
|
+
},
|
|
356
|
+
async gather() {
|
|
357
|
+
const startTime = Date.now();
|
|
358
|
+
const answers = /* @__PURE__ */ new Map();
|
|
359
|
+
let queryId = "";
|
|
360
|
+
let queryPrompt = prompt;
|
|
361
|
+
let totalCostUsd = 0;
|
|
362
|
+
let completed = 0;
|
|
363
|
+
let failed = 0;
|
|
364
|
+
for await (const event of getIterator()) {
|
|
365
|
+
if (event.type === "start") {
|
|
366
|
+
queryId = event.query_id;
|
|
367
|
+
queryPrompt = event.prompt;
|
|
368
|
+
} else if (event.type === "result") {
|
|
369
|
+
answers.set(event.doc_id, {
|
|
370
|
+
docId: event.doc_id,
|
|
371
|
+
status: event.status,
|
|
372
|
+
answer: event.answer,
|
|
373
|
+
data: event.data,
|
|
374
|
+
costUsd: event.usage?.cost_usd ?? 0,
|
|
375
|
+
durationMs: event.duration_ms,
|
|
376
|
+
error: event.error
|
|
377
|
+
});
|
|
378
|
+
} else if (event.type === "done") {
|
|
379
|
+
totalCostUsd = event.total_cost_usd;
|
|
380
|
+
completed = event.completed;
|
|
381
|
+
failed = event.failed;
|
|
382
|
+
} else if (event.type === "error") {
|
|
383
|
+
throw new OkraRuntimeError("HTTP_ERROR", event.error, 500);
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
return {
|
|
387
|
+
queryId,
|
|
388
|
+
prompt: queryPrompt,
|
|
389
|
+
answers,
|
|
390
|
+
totalCostUsd,
|
|
391
|
+
durationMs: Date.now() - startTime,
|
|
392
|
+
completed,
|
|
393
|
+
failed
|
|
394
|
+
};
|
|
395
|
+
},
|
|
396
|
+
abort() {
|
|
397
|
+
ac.abort();
|
|
398
|
+
},
|
|
399
|
+
toReadableStream() {
|
|
400
|
+
const encoder = new TextEncoder();
|
|
401
|
+
const iter = getIterator();
|
|
402
|
+
return new ReadableStream({
|
|
403
|
+
async pull(controller) {
|
|
404
|
+
const { done, value } = await iter.next();
|
|
405
|
+
if (done) {
|
|
406
|
+
controller.close();
|
|
407
|
+
} else {
|
|
408
|
+
controller.enqueue(encoder.encode(JSON.stringify(value) + "\n"));
|
|
409
|
+
}
|
|
410
|
+
},
|
|
411
|
+
cancel() {
|
|
412
|
+
ac.abort();
|
|
413
|
+
}
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
};
|
|
417
|
+
return stream;
|
|
418
|
+
}
|
|
419
|
+
// ─── Collection Export (markdown from R2) ────────────────────────────────
|
|
420
|
+
collectionExportPath(collectionId, format) {
|
|
421
|
+
return `/v1/collections/${encodeURIComponent(collectionId)}/export?format=${encodeURIComponent(format)}`;
|
|
422
|
+
}
|
|
423
|
+
async collectionExportMarkdown(collectionId, options) {
|
|
424
|
+
let format = "markdown";
|
|
425
|
+
let signal;
|
|
426
|
+
if (options && typeof options === "object" && "aborted" in options) {
|
|
427
|
+
signal = options;
|
|
428
|
+
} else if (options) {
|
|
429
|
+
format = options.format ?? "markdown";
|
|
430
|
+
signal = options.signal;
|
|
431
|
+
}
|
|
432
|
+
const response = await this.rawRequest(
|
|
433
|
+
this.collectionExportPath(collectionId, format),
|
|
434
|
+
{ method: "GET", signal }
|
|
435
|
+
);
|
|
436
|
+
if (!response.ok) {
|
|
437
|
+
const text = await response.text();
|
|
438
|
+
throw new OkraRuntimeError("HTTP_ERROR", `Collection export failed: ${text}`, response.status);
|
|
439
|
+
}
|
|
440
|
+
if (format === "zip") {
|
|
441
|
+
const body = await response.arrayBuffer();
|
|
442
|
+
return new Uint8Array(body);
|
|
443
|
+
}
|
|
444
|
+
if (!response.body) {
|
|
445
|
+
throw new OkraRuntimeError("INVALID_RESPONSE", "No response body for collection export", 500);
|
|
446
|
+
}
|
|
447
|
+
const reader = response.body.getReader();
|
|
448
|
+
const decoder = new TextDecoder();
|
|
449
|
+
let buffer = "";
|
|
450
|
+
const documents = [];
|
|
451
|
+
let totalPages = 0;
|
|
452
|
+
let collectionName = "";
|
|
453
|
+
try {
|
|
454
|
+
while (true) {
|
|
455
|
+
const { done, value } = await reader.read();
|
|
456
|
+
if (done) break;
|
|
457
|
+
buffer += decoder.decode(value, { stream: true });
|
|
458
|
+
const lines = buffer.split("\n");
|
|
459
|
+
buffer = lines.pop() || "";
|
|
460
|
+
for (const line of lines) {
|
|
461
|
+
const trimmed = line.trim();
|
|
462
|
+
if (!trimmed) continue;
|
|
463
|
+
try {
|
|
464
|
+
const event = JSON.parse(trimmed);
|
|
465
|
+
if (event.type === "result") {
|
|
466
|
+
documents.push({
|
|
467
|
+
docId: event.doc_id,
|
|
468
|
+
fileName: event.file_name,
|
|
469
|
+
pageCount: event.page_count,
|
|
470
|
+
pages: event.pages
|
|
471
|
+
});
|
|
472
|
+
totalPages += event.page_count;
|
|
473
|
+
}
|
|
474
|
+
} catch {
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
const remaining = buffer.trim();
|
|
479
|
+
if (remaining) {
|
|
480
|
+
try {
|
|
481
|
+
const event = JSON.parse(remaining);
|
|
482
|
+
if (event.type === "result") {
|
|
483
|
+
documents.push({
|
|
484
|
+
docId: event.doc_id,
|
|
485
|
+
fileName: event.file_name,
|
|
486
|
+
pageCount: event.page_count,
|
|
487
|
+
pages: event.pages
|
|
488
|
+
});
|
|
489
|
+
totalPages += event.page_count;
|
|
490
|
+
}
|
|
491
|
+
} catch {
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
} finally {
|
|
495
|
+
reader.releaseLock();
|
|
496
|
+
}
|
|
497
|
+
try {
|
|
498
|
+
const col = await this.collectionGet(collectionId);
|
|
499
|
+
collectionName = col.name;
|
|
500
|
+
} catch {
|
|
501
|
+
collectionName = collectionId;
|
|
502
|
+
}
|
|
503
|
+
return {
|
|
504
|
+
collectionId,
|
|
505
|
+
collectionName,
|
|
506
|
+
documents,
|
|
507
|
+
totalDocuments: documents.length,
|
|
508
|
+
totalPages,
|
|
509
|
+
exportedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
510
|
+
};
|
|
511
|
+
}
|
|
243
512
|
// ─── Upload ──────────────────────────────────────────────────────────────
|
|
244
513
|
async upload(input, options = {}) {
|
|
245
514
|
const documentId = options.documentId || makeDocId();
|
|
@@ -278,7 +547,7 @@ var OkraClient = class {
|
|
|
278
547
|
}
|
|
279
548
|
const headers = {
|
|
280
549
|
"Content-Type": "application/pdf",
|
|
281
|
-
"X-File-Name": fileName
|
|
550
|
+
"X-File-Name": toHeaderSafeFileName(fileName)
|
|
282
551
|
};
|
|
283
552
|
if (options.capabilities) {
|
|
284
553
|
headers["X-Capabilities"] = JSON.stringify(options.capabilities);
|
|
@@ -299,6 +568,66 @@ var OkraClient = class {
|
|
|
299
568
|
});
|
|
300
569
|
return this.sessions.from(documentId);
|
|
301
570
|
}
|
|
571
|
+
// ─── Workflow Config / Reparse ───────────────────────────────────────────
|
|
572
|
+
async updateConfig(documentId, update, signal) {
|
|
573
|
+
if (!documentId.trim()) {
|
|
574
|
+
throw new OkraRuntimeError("INVALID_REQUEST", "updateConfig requires a non-empty documentId", 400);
|
|
575
|
+
}
|
|
576
|
+
return this.requestJson(
|
|
577
|
+
`/v1/documents/${encodeURIComponent(documentId)}/config`,
|
|
578
|
+
{
|
|
579
|
+
method: "PUT",
|
|
580
|
+
headers: { "Content-Type": "application/json" },
|
|
581
|
+
body: JSON.stringify(update),
|
|
582
|
+
signal
|
|
583
|
+
}
|
|
584
|
+
);
|
|
585
|
+
}
|
|
586
|
+
async reparse(documentId, options = {}) {
|
|
587
|
+
if (!documentId.trim()) {
|
|
588
|
+
throw new OkraRuntimeError("INVALID_REQUEST", "reparse requires a non-empty documentId", 400);
|
|
589
|
+
}
|
|
590
|
+
const strategy = options.strategy && options.strategy !== "auto" ? `?strategy=${encodeURIComponent(options.strategy)}` : "";
|
|
591
|
+
return this.requestJson(
|
|
592
|
+
`/v1/documents/${encodeURIComponent(documentId)}/reparse${strategy}`,
|
|
593
|
+
{ method: "POST", signal: options.signal }
|
|
594
|
+
);
|
|
595
|
+
}
|
|
596
|
+
async applyWorkflow(documentId, options) {
|
|
597
|
+
if (!options?.capabilities) {
|
|
598
|
+
throw new OkraRuntimeError("INVALID_REQUEST", "applyWorkflow requires capabilities", 400);
|
|
599
|
+
}
|
|
600
|
+
const config = await this.updateConfig(
|
|
601
|
+
documentId,
|
|
602
|
+
{ capabilities: options.capabilities },
|
|
603
|
+
options.signal
|
|
604
|
+
);
|
|
605
|
+
if (options.reparse === false) {
|
|
606
|
+
return { config };
|
|
607
|
+
}
|
|
608
|
+
const reparse = await this.reparse(documentId, {
|
|
609
|
+
strategy: options.strategy,
|
|
610
|
+
signal: options.signal
|
|
611
|
+
});
|
|
612
|
+
return { config, reparse };
|
|
613
|
+
}
|
|
614
|
+
async getKeyWorkflow(signal) {
|
|
615
|
+
return this.requestJson(
|
|
616
|
+
"/v1/key-workflow",
|
|
617
|
+
{ method: "GET", signal }
|
|
618
|
+
);
|
|
619
|
+
}
|
|
620
|
+
async setKeyWorkflow(defaultCapabilities, signal) {
|
|
621
|
+
return this.requestJson(
|
|
622
|
+
"/v1/key-workflow",
|
|
623
|
+
{
|
|
624
|
+
method: "PUT",
|
|
625
|
+
headers: { "Content-Type": "application/json" },
|
|
626
|
+
body: JSON.stringify({ default_capabilities: defaultCapabilities }),
|
|
627
|
+
signal
|
|
628
|
+
}
|
|
629
|
+
);
|
|
630
|
+
}
|
|
302
631
|
// ─── Status / Wait ───────────────────────────────────────────────────────
|
|
303
632
|
async status(documentId, signal) {
|
|
304
633
|
return this.requestJson(
|
|
@@ -613,4 +942,4 @@ var OkraClient = class {
|
|
|
613
942
|
export {
|
|
614
943
|
OkraClient
|
|
615
944
|
};
|
|
616
|
-
//# sourceMappingURL=chunk-OVRTURGN.js.map
|
|
945
|
+
//# sourceMappingURL=chunk-2HJPTW6S.js.map
|