okrapdf 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,626 @@
1
+ // src/errors.ts
2
/**
 * Base error type for failures raised by this SDK.
 *
 * Instances carry a machine-readable `code`, an HTTP-like numeric `status`
 * and an optional `details` payload next to the usual `message`.
 */
class OkraRuntimeError extends Error {
  code;
  status;
  details;

  /**
   * @param {string} code - Machine-readable error code (e.g. "HTTP_ERROR").
   * @param {string} message - Human-readable description.
   * @param {number} [status=500] - Status associated with the failure.
   * @param {*} [details] - Extra context; stored as given.
   */
  constructor(code, message, status = 500, details) {
    super(message);
    // `name` is written first so it lands on the instance before the payload
    // fields are (re)assigned.
    Object.assign(this, { name: "OkraRuntimeError", code, status, details });
  }
}
14
/**
 * Error raised for structured-output failures. It is an `OkraRuntimeError`
 * whose `name` is "StructuredOutputError".
 */
class StructuredOutputError extends OkraRuntimeError {
  // Redeclared on the subclass; the constructor assigns it again after super().
  code;

  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {number} [status] - Forwarded to the base class (which defaults it).
   * @param {*} [details] - Extra context; stored as given.
   */
  constructor(code, message, status, details) {
    super(code, message, status, details);
    Object.assign(this, { name: "StructuredOutputError", code });
  }
}
22
+
23
+ // src/client.ts
24
+ import { z } from "zod";
25
// Base URL used when the client options do not supply one.
const DEFAULT_BASE_URL = "https://api.okrapdf.com";

// Defaults for the wait() polling loop: five minutes overall, 1.5 s between polls.
const DEFAULT_WAIT_TIMEOUT_MS = 5 * 60 * 1000;
const DEFAULT_WAIT_POLL_MS = 1500;

// Status phases at which wait() returns successfully.
const COMPLETE_PHASES = /* @__PURE__ */ new Set(["complete", "awaiting_review"]);

// Status phases at which wait() gives up with an error.
const TERMINAL_ERROR_PHASES = /* @__PURE__ */ new Set(["error"]);

// Server error codes that are surfaced as StructuredOutputError.
const STRUCTURED_CODES = /* @__PURE__ */ new Set([
  "SCHEMA_VALIDATION_FAILED",
  "EXTRACTION_FAILED",
  "TIMEOUT",
  "DOCUMENT_NOT_FOUND"
]);

// Module specifiers for the dynamic imports in readLocalFileFromNode.
// NOTE(review): presumably assembled via template interpolation so bundlers do
// not statically resolve the Node built-ins — keep the form as is; confirm.
const NODE_FS_PROMISES_SPECIFIER = `node:${"fs/promises"}`;
const NODE_PATH_SPECIFIER = `node:${"path"}`;
38
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
41
/**
 * Tells whether `value` begins with an `http://` or `https://` scheme
 * (case-insensitive). Nothing after the scheme is validated.
 *
 * @param {string} value
 * @returns {boolean}
 */
function isHttpUrl(value) {
  const httpScheme = /^https?:\/\//i;
  return httpScheme.test(value);
}
44
/**
 * Tells whether `value` has the shape of a document id: an `ocr-` or `doc-`
 * prefix followed by one or more letters, digits, `_` or `-`.
 *
 * @param {string} value
 * @returns {boolean}
 */
function isDocumentId(value) {
  const documentIdShape = /^(?:ocr|doc)-[A-Za-z0-9_-]+$/;
  return documentIdShape.test(value);
}
47
/**
 * Strips every trailing `/` from `baseUrl` so paths that start with `/` can be
 * appended directly.
 *
 * @param {string} baseUrl
 * @returns {string}
 */
function normalizeBaseUrl(baseUrl) {
  const trailingSlashes = /\/+$/;
  const withoutTrailingSlash = baseUrl.replace(trailingSlashes, "");
  return withoutTrailingSlash;
}
50
/**
 * Generates a fresh client-side document id of the form `doc-<20 chars>`.
 *
 * Uses `crypto.randomUUID()` when it is available (dashes removed, first 20
 * hex characters kept). Otherwise falls back to `Math.random()`, drawing each
 * of the 20 characters separately from a base-36 alphabet so the suffix always
 * has full length; a single `Math.random().toString(36)` can be much shorter,
 * or even empty, which would produce an id that `isDocumentId` rejects.
 *
 * @returns {string} Id matching `/^doc-[0-9a-z]{20}$/`.
 */
function makeDocId() {
  const SUFFIX_LENGTH = 20;
  if (typeof crypto !== "undefined" && typeof crypto.randomUUID === "function") {
    return `doc-${crypto.randomUUID().replace(/-/g, "").slice(0, SUFFIX_LENGTH)}`;
  }
  const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
  let suffix = "";
  for (let i = 0; i < SUFFIX_LENGTH; i += 1) {
    // Math.random() is in [0, 1), so the index is always within the alphabet.
    suffix += alphabet[Math.floor(Math.random() * alphabet.length)];
  }
  return `doc-${suffix}`;
}
57
/**
 * Coerces binary input to a `Uint8Array` of its bytes.
 *
 * - A `Uint8Array` (including subclasses) is returned as is.
 * - Any other ArrayBuffer view (DataView, Uint16Array, ...) is re-viewed over
 *   the same underlying bytes. Handing such a view straight to the
 *   `Uint8Array` constructor would convert element values (typed arrays) or
 *   yield an empty array (DataView) instead of exposing the raw bytes.
 * - Everything else (e.g. an `ArrayBuffer`) goes to the `Uint8Array`
 *   constructor unchanged.
 *
 * @param {ArrayBuffer|ArrayBufferView|ArrayLike<number>} input
 * @returns {Uint8Array}
 */
function toUint8Array(input) {
  if (input instanceof Uint8Array) return input;
  if (ArrayBuffer.isView(input)) {
    return new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
  }
  return new Uint8Array(input);
}
61
/**
 * Tells whether `input` is a `Blob` (when the runtime defines one) or any
 * object exposing an `arrayBuffer()` method.
 *
 * @param {*} input
 * @returns {boolean}
 */
function isBlobLike(input) {
  const blobAvailable = typeof Blob !== "undefined";
  if (blobAvailable && input instanceof Blob) {
    return true;
  }
  if (!input || typeof input !== "object") {
    return false;
  }
  return typeof input.arrayBuffer === "function";
}
65
/**
 * Picks a file name for a blob-like input: its own `name` when that is a
 * string with non-whitespace content, otherwise `fallback`.
 *
 * @param {{ name?: unknown }} input
 * @param {string} fallback
 * @returns {string}
 */
function inferBlobName(input, fallback) {
  const { name } = input;
  const hasUsableName = typeof name === "string" && name.trim() !== "";
  return hasUsableName ? name : fallback;
}
72
/**
 * Reads a local file through Node's `fs/promises` and returns its bytes plus
 * its base name.
 *
 * The Node built-ins are loaded with dynamic `import()`. Failure to load them
 * and failure to read the file are reported separately, so a missing or
 * unreadable path under Node is no longer described as "only supported in
 * Node.js".
 *
 * @param {string} inputPath - Path passed to `readFile` and `basename`.
 * @returns {Promise<{ bytes: Uint8Array, fileName: string }>}
 * @throws {OkraRuntimeError} `INVALID_REQUEST` (400) when the Node modules
 *   cannot be imported or the file cannot be read; the original error is
 *   attached as `details`.
 */
async function readLocalFileFromNode(inputPath) {
  let fsModule;
  let pathModule;
  try {
    [fsModule, pathModule] = await Promise.all([
      import(NODE_FS_PROMISES_SPECIFIER),
      import(NODE_PATH_SPECIFIER)
    ]);
  } catch (error) {
    // The built-ins could not be loaded: treat the runtime as non-Node.
    throw new OkraRuntimeError(
      "INVALID_REQUEST",
      "Local file path uploads are only supported in Node.js. In browser runtimes, pass File/Blob, ArrayBuffer, Uint8Array, or URL.",
      400,
      error
    );
  }
  try {
    const raw = await fsModule.readFile(inputPath);
    return {
      bytes: toUint8Array(raw),
      fileName: pathModule.basename(inputPath)
    };
  } catch (error) {
    // The runtime is fine; the path itself could not be read.
    const reason = error instanceof Error ? error.message : String(error);
    throw new OkraRuntimeError(
      "INVALID_REQUEST",
      `Failed to read local file "${inputPath}": ${reason}`,
      400,
      error
    );
  }
}
92
/**
 * Normalizes a caller-supplied schema into `{ jsonSchema, parser? }`.
 *
 * A value exposing a `safeParse` function is treated as a zod schema: it is
 * converted with `z.toJSONSchema` (draft 2020-12 target) and also kept as
 * `parser`. Any other value is passed through as the JSON schema itself.
 *
 * @param {object} schema
 * @returns {{ jsonSchema: *, parser?: object }}
 */
function normalizeSchema(schema) {
  const looksLikeZod = typeof schema.safeParse === "function";
  if (!looksLikeZod) {
    return { jsonSchema: schema };
  }
  const jsonSchema = z.toJSONSchema(schema, { target: "draft-2020-12" });
  return { jsonSchema, parser: schema };
}
103
/**
 * Tells whether `code` is one of the error codes listed in STRUCTURED_CODES.
 * Falsy codes are never structured.
 *
 * @param {string|undefined|null} code
 * @returns {boolean}
 */
function isStructuredCode(code) {
  if (!code) {
    return false;
  }
  return STRUCTURED_CODES.has(code);
}
106
/**
 * Handle for a single document. It stores the document id and an optional
 * default model, and forwards every operation to the client it was created
 * from, passing its own id as the document id.
 */
class OkraSessionHandle {
  id;
  modelEndpoint;
  #model;
  #client;

  /**
   * @param {object} client - Client that performs the actual requests.
   * @param {string} documentId - Id of the document this handle refers to.
   * @param {string} [model] - Default model for prompt()/stream().
   */
  constructor(client, documentId, model) {
    this.#client = client;
    this.id = documentId;
    this.modelEndpoint = client.modelEndpoint(documentId);
    this.#model = model;
  }

  /** Current default model, if any. */
  get model() {
    return this.#model;
  }

  /** Snapshot of the handle: id, default model and model endpoint. */
  state() {
    const { id, modelEndpoint } = this;
    return { id, model: this.#model, modelEndpoint };
  }

  /**
   * Replaces the default model with the trimmed `model`.
   * Rejects with INVALID_REQUEST (400) when the trimmed value is empty.
   */
  async setModel(model) {
    const next = model.trim();
    if (!next) {
      throw new OkraRuntimeError("INVALID_REQUEST", "session.setModel requires a non-empty model", 400);
    }
    this.#model = next;
  }

  // ─── Pass-throughs to the client ─────────────────────────────────────────
  status(signal) {
    return this.#client.status(this.id, signal);
  }

  wait(options) {
    return this.#client.wait(this.id, options);
  }

  pages(options) {
    return this.#client.pages(this.id, options);
  }

  page(pageNumber, signal) {
    return this.#client.page(this.id, pageNumber, signal);
  }

  entities(options) {
    return this.#client.entities(this.id, options);
  }

  downloadUrl() {
    return this.#client.downloadUrl(this.id);
  }

  query(sql, signal) {
    return this.#client.query(this.id, sql, signal);
  }

  publish(signal) {
    return this.#client.publish(this.id, signal);
  }

  shareLink(options) {
    return this.#client.shareLink(this.id, options);
  }

  /**
   * Runs a completion via client.generate. The model is `options.model` when
   * given, else the handle's default; with neither, options pass through
   * untouched.
   */
  prompt(query, options) {
    return this.#client.generate(this.id, query, this.#withModel(options));
  }

  /** Streaming counterpart of prompt(), via client.stream. */
  stream(query, options) {
    return this.#client.stream(this.id, query, this.#withModel(options));
  }

  // Returns a copy of `options` carrying the effective model, or `options`
  // itself when no model is known.
  #withModel(options) {
    const effectiveModel = options?.model ?? this.#model;
    if (!effectiveModel) {
      return options;
    }
    return { ...options, model: effectiveModel };
  }
}
183
/**
 * HTTP client for the OkraPDF API.
 *
 * Authenticates with either an API key (Bearer token) or a shared secret
 * (`x-document-agent-secret` header) and exposes document operations
 * (upload, status/wait, pages, entities, query, completion, publish, share)
 * plus `sessions` and `deployments` helper namespaces built in the
 * constructor.
 */
class OkraClient {
  baseUrl;
  apiKey;
  sharedSecret;
  fetchImpl;
  sessions;
  deployments;

  /**
   * @param {object} options
   * @param {string} [options.baseUrl] - API origin; trailing slashes are
   *   stripped. Defaults to DEFAULT_BASE_URL.
   * @param {string} [options.apiKey] - Sent as `Authorization: Bearer ...`.
   * @param {string} [options.sharedSecret] - Sent as `x-document-agent-secret`
   *   when no apiKey is set.
   * @param {Function} [options.fetch] - fetch implementation; defaults to the
   *   global fetch bound to globalThis.
   * @throws {OkraRuntimeError} UNAUTHORIZED (401) when neither credential is
   *   provided.
   */
  constructor(options) {
    this.baseUrl = normalizeBaseUrl(options.baseUrl || DEFAULT_BASE_URL);
    this.apiKey = options.apiKey;
    this.sharedSecret = options.sharedSecret;
    this.fetchImpl = options.fetch || globalThis.fetch.bind(globalThis);
    if (!this.apiKey && !this.sharedSecret) {
      throw new OkraRuntimeError(
        "UNAUTHORIZED",
        "OkraClient requires either apiKey or sharedSecret",
        401
      );
    }
    // Warn when something other than a publishable key is used where a
    // `window` global exists.
    if (typeof globalThis !== "undefined" && "window" in globalThis && this.apiKey && !this.apiKey.startsWith("okra_pk_")) {
      console.warn(
        "[OkraPDF] Secret API key detected in browser. Use a publishable key (okra_pk_...) for client-side usage. See https://docs.okrapdf.dev/api-keys#publishable-keys"
      );
    }
    this.sessions = {
      // Accepts an existing document id or an upload source. Uploads when the
      // argument is not a document id, then (unless `wait` is false) waits for
      // processing to finish before returning the session handle.
      create: async (sourceOrDocId, sessionOptions = {}) => {
        let documentId;
        if (typeof sourceOrDocId === "string" && isDocumentId(sourceOrDocId.trim())) {
          documentId = sourceOrDocId.trim();
        } else {
          const uploaded = await this.upload(sourceOrDocId, sessionOptions.upload);
          documentId = uploaded.id;
        }
        const session = this.sessions.from(documentId, { model: sessionOptions.model });
        if (sessionOptions.wait ?? true) {
          await session.wait(sessionOptions.waitOptions);
        }
        return session;
      },
      // Builds a handle for a known document id without any network call.
      from: (documentId, sessionOptions = {}) => {
        const normalized = documentId.trim();
        if (!normalized) {
          throw new OkraRuntimeError(
            "INVALID_REQUEST",
            "sessions.from requires a non-empty documentId",
            400
          );
        }
        return new OkraSessionHandle(
          this,
          normalized,
          sessionOptions.model?.trim() || void 0
        );
      }
    };
    // Deployment routes live under `/d/<id>/...`. Ids and token hints are
    // percent-encoded as path segments, matching how document ids are handled
    // in the `/document/...` routes.
    const deploymentPath = (deploymentId, suffix) => `/d/${encodeURIComponent(deploymentId)}${suffix}`;
    const jsonPost = (payload) => ({
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload)
    });
    this.deployments = {
      create: async (opts) => {
        return this.requestJson("/v1/deployments", jsonPost(opts));
      },
      status: async (deploymentId) => {
        return this.requestJson(deploymentPath(deploymentId, "/status"), {
          method: "GET"
        });
      },
      completion: async (deploymentId, opts) => {
        return this.requestJson(deploymentPath(deploymentId, "/completion"), jsonPost(opts));
      },
      tokens: {
        create: async (deploymentId, opts) => {
          return this.requestJson(deploymentPath(deploymentId, "/tokens"), jsonPost(opts));
        },
        list: async (deploymentId) => {
          return this.requestJson(deploymentPath(deploymentId, "/tokens"), {
            method: "GET"
          });
        },
        revoke: async (deploymentId, tokenHint) => {
          return this.requestJson(
            deploymentPath(deploymentId, `/tokens/${encodeURIComponent(tokenHint)}`),
            { method: "DELETE" }
          );
        }
      }
    };
  }

  // ─── Upload ──────────────────────────────────────────────────────────────
  /**
   * Uploads a document and returns a session handle for it.
   *
   * `input` may be an http(s) URL (the server is asked to fetch it via
   * `/upload-url`), a local path string (read through Node), a Blob-like
   * object, or raw bytes. `options.documentId` is used when given, otherwise
   * a fresh id is generated.
   *
   * @param {*} input
   * @param {object} [options]
   * @returns {Promise<OkraSessionHandle>}
   */
  async upload(input, options = {}) {
    const documentId = options.documentId || makeDocId();
    const path = `/document/${encodeURIComponent(documentId)}`;
    const visibility = options.visibility || "private";
    if (typeof input === "string" && isHttpUrl(input)) {
      const urlHeaders = { "Content-Type": "application/json" };
      if (options.vendorKeys) {
        urlHeaders["X-Vendor-Keys"] = JSON.stringify(options.vendorKeys);
      }
      await this.requestJson(`${path}/upload-url`, {
        method: "POST",
        headers: urlHeaders,
        body: JSON.stringify({
          url: input,
          capabilities: options.capabilities,
          visibility,
          redact: options.redact
        })
      });
      return this.sessions.from(documentId);
    }
    let bytes;
    let fileName = options.fileName || "document.pdf";
    if (typeof input === "string") {
      // Any non-URL string is treated as a local file path.
      const local = await readLocalFileFromNode(input);
      bytes = local.bytes;
      if (!options.fileName) fileName = local.fileName;
    } else if (isBlobLike(input)) {
      bytes = toUint8Array(await input.arrayBuffer());
      if (!options.fileName) {
        fileName = inferBlobName(input, fileName);
      }
    } else {
      bytes = toUint8Array(input);
    }
    // For byte uploads the options travel as headers; the body is the file.
    const headers = {
      "Content-Type": "application/pdf",
      "X-File-Name": fileName
    };
    if (options.capabilities) {
      headers["X-Capabilities"] = JSON.stringify(options.capabilities);
    }
    if (options.vendorKeys) {
      headers["X-Vendor-Keys"] = JSON.stringify(options.vendorKeys);
    }
    if (options.redact) {
      headers["X-Redact"] = JSON.stringify(options.redact);
    }
    if (visibility === "public") {
      headers["X-Visibility"] = "public";
    }
    await this.requestJson(`${path}/upload`, {
      method: "POST",
      headers,
      body: bytes
    });
    return this.sessions.from(documentId);
  }

  // ─── Status / Wait ───────────────────────────────────────────────────────
  /** Fetches the current status object of a document. */
  async status(documentId, signal) {
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/status`,
      { method: "GET", signal }
    );
  }

  /**
   * Polls status() until the document reaches a phase in COMPLETE_PHASES and
   * returns that status.
   *
   * @param {string} documentId
   * @param {object} [options]
   * @param {number} [options.timeoutMs] - Defaults to DEFAULT_WAIT_TIMEOUT_MS.
   * @param {number} [options.pollIntervalMs] - Defaults to DEFAULT_WAIT_POLL_MS.
   * @param {AbortSignal} [options.signal] - Checked before every poll and
   *   forwarded to the status request.
   * @throws {OkraRuntimeError} TIMEOUT (499) when the signal is already
   *   aborted at a poll, EXTRACTION_FAILED (500) on a terminal error phase,
   *   TIMEOUT (504) once `timeoutMs` has elapsed after a poll.
   */
  async wait(documentId, options = {}) {
    const startedAt = Date.now();
    const timeoutMs = options.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS;
    const pollIntervalMs = options.pollIntervalMs ?? DEFAULT_WAIT_POLL_MS;
    while (true) {
      if (options.signal?.aborted) {
        throw new OkraRuntimeError("TIMEOUT", "Wait aborted", 499);
      }
      const current = await this.status(documentId, options.signal);
      if (COMPLETE_PHASES.has(current.phase)) {
        return current;
      }
      if (TERMINAL_ERROR_PHASES.has(current.phase)) {
        throw new OkraRuntimeError(
          "EXTRACTION_FAILED",
          `Document entered terminal error phase (${current.phase})`,
          500,
          current
        );
      }
      const elapsed = Date.now() - startedAt;
      if (elapsed >= timeoutMs) {
        throw new OkraRuntimeError(
          "TIMEOUT",
          `Timed out waiting for document ${documentId} after ${timeoutMs}ms`,
          504,
          current
        );
      }
      await sleep(pollIntervalMs);
    }
  }

  // ─── Pages ───────────────────────────────────────────────────────────────
  /** Fetches pages of a document, optionally limited by `options.range`. */
  async pages(documentId, options) {
    const params = options?.range ? `?range=${encodeURIComponent(options.range)}` : "";
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/pages${params}`,
      { method: "GET", signal: options?.signal }
    );
  }

  /** Fetches a single page by number. */
  async page(documentId, pageNumber, signal) {
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/page/${pageNumber}`,
      { method: "GET", signal }
    );
  }

  // ─── Download ──────────────────────────────────────────────────────────
  /** Builds the download URL of a document; performs no request. */
  downloadUrl(documentId) {
    return `${this.baseUrl}/document/${encodeURIComponent(documentId)}/download`;
  }

  // ─── Entities ────────────────────────────────────────────────────────────
  /**
   * Fetches the `/nodes` resource of a document. `type`, `limit` and `offset`
   * are only sent when truthy.
   */
  async entities(documentId, options) {
    const params = new URLSearchParams();
    if (options?.type) params.set("type", options.type);
    if (options?.limit) params.set("limit", String(options.limit));
    if (options?.offset) params.set("offset", String(options.offset));
    const qs = params.toString();
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/nodes${qs ? `?${qs}` : ""}`,
      { method: "GET", signal: options?.signal }
    );
  }

  // ─── Query (SQL) ─────────────────────────────────────────────────────────
  /** Sends `sql` as the `select` query parameter of the document's query route. */
  async query(documentId, sql, signal) {
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/query?select=${encodeURIComponent(sql)}`,
      { method: "GET", signal }
    );
  }

  // ─── Stream (streaming completion) ────────────────────────────────────────
  /**
   * Requests a completion and yields each newline-delimited JSON value from
   * the response body as it arrives.
   *
   * Lines that are empty or not valid JSON are skipped. When the consumer
   * stops early (or an error occurs) the underlying body reader is cancelled
   * before its lock is released, so the response is not left open.
   *
   * @throws {OkraRuntimeError} HTTP_ERROR on a non-OK response,
   *   INVALID_RESPONSE (500) when the response has no body.
   */
  async *stream(documentId, query, options) {
    const response = await this.rawRequest(
      `/document/${encodeURIComponent(documentId)}/completion`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          prompt: query,
          stream: options?.stream !== false,
          ...options?.model ? { model: options.model } : {}
        }),
        signal: options?.signal
      }
    );
    if (!response.ok) {
      const text = await response.text();
      throw new OkraRuntimeError("HTTP_ERROR", `Completion failed: ${text}`, response.status);
    }
    if (!response.body) {
      throw new OkraRuntimeError("INVALID_RESPONSE", "No response body for completion stream", 500);
    }
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        // The last piece may be an incomplete line; keep it for the next chunk.
        buffer = lines.pop() || "";
        for (const line of lines) {
          const trimmed = line.trim();
          if (!trimmed) continue;
          try {
            yield JSON.parse(trimmed);
          } catch {
            // Deliberately best-effort: non-JSON lines are skipped.
          }
        }
      }
      // Flush bytes the decoder may still hold from a split multi-byte sequence.
      buffer += decoder.decode();
      if (buffer.trim()) {
        try {
          yield JSON.parse(buffer.trim());
        } catch {
          // Trailing non-JSON content is skipped like any other bad line.
        }
      }
    } finally {
      try {
        // No-op after normal completion; on early exit this stops the transfer.
        await reader.cancel();
      } catch {
        // The stream may already be errored; nothing more to clean up.
      }
      reader.releaseLock();
    }
  }

  /**
   * Runs a non-streaming completion. With `options.schema` the call is
   * delegated to generateStructured(); otherwise the result is reduced to
   * `{ answer, costUsd }`.
   */
  async generate(documentId, query, options) {
    if (options?.schema) {
      return this.generateStructured(documentId, query, options);
    }
    const result = await this.requestJson(
      `/document/${encodeURIComponent(documentId)}/completion`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          prompt: query,
          ...options?.model ? { model: options.model } : {}
        }),
        signal: options?.signal
      }
    );
    return {
      answer: result.answer,
      costUsd: result.usage?.costUsd
    };
  }

  /**
   * Runs a completion constrained by a schema and returns
   * `{ answer: "", data, meta }`.
   *
   * When the schema provides a parser (see normalizeSchema), the returned
   * `data` is validated client-side with `safeParse`.
   *
   * @throws {OkraRuntimeError} INVALID_REQUEST (400) for an empty query.
   * @throws {StructuredOutputError} SCHEMA_VALIDATION_FAILED (422) when
   *   client-side validation fails; `details` holds the parser's issues.
   */
  async generateStructured(documentId, query, options) {
    if (!query || query.trim() === "") {
      throw new OkraRuntimeError("INVALID_REQUEST", "generate with schema requires a non-empty query", 400);
    }
    const normalized = normalizeSchema(options.schema);
    const body = await this.requestJson(
      `/document/${encodeURIComponent(documentId)}/completion`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          prompt: query,
          schema: normalized.jsonSchema,
          ...options.model ? { model: options.model } : {},
          ...options.timeoutMs ? { timeoutMs: options.timeoutMs } : {}
        }),
        signal: options.signal
      }
    );
    let data;
    if (normalized.parser) {
      const parsed = normalized.parser.safeParse(body.data);
      if (!parsed.success) {
        throw new StructuredOutputError(
          "SCHEMA_VALIDATION_FAILED",
          "Client-side schema validation failed for structured output response",
          422,
          parsed.error.issues
        );
      }
      data = parsed.data;
    } else {
      data = body.data;
    }
    return {
      answer: "",
      data,
      meta: body.meta
    };
  }

  // ─── Model Endpoint ──────────────────────────────────────────────────────
  /** Builds the `/v1/documents/<id>` URL of a document; performs no request. */
  modelEndpoint(documentId) {
    return `${this.baseUrl}/v1/documents/${encodeURIComponent(documentId)}`;
  }

  // ─── Publish / Share ────────────────────────────────────────────────────
  /** Publishes a document and returns the server result plus its `url`. */
  async publish(documentId, signal) {
    const result = await this.requestJson(
      `/document/${encodeURIComponent(documentId)}/publish`,
      { method: "POST", signal }
    );
    return {
      ...result,
      // Same URL as modelEndpoint(); built in one place to keep them in sync.
      url: this.modelEndpoint(documentId)
    };
  }

  /** Creates a share link; role/label/expiresInMs/maxViews are sent as given. */
  async shareLink(documentId, options) {
    return this.requestJson(
      `/document/${encodeURIComponent(documentId)}/share-link`,
      {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          role: options?.role,
          label: options?.label,
          expiresInMs: options?.expiresInMs,
          maxViews: options?.maxViews
        }),
        signal: options?.signal
      }
    );
  }

  // ─── Public HTTP ─────────────────────────────────────────────────────────
  /** Authenticated JSON request against an arbitrary API path. */
  async request(path, init = {}) {
    return this.requestJson(path, init);
  }

  /** Normalized base URL of this client. */
  get url() {
    return this.baseUrl;
  }

  // ─── Internal HTTP ───────────────────────────────────────────────────────
  /** Auth headers for this client; the API key wins over the shared secret. */
  authHeaders() {
    if (this.apiKey) return { Authorization: `Bearer ${this.apiKey}` };
    if (this.sharedSecret) return { "x-document-agent-secret": this.sharedSecret };
    return {};
  }

  /**
   * Performs a fetch against `baseUrl + path`, adding auth headers that the
   * caller has not already set, and returns the raw Response.
   *
   * @throws {OkraRuntimeError} HTTP_ERROR (502) when fetch itself rejects;
   *   the original error is attached as `details`.
   */
  async rawRequest(path, init) {
    const headers = new Headers(init.headers);
    for (const [key, value] of Object.entries(this.authHeaders())) {
      if (!headers.has(key)) headers.set(key, value);
    }
    try {
      return await this.fetchImpl(`${this.baseUrl}${path}`, { ...init, headers });
    } catch (err) {
      throw new OkraRuntimeError(
        "HTTP_ERROR",
        err instanceof Error ? err.message : String(err),
        502,
        err
      );
    }
  }

  /**
   * Performs a request and returns the parsed JSON body.
   *
   * @throws {StructuredOutputError} On a non-OK response whose body carries a
   *   code listed in STRUCTURED_CODES.
   * @throws {OkraRuntimeError} UNAUTHORIZED for status 401, HTTP_ERROR for
   *   any other non-OK status, INVALID_RESPONSE when an OK response is empty
   *   or not JSON.
   */
  async requestJson(path, init) {
    const response = await this.rawRequest(path, init);
    const text = await response.text();
    const parsed = this.parseBody(text);
    if (!response.ok) {
      const envelope = parsed;
      const code = envelope?.code;
      const message = envelope?.message || envelope?.error || `Request failed with status ${response.status}`;
      // Prefer explicit details, then the whole parsed body, then raw text.
      const details = envelope?.details ?? parsed ?? text;
      if (isStructuredCode(code)) {
        throw new StructuredOutputError(code, message, response.status, details);
      }
      const runtimeCode = response.status === 401 ? "UNAUTHORIZED" : "HTTP_ERROR";
      throw new OkraRuntimeError(runtimeCode, message, response.status, details);
    }
    if (parsed === null) {
      throw new OkraRuntimeError(
        "INVALID_RESPONSE",
        `Expected JSON response for ${path}`,
        response.status,
        text
      );
    }
    return parsed;
  }

  /** Parses `text` as JSON; returns null for blank or invalid input. */
  parseBody(text) {
    const trimmed = text.trim();
    if (!trimmed) return null;
    try {
      return JSON.parse(trimmed);
    } catch {
      return null;
    }
  }
}
620
+
621
+ export {
622
+ OkraRuntimeError,
623
+ StructuredOutputError,
624
+ OkraClient
625
+ };
626
+ //# sourceMappingURL=chunk-HITG34US.js.map