okrapdf 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.d.ts +9 -0
- package/dist/browser.js +16 -0
- package/dist/browser.js.map +1 -0
- package/dist/chunk-AG3A2T3B.js +84 -0
- package/dist/chunk-AG3A2T3B.js.map +1 -0
- package/dist/chunk-C6ZT7DKX.js +113 -0
- package/dist/chunk-C6ZT7DKX.js.map +1 -0
- package/dist/chunk-HITG34US.js +626 -0
- package/dist/chunk-HITG34US.js.map +1 -0
- package/dist/chunk-SBT5T6ZK.js +817 -0
- package/dist/chunk-SBT5T6ZK.js.map +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +212 -0
- package/dist/cli/bin.js.map +1 -0
- package/dist/cli/index.d.ts +536 -0
- package/dist/cli/index.js +73 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/client-aHzx0a5x.d.ts +58 -0
- package/dist/index.d.ts +57 -0
- package/dist/index.js +26 -0
- package/dist/index.js.map +1 -0
- package/dist/react/index.d.ts +197 -0
- package/dist/react/index.js +432 -0
- package/dist/react/index.js.map +1 -0
- package/dist/types-DEYgGUnH.d.ts +329 -0
- package/dist/url.d.ts +48 -0
- package/dist/url.js +7 -0
- package/dist/url.js.map +1 -0
- package/dist/worker.d.ts +44 -0
- package/dist/worker.js +82 -0
- package/dist/worker.js.map +1 -0
- package/package.json +82 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import { ZodType } from 'zod';
|
|
2
|
+
|
|
3
|
+
/** Free-form JSON Schema object: any string-keyed record. This type does not validate the schema itself. */
type JsonSchema = Record<string, unknown>;

/** String codes in the structured-output error category. */
type StructuredOutputErrorCode =
  | 'SCHEMA_VALIDATION_FAILED'
  | 'EXTRACTION_FAILED'
  | 'TIMEOUT'
  | 'DOCUMENT_NOT_FOUND';

/** Full set of runtime error codes: every `StructuredOutputErrorCode` plus four request/response-level codes. */
type RuntimeErrorCode =
  | StructuredOutputErrorCode
  | 'INVALID_REQUEST'
  | 'UNAUTHORIZED'
  | 'HTTP_ERROR'
  | 'INVALID_RESPONSE';

/** Options for constructing the Okra client. Every field is optional. */
interface OkraClientOptions {
  /** Hosted default points at api.okrapdf.com. */
  baseUrl?: string;
  /** Bearer API key (okra_...). */
  apiKey?: string;
  /** Alternative auth header for private deployments. */
  sharedSecret?: string;
  /** Inject custom fetch implementation for tests or runtime overrides. */
  fetch?: typeof globalThis.fetch;
}
|
|
16
|
+
/**
 * PII redaction settings, used as the `pii` member of `UploadRedactOptions`.
 * The index signature permits extra keys beyond the named ones.
 */
interface UploadRedactPiiOptions {
  preset?: string;
  patterns?: string[];
  includeNames?: boolean;
  includeAddresses?: boolean;
  customPatterns?: Record<string, unknown>[];
  [key: string]: unknown;
}

/** Redaction policy carried by `UploadOptions.redact`. Extra keys are permitted. */
interface UploadRedactOptions {
  pii?: UploadRedactPiiOptions;
  publicFieldAllowlist?: string[];
  [key: string]: unknown;
}

/** Per-upload options. Every field is optional. */
interface UploadOptions {
  /** Provide your own document ID. Default: auto-generated `doc-*`. */
  documentId?: string;
  /** Optional filename hint for binary uploads. */
  fileName?: string;
  /** Processing capability hints forwarded to the worker. */
  capabilities?: Record<string, unknown>;
  /** Document visibility. 'private' (default) requires auth; 'public' auto-publishes on completion. */
  visibility?: 'public' | 'private';
  /** BYOK vendor keys passed through to extraction (e.g. { llamaparse: 'llx-...' }). Stateless — never stored. */
  vendorKeys?: Record<string, string>;
  /** OpenRedact policy forwarded to upload and enforced at read/query/completion surfaces. */
  redact?: UploadRedactOptions;
}

/** Accepted upload payloads: a string or binary data (`ArrayBuffer`, `Uint8Array`, or `Blob`). */
type UploadInput = string | ArrayBuffer | Uint8Array | Blob;
|
|
44
|
+
/**
 * Document status as resolved by `OkraSession.status()` and `OkraSession.wait()`.
 * Only `phase` is required; the counters are optional and extra keys are permitted.
 */
interface DocumentStatus {
  phase: string;
  pagesTotal?: number;
  pagesCompleted?: number;
  totalNodes?: number;
  verifiedNodes?: number;
  failedNodes?: number;
  pendingNodes?: number;
  [key: string]: unknown;
}

/**
 * Options accepted by `OkraSession.wait()` and by `SessionCreateOptions.waitOptions`.
 * NOTE(review): the `*Ms` fields are presumably milliseconds — confirm against the client implementation.
 */
interface WaitOptions {
  timeoutMs?: number;
  pollIntervalMs?: number;
  signal?: AbortSignal;
}
|
|
59
|
+
/** Metadata describing a structured result; exposed as `GenerateResult.meta`. */
interface StructuredOutputMeta {
  confidence: number;
  model: string;
  durationMs: number;
  citations?: {
    page: number;
    text: string;
  }[];
}

/** Schema for structured output: either a raw JSON Schema object or a Zod type parameterised by `T`. */
type StructuredSchema<T> = JsonSchema | ZodType<T>;
|
|
69
|
+
/** One text block on a page, with an optional bounding box and confidence. */
interface PageBlock {
  text: string;
  bbox?: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  confidence?: number;
}

/** Compact entity reference embedded in a `Page`. */
interface PageEntity {
  id: string;
  type: string;
  label: string | null;
}

/** A single page as resolved by `OkraSession.page()` / `OkraSession.pages()`. */
interface Page {
  page: number;
  content: string;
  blocks: PageBlock[];
  entities: PageEntity[];
}

/**
 * Full entity record as listed in `EntitiesResponse.nodes`.
 * Field names are snake_case, unlike most other types in this file.
 */
interface Entity {
  id: string;
  type: string;
  label: string | null;
  value: string | null;
  page_number: number | null;
  status: string;
  bbox_x?: number | null;
  bbox_y?: number | null;
  bbox_w?: number | null;
  bbox_h?: number | null;
  /** NOTE(review): typed as a string, presumably serialized JSON — confirm against the API. */
  metadata?: string | null;
}

/** Result of `OkraSession.entities()`: the entity list plus optional paging fields. */
interface EntitiesResponse {
  nodes: Entity[];
  total?: number;
  limit?: number;
  offset?: number;
}

/** Result of `OkraSession.query()`: row objects and the column names. */
interface QueryResult {
  rows: Record<string, unknown>[];
  columns: string[];
}
|
|
113
|
+
/**
 * Event yielded by `OkraSession.stream()`.
 * Discriminated on `type`: `'text_delta'`, `'tool_result'`, `'done'`, or `'error'`.
 */
type CompletionEvent =
  | {
      type: 'text_delta';
      text: string;
    }
  | {
      type: 'tool_result';
      name: string;
      result: unknown;
    }
  | {
      type: 'done';
      answer: string;
      costUsd?: number;
      sources?: {
        page: number;
        snippet: string;
      }[];
    }
  | {
      type: 'error';
      message: string;
    };

/** Options for `OkraSession.stream()`. */
interface CompletionOptions {
  stream?: boolean;
  model?: string;
  signal?: AbortSignal;
}

/** Options for `OkraSession.prompt()`. Supplying `schema` selects the typed overload. */
interface GenerateOptions {
  schema?: StructuredSchema<unknown>;
  model?: string;
  timeoutMs?: number;
  signal?: AbortSignal;
}

/** Result of `OkraSession.prompt()`. `T` is the structured payload type and defaults to `undefined`. */
interface GenerateResult<T = undefined> {
  answer: string;
  sources?: {
    page: number;
    snippet: string;
  }[];
  costUsd?: number;
  /** Present when schema is provided. */
  data?: T;
  /** Present when schema is provided. */
  meta?: StructuredOutputMeta;
}
|
|
155
|
+
/** Options for creating a session. */
interface SessionCreateOptions {
  /** Wait for extraction to complete before returning the session handle. Default: true */
  wait?: boolean;
  /** Default model used by prompt()/stream() unless overridden per call. */
  model?: string;
  /** Upload options used when source is URL/path/file (not an existing doc ID). */
  upload?: UploadOptions;
  /** Wait options used when `wait` is enabled. */
  waitOptions?: WaitOptions;
}

/** Options for attaching to a session. */
interface SessionAttachOptions {
  /** Default model used by prompt()/stream() unless overridden per call. */
  model?: string;
}

/** Plain snapshot of a session, as returned by `OkraSession.state()`. */
interface SessionState {
  id: string;
  model?: string;
  modelEndpoint: string;
}

/** Handle for working with one document: status, pages, entities, queries, sharing and prompting. */
interface OkraSession {
  readonly id: string;
  readonly modelEndpoint: string;
  readonly model?: string;

  state(): SessionState;
  setModel(model: string): Promise<void>;

  status(signal?: AbortSignal): Promise<DocumentStatus>;
  wait(options?: WaitOptions): Promise<DocumentStatus>;

  pages(options?: { range?: string; signal?: AbortSignal }): Promise<Page[]>;
  page(pageNumber: number, signal?: AbortSignal): Promise<Page>;
  entities(options?: {
    type?: string;
    limit?: number;
    offset?: number;
    signal?: AbortSignal;
  }): Promise<EntitiesResponse>;

  downloadUrl(): string;
  query(sql: string, signal?: AbortSignal): Promise<QueryResult>;
  publish(signal?: AbortSignal): Promise<PublishResult>;
  shareLink(options?: ShareLinkOptions): Promise<ShareLinkResult>;

  /** Overload without a schema: resolves to an untyped `GenerateResult`. */
  prompt(
    query: string,
    options?: GenerateOptions & { schema?: undefined },
  ): Promise<GenerateResult>;
  /** Overload with a schema: `data` in the result is typed as `T`. */
  prompt<T>(
    query: string,
    options: GenerateOptions & { schema: StructuredSchema<T> },
  ): Promise<GenerateResult<T>>;

  stream(query: string, options?: CompletionOptions): AsyncGenerator<CompletionEvent>;
}
|
|
205
|
+
/** Result of `OkraSession.publish()`. */
interface PublishResult {
  published: boolean;
  documentId: string;
  version: string;
  publicUrl: string;
  /** Immutable public URL: https://api.okrapdf.com/v1/documents/{id} */
  url: string;
  hash: string;
  slug: string;
  canonicalPath: string;
}

/** Options for `OkraSession.shareLink()`. */
interface ShareLinkOptions {
  /** Link role: 'viewer' (redacted/PDF access), 'admin' (full access), or 'ask' (public completion). */
  role?: 'viewer' | 'ask' | 'admin';
  label?: string;
  expiresInMs?: number;
  maxViews?: number;
  signal?: AbortSignal;
}

/** Per-artifact links of a share link; each entry is a string or `null`. */
interface ShareLinkLinks {
  markdown: string | null;
  pdf: string | null;
  completion: string | null;
}

/** Capability flags attached to a share link. */
interface ShareLinkCapabilities {
  canViewPdf: boolean;
}

/** Result of `OkraSession.shareLink()`. */
interface ShareLinkResult {
  documentId: string;
  token: string;
  tokenHint: string;
  links: ShareLinkLinks;
  capabilities: ShareLinkCapabilities;
  role: string;
  /** NOTE(review): numeric timestamp; the unit (presumably epoch milliseconds) is not shown here — confirm. */
  expiresAt: number;
  maxViews: number | null;
}
|
|
242
|
+
/** Options for creating a deployment. Every field is optional. */
interface DeploymentCreateOptions {
  documentId?: string;
  collectionId?: string;
  guestAccess?: 'none' | 'read' | 'ask';
  dataSource?: 'live' | 'snapshot';
  chatPersistence?: 'persisted' | 'ephemeral';
}

/** Identifiers and endpoint URLs for a created deployment. */
interface DeploymentResult {
  deploymentId: string;
  completionUrl: string;
  statusUrl: string;
}

/** Deployment status. Extra keys are permitted by the index signature. */
interface DeploymentStatus {
  deploymentId: string;
  phase: string;
  guestCount?: number;
  eventCounter?: number;
  [key: string]: unknown;
}

/** Input for a deployment completion. Only `prompt` is required. */
interface DeploymentCompletionOptions {
  prompt: string;
  guestId?: string;
  model?: string;
}

/** Output of a deployment completion. Only `answer` is required. */
interface DeploymentCompletionResult {
  answer: string;
  model?: string;
  usage?: {
    inputTokens?: number;
    outputTokens?: number;
    costUsd?: number;
  };
  toolCalls?: unknown[];
  guestId?: string;
  taskRunId?: string;
  cached?: boolean;
}

/** Options for creating a deployment token. `role` is required. */
interface DeploymentTokenCreateOptions {
  role: 'ask' | 'viewer' | 'admin';
  maxUses?: number;
  expiresInMs?: number;
}

/** A deployment token: the token string, a hint, its role and optional limits. */
interface DeploymentToken {
  token: string;
  tokenHint: string;
  role: string;
  maxUses?: number;
  expiresAt?: number | null;
}

/**
 * Listing of deployment tokens.
 * Entries use snake_case keys, unlike `DeploymentToken`, and permit extra keys.
 */
interface DeploymentTokenListResult {
  tokens: {
    token_hint: string;
    role: string;
    current_uses: number;
    max_uses: number | null;
    revoked_at: number | null;
    [key: string]: unknown;
  }[];
}

/** Result of revoking a deployment token. */
interface DeploymentTokenRevokeResult {
  revoked: boolean;
  tokenHint: string;
}
|
|
305
|
+
/** Per-URL options accepted by the `url()` builders. */
interface UrlBuilderOptions {
  format?: 'json' | 'csv' | 'html' | 'markdown' | 'png';
  include?: string[];
  /** Provider transformation — changes extraction source, e.g. 'llamaparse', 'googleocr'. */
  provider?: string;
}

/** Builder-wide defaults accepted by `doc()`. */
interface DocUrlOptions {
  /**
   * Original source filename used to build friendly artifact URLs, e.g.
   * /.../invoice.json
   */
  fileName?: string;
  /**
   * Default provider transformation applied to all URLs from this builder.
   * Cloudinary-style: `/t_llamaparse/pages/1.md` vs `/t_googleocr/pages/1.json`
   */
  provider?: string;
  /**
   * Default image placeholder type when page image is not yet available.
   * Inserts `/d_{type}/` segment. e.g. 'shimmer' → `/d_shimmer/pages/1/image.png`
   */
  defaultImage?: string;
}
|
|
328
|
+
|
|
329
|
+
export type { ShareLinkLinks as A, StructuredOutputMeta as B, CompletionOptions as C, DocUrlOptions as D, EntitiesResponse as E, UploadRedactOptions as F, GenerateOptions as G, UploadRedactPiiOptions as H, JsonSchema as J, OkraSession as O, Page as P, QueryResult as Q, RuntimeErrorCode as R, SessionCreateOptions as S, UrlBuilderOptions as U, WaitOptions as W, UploadInput as a, SessionAttachOptions as b, DeploymentCreateOptions as c, DeploymentResult as d, DeploymentStatus as e, DeploymentCompletionOptions as f, DeploymentCompletionResult as g, DeploymentTokenCreateOptions as h, DeploymentToken as i, DeploymentTokenListResult as j, DeploymentTokenRevokeResult as k, OkraClientOptions as l, UploadOptions as m, DocumentStatus as n, CompletionEvent as o, GenerateResult as p, StructuredSchema as q, PublishResult as r, ShareLinkOptions as s, ShareLinkResult as t, StructuredOutputErrorCode as u, Entity as v, PageBlock as w, PageEntity as x, SessionState as y, ShareLinkCapabilities as z };
|
package/dist/url.d.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { D as DocUrlOptions, U as UrlBuilderOptions } from './types-DEYgGUnH.js';
|
|
2
|
+
import 'zod';
|
|
3
|
+
|
|
4
|
+
/** URL getters for one page: one function per artifact format. */
interface PgPage {
  png: () => string;
  md: () => string;
  json: () => string;
}

/** Number-indexed page lookup, e.g. `d.pg[1]`. */
interface PgProxy {
  [index: number]: PgPage;
}

/** URL builder surface for one document, as returned by `doc()`. */
interface DocumentUrl {
  /** Base document URL */
  url: (opts?: UrlBuilderOptions) => string;
  /** Thumbnail image URL (pg_1.png) */
  thumbnail: {
    url: () => string;
  };
  /** Page access: d.pg[1].png(), d.pg[1].md(), d.pg[1].json() */
  pg: PgProxy;
  /** Entity-level access */
  entities: EntitiesProxy;
}

/** Entity collections reachable from `DocumentUrl.entities`. */
interface EntitiesProxy {
  tables: EntityCollectionProxy;
  figures: EntityCollectionProxy;
}

/**
 * One entity collection. A numeric index selects a single entity, whose `url()`
 * accepts only 'json' | 'csv' | 'html'. The collection-level `url()` takes the
 * full `UrlBuilderOptions`.
 */
interface EntityCollectionProxy {
  [index: number]: {
    url: (opts?: { format?: 'json' | 'csv' | 'html' }) => string;
  };
  url: (opts?: UrlBuilderOptions) => string;
}
|
|
36
|
+
/**
 * Build-time URL builder — Cloudinary for documents.
 *
 * ```tsx
 * import { doc } from 'okrapdf';
 * const d = doc('doc_7fK3x');
 * <Image src={d.thumbnail.url()} />
 * <a href={d.entities.tables[0].url({ format: 'csv' })}>CSV</a>
 * ```
 *
 * @param documentId ID of the document to build URLs for.
 * @param baseUrlOrOptions Either a string or a `DocUrlOptions` object.
 *   NOTE(review): the string form is presumably a base URL, per the parameter name — confirm in url.js.
 * @param maybeOptions Optional `DocUrlOptions`.
 *   NOTE(review): presumably used when the second argument is a string — confirm in url.js.
 * @returns A `DocumentUrl` builder.
 */
declare function doc(
  documentId: string,
  baseUrlOrOptions?: string | DocUrlOptions,
  maybeOptions?: DocUrlOptions,
): DocumentUrl;
|
|
47
|
+
|
|
48
|
+
export { doc };
|
package/dist/url.js
ADDED
package/dist/url.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/dist/worker.d.ts
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* okrapdf/worker — self-host subpath export.
|
|
3
|
+
*
|
|
4
|
+
* Exports a request handler that forwards to your DocumentAgent DO class
|
|
5
|
+
* for self-hosted deployments.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ```ts
|
|
9
|
+
* // worker.ts
|
|
10
|
+
* import { handleRequest } from 'okrapdf/worker';
|
|
11
|
+
* import { DocumentAgent } from './document-agent';
|
|
12
|
+
*
|
|
13
|
+
* export { DocumentAgent };
|
|
14
|
+
* export default { fetch: (request, env) => handleRequest(env)(request) };
|
|
15
|
+
* ```
|
|
16
|
+
*
|
|
17
|
+
* ```toml
|
|
18
|
+
* # wrangler.toml
|
|
19
|
+
* [durable_objects]
|
|
20
|
+
* bindings = [{ name = "DOCUMENT_AGENT", class_name = "DocumentAgent" }]
|
|
21
|
+
* ```
|
|
22
|
+
*/
|
|
23
|
+
/**
 * Minimal structural view of a Durable Object namespace binding.
 * Only the two calls the router makes are declared: resolve an id from a name,
 * then get a stub whose `fetch` forwards a request.
 */
interface DurableObjectBinding {
  idFromName(name: string): { toString(): string };
  get(id: { toString(): string }): { fetch(request: Request): Promise<Response> };
}

/** Environment object passed to `handleRequest`. Extra bindings are permitted. */
interface WorkerEnv {
  DOCUMENT_AGENT: DurableObjectBinding;
  DOCUMENT_AGENT_SHARED_SECRET?: string;
  [key: string]: unknown;
}
|
|
38
|
+
/**
 * Minimal request router that forwards /document/:id/* and /v1/documents/:id/*
 * to the corresponding Durable Object.
 *
 * @param env Worker environment providing the `DOCUMENT_AGENT` binding.
 * @returns A fetch-style handler that takes a `Request` and resolves to a `Response`.
 */
declare function handleRequest(
  env: WorkerEnv,
): (request: Request) => Promise<Response>;
|
|
43
|
+
|
|
44
|
+
export { type DurableObjectBinding, type WorkerEnv, handleRequest };
|
package/dist/worker.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// src/worker.ts
|
|
2
|
+
// Trailing artifact filename such as "/report.png" or "/document.md".
const DATA_URL_ALIAS_RE = /\/[a-z0-9][a-z0-9._-]*\.(json|html|md|markdown|pdf|png|csv|txt)$/i;
// Same idea, but with an explicit "/_/" marker segment before the filename.
const DATA_URL_MARKER_ALIAS_RE = /\/_\/[^/]*\.(json|html|md|markdown|pdf|png|csv|txt)$/i;

// "/pages/<n>/image.<ext>" — canonical page image path with an extension.
const CANONICAL_PAGE_IMAGE_EXT_RE = /\/pages\/(\d+)\/image\.(png|jpg|jpeg|webp)$/i;
// "/pages/<n>/markdown.<ext>" — canonical page markdown path with an extension.
const CANONICAL_PAGE_MARKDOWN_EXT_RE = /\/pages\/(\d+)\/markdown\.(md|markdown)$/i;

// "/pages/<n>/<any-name>.<image-ext>" — friendly filename for a page image.
const PAGE_IMAGE_ALIAS_RE = /\/pages\/(\d+)\/[^/]+\.(png|jpg|jpeg|webp)$/i;
// "/pages/<n>/<any-name>.<md-ext>" — friendly filename for page markdown.
const PAGE_MARKDOWN_ALIAS_RE = /\/pages\/(\d+)\/[^/]+\.(md|markdown)$/i;
// "/pages/<n>/markdown/[_/]<any-name>.<md-ext>" — filename nested under /markdown.
const PAGE_MARKDOWN_NESTED_ALIAS_RE = /\/pages\/(\d+)\/markdown\/(?:_\/)?[^/]+\.(md|markdown)$/i;

// Whole path segment of the form "t_<name>" (provider transform).
const PROVIDER_SEGMENT_RE = /^t_[a-z0-9][a-z0-9._-]*$/i;
// Whole path segment of the form "d_<name>" (default image; also allows ":").
const DEFAULT_IMAGE_SEGMENT_RE = /^d_[a-z0-9][a-z0-9._:-]*$/i;
|
|
11
|
+
/**
 * Rewrite "friendly" artifact URLs to the canonical path before routing.
 *
 * Only GET and HEAD requests are rewritten; any other method returns the path
 * untouched. The first matching rule wins:
 *   - page image filenames       -> "/pages/<n>/image"
 *   - page markdown filenames    -> "/pages/<n>/markdown"
 *   - trailing artifact filename -> removed (with or without the "/_/" marker)
 *
 * @param pathname URL pathname to normalize.
 * @param method   HTTP method of the request.
 * @returns The normalized pathname, or `pathname` unchanged when no rule applies.
 */
function normalizeDataAliasPath(pathname, method) {
  if (method !== "GET" && method !== "HEAD") return pathname;

  // Ordered most-specific first so every rule is reachable. The canonical
  // patterns are subsets of the alias patterns and rewrite to the same target,
  // so the result for any input is the same as with the alias checked first.
  const rewrites = [
    { pattern: CANONICAL_PAGE_IMAGE_EXT_RE, replacement: "/pages/$1/image" },
    { pattern: PAGE_IMAGE_ALIAS_RE, replacement: "/pages/$1/image" },
    { pattern: PAGE_MARKDOWN_NESTED_ALIAS_RE, replacement: "/pages/$1/markdown" },
    { pattern: CANONICAL_PAGE_MARKDOWN_EXT_RE, replacement: "/pages/$1/markdown" },
    { pattern: PAGE_MARKDOWN_ALIAS_RE, replacement: "/pages/$1/markdown" },
    { pattern: DATA_URL_MARKER_ALIAS_RE, replacement: "" },
    { pattern: DATA_URL_ALIAS_RE, replacement: "" }
  ];

  for (const { pattern, replacement } of rewrites) {
    if (pattern.test(pathname)) return pathname.replace(pattern, replacement);
  }
  return pathname;
}
|
|
34
|
+
/**
 * Remove transform segments from a document subpath.
 *
 * Segments matching `t_<name>` are dropped. Segments matching `d_<name>` are
 * dropped too, and the part after the "d_" prefix is reported as
 * `defaultImage`; when several are present the last one wins.
 *
 * @param subpath Path after the document id: either "" or starting with "/".
 * @returns `{ subpath, defaultImage }`. `subpath` is "" when nothing but a bare
 *   "/" would remain; `defaultImage` is undefined when no `d_` segment was seen.
 */
function normalizeTransformSegments(subpath) {
  // The first element is the empty string before the leading "/", so it is kept as-is.
  const [head, ...rest] = subpath.split("/");
  let defaultImage;

  const kept = rest.filter((segment) => {
    if (PROVIDER_SEGMENT_RE.test(segment)) return false;
    if (!DEFAULT_IMAGE_SEGMENT_RE.test(segment)) return true;
    defaultImage = segment.slice(2); // strip the "d_" prefix
    return false;
  });

  const joined = [head, ...kept].join("/");
  return {
    subpath: joined === "/" ? "" : joined,
    defaultImage
  };
}
|
|
52
|
+
function handleRequest(env) {
|
|
53
|
+
return async (request) => {
|
|
54
|
+
const normalizedUrl = new URL(request.url);
|
|
55
|
+
normalizedUrl.pathname = normalizeDataAliasPath(normalizedUrl.pathname, request.method);
|
|
56
|
+
const url = normalizedUrl;
|
|
57
|
+
const path = url.pathname;
|
|
58
|
+
const docMatch = path.match(/^\/(?:document|v1\/documents)\/([^/]+)(\/.*)?$/);
|
|
59
|
+
if (!docMatch) {
|
|
60
|
+
return new Response(JSON.stringify({ error: "Not found" }), {
|
|
61
|
+
status: 404,
|
|
62
|
+
headers: { "Content-Type": "application/json" }
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
const documentId = decodeURIComponent(docMatch[1]);
|
|
66
|
+
const rawSubpath = docMatch[2] || "";
|
|
67
|
+
const { subpath: cleanSubpath, defaultImage } = normalizeTransformSegments(rawSubpath);
|
|
68
|
+
const subpath = cleanSubpath || "/status";
|
|
69
|
+
const doId = env.DOCUMENT_AGENT.idFromName(documentId);
|
|
70
|
+
const stub = env.DOCUMENT_AGENT.get(doId);
|
|
71
|
+
const doUrl = new URL(url.toString());
|
|
72
|
+
doUrl.pathname = `/document/${documentId}${subpath}`;
|
|
73
|
+
const headers = defaultImage ? new Headers(request.headers) : void 0;
|
|
74
|
+
if (headers && defaultImage) headers.set("X-Okra-Default", defaultImage);
|
|
75
|
+
const doRequest = headers ? new Request(doUrl.toString(), { method: request.method, headers, body: request.body }) : new Request(doUrl.toString(), request);
|
|
76
|
+
return stub.fetch(doRequest);
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
export {
|
|
80
|
+
handleRequest
|
|
81
|
+
};
|
|
82
|
+
//# sourceMappingURL=worker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/worker.ts"],"sourcesContent":["/**\n * @okrapdf/runtime/worker — self-host subpath export.\n *\n * Re-export the DocumentAgent DO class and a request handler\n * for self-hosted deployments.\n *\n * Usage:\n * ```ts\n * // worker.ts\n * import { handleRequest } from '@okrapdf/runtime/worker';\n * import { DocumentAgent } from './document-agent';\n *\n * export { DocumentAgent };\n * export default { fetch: handleRequest({ DOCUMENT_AGENT: env.DOCUMENT_AGENT }) };\n * ```\n *\n * ```toml\n * # wrangler.toml\n * [durable_objects]\n * bindings = [{ name = \"DOCUMENT_AGENT\", class_name = \"DocumentAgent\" }]\n * ```\n */\n\nexport interface DurableObjectBinding {\n idFromName(name: string): { toString(): string };\n get(id: { toString(): string }): { fetch(request: Request): Promise<Response> };\n}\n\nexport interface WorkerEnv {\n DOCUMENT_AGENT: DurableObjectBinding;\n DOCUMENT_AGENT_SHARED_SECRET?: string;\n [key: string]: unknown;\n}\n\n// Strip trailing artifact filenames from URLs (e.g. 
/report.png, /document.md).\n// Matches both plain format and legacy suffix format for backward compat.\nconst DATA_URL_ALIAS_RE = /\\/[a-z0-9][a-z0-9._-]*\\.(json|html|md|markdown|pdf|png|csv|txt)$/i;\nconst DATA_URL_MARKER_ALIAS_RE = /\\/_\\/[^/]*\\.(json|html|md|markdown|pdf|png|csv|txt)$/i;\nconst CANONICAL_PAGE_IMAGE_EXT_RE = /\\/pages\\/(\\d+)\\/image\\.(png|jpg|jpeg|webp)$/i;\nconst CANONICAL_PAGE_MARKDOWN_EXT_RE = /\\/pages\\/(\\d+)\\/markdown\\.(md|markdown)$/i;\nconst PAGE_IMAGE_ALIAS_RE = /\\/pages\\/(\\d+)\\/[^/]+\\.(png|jpg|jpeg|webp)$/i;\nconst PAGE_MARKDOWN_ALIAS_RE = /\\/pages\\/(\\d+)\\/[^/]+\\.(md|markdown)$/i;\nconst PAGE_MARKDOWN_NESTED_ALIAS_RE = /\\/pages\\/(\\d+)\\/markdown\\/(?:_\\/)?[^/]+\\.(md|markdown)$/i;\nconst PROVIDER_SEGMENT_RE = /^t_[a-z0-9][a-z0-9._-]*$/i;\nconst DEFAULT_IMAGE_SEGMENT_RE = /^d_[a-z0-9][a-z0-9._:-]*$/i;\n\nfunction normalizeDataAliasPath(pathname: string, method: string): string {\n if (method !== 'GET' && method !== 'HEAD') return pathname;\n if (PAGE_IMAGE_ALIAS_RE.test(pathname)) {\n return pathname.replace(PAGE_IMAGE_ALIAS_RE, '/pages/$1/image');\n }\n if (CANONICAL_PAGE_IMAGE_EXT_RE.test(pathname)) {\n return pathname.replace(CANONICAL_PAGE_IMAGE_EXT_RE, '/pages/$1/image');\n }\n if (PAGE_MARKDOWN_NESTED_ALIAS_RE.test(pathname)) {\n return pathname.replace(PAGE_MARKDOWN_NESTED_ALIAS_RE, '/pages/$1/markdown');\n }\n if (CANONICAL_PAGE_MARKDOWN_EXT_RE.test(pathname)) {\n return pathname.replace(CANONICAL_PAGE_MARKDOWN_EXT_RE, '/pages/$1/markdown');\n }\n if (PAGE_MARKDOWN_ALIAS_RE.test(pathname)) {\n return pathname.replace(PAGE_MARKDOWN_ALIAS_RE, '/pages/$1/markdown');\n }\n if (DATA_URL_MARKER_ALIAS_RE.test(pathname)) {\n return pathname.replace(DATA_URL_MARKER_ALIAS_RE, '');\n }\n if (!DATA_URL_ALIAS_RE.test(pathname)) return pathname;\n return pathname.replace(DATA_URL_ALIAS_RE, '');\n}\n\nfunction normalizeTransformSegments(subpath: string): { subpath: string; defaultImage?: string } {\n const parts = 
subpath.split('/');\n let defaultImage: string | undefined;\n\n // Strip t_ (provider) and d_ (default image) segments from the path\n const filtered: string[] = [parts[0]]; // keep leading empty string\n for (let i = 1; i < parts.length; i++) {\n if (PROVIDER_SEGMENT_RE.test(parts[i])) continue;\n if (DEFAULT_IMAGE_SEGMENT_RE.test(parts[i])) {\n defaultImage = parts[i].slice(2); // strip \"d_\" prefix\n continue;\n }\n filtered.push(parts[i]);\n }\n\n const normalized = filtered.join('/');\n return {\n subpath: normalized === '/' ? '' : normalized,\n defaultImage,\n };\n}\n\n/**\n * Minimal request router that forwards /document/:id/* and /v1/documents/:id/*\n * to the corresponding Durable Object.\n */\nexport function handleRequest(env: WorkerEnv): (request: Request) => Promise<Response> {\n return async (request: Request) => {\n const normalizedUrl = new URL(request.url);\n normalizedUrl.pathname = normalizeDataAliasPath(normalizedUrl.pathname, request.method);\n const url = normalizedUrl;\n const path = url.pathname;\n\n // Match /document/:id/* or /v1/documents/:id/*\n const docMatch = path.match(/^\\/(?:document|v1\\/documents)\\/([^/]+)(\\/.*)?$/);\n if (!docMatch) {\n return new Response(JSON.stringify({ error: 'Not found' }), {\n status: 404,\n headers: { 'Content-Type': 'application/json' },\n });\n }\n\n const documentId = decodeURIComponent(docMatch[1]);\n const rawSubpath = docMatch[2] || '';\n const { subpath: cleanSubpath, defaultImage } = normalizeTransformSegments(rawSubpath);\n const subpath = cleanSubpath || '/status';\n\n const doId = env.DOCUMENT_AGENT.idFromName(documentId);\n const stub = env.DOCUMENT_AGENT.get(doId);\n\n // Forward to DO with the subpath\n const doUrl = new URL(url.toString());\n doUrl.pathname = `/document/${documentId}${subpath}`;\n\n // Pass d_ value as header so DO can serve placeholder on R2 miss\n const headers = defaultImage ? 
new Headers(request.headers) : undefined;\n if (headers && defaultImage) headers.set('X-Okra-Default', defaultImage);\n const doRequest = headers\n ? new Request(doUrl.toString(), { method: request.method, headers, body: request.body })\n : new Request(doUrl.toString(), request);\n\n return stub.fetch(doRequest);\n };\n}\n"],"mappings":";AAoCA,IAAM,oBAAoB;AAC1B,IAAM,2BAA2B;AACjC,IAAM,8BAA8B;AACpC,IAAM,iCAAiC;AACvC,IAAM,sBAAsB;AAC5B,IAAM,yBAAyB;AAC/B,IAAM,gCAAgC;AACtC,IAAM,sBAAsB;AAC5B,IAAM,2BAA2B;AAEjC,SAAS,uBAAuB,UAAkB,QAAwB;AACxE,MAAI,WAAW,SAAS,WAAW,OAAQ,QAAO;AAClD,MAAI,oBAAoB,KAAK,QAAQ,GAAG;AACtC,WAAO,SAAS,QAAQ,qBAAqB,iBAAiB;AAAA,EAChE;AACA,MAAI,4BAA4B,KAAK,QAAQ,GAAG;AAC9C,WAAO,SAAS,QAAQ,6BAA6B,iBAAiB;AAAA,EACxE;AACA,MAAI,8BAA8B,KAAK,QAAQ,GAAG;AAChD,WAAO,SAAS,QAAQ,+BAA+B,oBAAoB;AAAA,EAC7E;AACA,MAAI,+BAA+B,KAAK,QAAQ,GAAG;AACjD,WAAO,SAAS,QAAQ,gCAAgC,oBAAoB;AAAA,EAC9E;AACA,MAAI,uBAAuB,KAAK,QAAQ,GAAG;AACzC,WAAO,SAAS,QAAQ,wBAAwB,oBAAoB;AAAA,EACtE;AACA,MAAI,yBAAyB,KAAK,QAAQ,GAAG;AAC3C,WAAO,SAAS,QAAQ,0BAA0B,EAAE;AAAA,EACtD;AACA,MAAI,CAAC,kBAAkB,KAAK,QAAQ,EAAG,QAAO;AAC9C,SAAO,SAAS,QAAQ,mBAAmB,EAAE;AAC/C;AAEA,SAAS,2BAA2B,SAA6D;AAC/F,QAAM,QAAQ,QAAQ,MAAM,GAAG;AAC/B,MAAI;AAGJ,QAAM,WAAqB,CAAC,MAAM,CAAC,CAAC;AACpC,WAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACrC,QAAI,oBAAoB,KAAK,MAAM,CAAC,CAAC,EAAG;AACxC,QAAI,yBAAyB,KAAK,MAAM,CAAC,CAAC,GAAG;AAC3C,qBAAe,MAAM,CAAC,EAAE,MAAM,CAAC;AAC/B;AAAA,IACF;AACA,aAAS,KAAK,MAAM,CAAC,CAAC;AAAA,EACxB;AAEA,QAAM,aAAa,SAAS,KAAK,GAAG;AACpC,SAAO;AAAA,IACL,SAAS,eAAe,MAAM,KAAK;AAAA,IACnC;AAAA,EACF;AACF;AAMO,SAAS,cAAc,KAAyD;AACrF,SAAO,OAAO,YAAqB;AACjC,UAAM,gBAAgB,IAAI,IAAI,QAAQ,GAAG;AACzC,kBAAc,WAAW,uBAAuB,cAAc,UAAU,QAAQ,MAAM;AACtF,UAAM,MAAM;AACZ,UAAM,OAAO,IAAI;AAGjB,UAAM,WAAW,KAAK,MAAM,gDAAgD;AAC5E,QAAI,CAAC,UAAU;AACb,aAAO,IAAI,SAAS,KAAK,UAAU,EAAE,OAAO,YAAY,CAAC,GAAG;AAAA,QAC1D,QAAQ;AAAA,QACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAChD,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,mBAAmB,SAAS,CAAC,CAAC;AACjD,UAAM,aAAa,SAAS,CAAC,KAAK;AAClC,UAAM,EAAE,SAAS,cAAc,aAAa,IAAI,2BAA2B,UAAU;AAC
rF,UAAM,UAAU,gBAAgB;AAEhC,UAAM,OAAO,IAAI,eAAe,WAAW,UAAU;AACrD,UAAM,OAAO,IAAI,eAAe,IAAI,IAAI;AAGxC,UAAM,QAAQ,IAAI,IAAI,IAAI,SAAS,CAAC;AACpC,UAAM,WAAW,aAAa,UAAU,GAAG,OAAO;AAGlD,UAAM,UAAU,eAAe,IAAI,QAAQ,QAAQ,OAAO,IAAI;AAC9D,QAAI,WAAW,aAAc,SAAQ,IAAI,kBAAkB,YAAY;AACvE,UAAM,YAAY,UACd,IAAI,QAAQ,MAAM,SAAS,GAAG,EAAE,QAAQ,QAAQ,QAAQ,SAAS,MAAM,QAAQ,KAAK,CAAC,IACrF,IAAI,QAAQ,MAAM,SAAS,GAAG,OAAO;AAEzC,WAAO,KAAK,MAAM,SAAS;AAAA,EAC7B;AACF;","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "okrapdf",
|
|
3
|
+
"version": "0.8.0",
|
|
4
|
+
"description": "OkraPDF — upload a PDF, get an API. Runtime client, React hooks, and CLI.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"exports": {
|
|
7
|
+
".": {
|
|
8
|
+
"import": {
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"default": "./dist/index.js"
|
|
11
|
+
}
|
|
12
|
+
},
|
|
13
|
+
"./doc": {
|
|
14
|
+
"import": {
|
|
15
|
+
"types": "./dist/url.d.ts",
|
|
16
|
+
"default": "./dist/url.js"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"./browser": {
|
|
20
|
+
"import": {
|
|
21
|
+
"types": "./dist/browser.d.ts",
|
|
22
|
+
"default": "./dist/browser.js"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"./worker": {
|
|
26
|
+
"import": {
|
|
27
|
+
"types": "./dist/worker.d.ts",
|
|
28
|
+
"default": "./dist/worker.js"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"./react": {
|
|
32
|
+
"import": {
|
|
33
|
+
"types": "./dist/react/index.d.ts",
|
|
34
|
+
"default": "./dist/react/index.js"
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
"./cli": {
|
|
38
|
+
"import": {
|
|
39
|
+
"types": "./dist/cli/index.d.ts",
|
|
40
|
+
"default": "./dist/cli/index.js"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"bin": {
|
|
45
|
+
"okra": "./dist/cli/bin.js"
|
|
46
|
+
},
|
|
47
|
+
"files": [
|
|
48
|
+
"dist"
|
|
49
|
+
],
|
|
50
|
+
"scripts": {
|
|
51
|
+
"build": "tsup",
|
|
52
|
+
"test": "vitest run",
|
|
53
|
+
"test:watch": "vitest",
|
|
54
|
+
"typecheck": "tsc --noEmit"
|
|
55
|
+
},
|
|
56
|
+
"dependencies": {
|
|
57
|
+
"commander": "^12.0.0",
|
|
58
|
+
"ws": "^8.19.0",
|
|
59
|
+
"zod": "^4.3.6"
|
|
60
|
+
},
|
|
61
|
+
"peerDependencies": {
|
|
62
|
+
"react": ">=18"
|
|
63
|
+
},
|
|
64
|
+
"peerDependenciesMeta": {
|
|
65
|
+
"react": {
|
|
66
|
+
"optional": true
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"devDependencies": {
|
|
70
|
+
"@types/node": "^20.14.0",
|
|
71
|
+
"@types/react": "^18.2.0",
|
|
72
|
+
"@types/ws": "^8.18.1",
|
|
73
|
+
"react": "^18.2.0",
|
|
74
|
+
"tsup": "^8.0.0",
|
|
75
|
+
"typescript": "^5.5.0",
|
|
76
|
+
"vitest": "^2.0.0"
|
|
77
|
+
},
|
|
78
|
+
"license": "UNLICENSED",
|
|
79
|
+
"publishConfig": {
|
|
80
|
+
"access": "public"
|
|
81
|
+
}
|
|
82
|
+
}
|