@dvvebond/core 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,652 @@
1
+
2
+ //#region src/worker/messages.ts
3
/**
 * Build an identifier for a worker message.
 *
 * The result has the form `msg-<epoch ms>-<suffix>`, where the suffix is up to
 * seven base-36 characters derived from `Math.random()`. IDs are therefore
 * unique only with high probability and must not be used as secrets.
 *
 * @returns {string} The generated message ID.
 */
function generateMessageId() {
	const timestamp = Date.now();
	const suffix = Math.random().toString(36).slice(2, 9);
	return ["msg", timestamp, suffix].join("-");
}
9
/**
 * Build an identifier for a worker task.
 *
 * The result has the form `task-<epoch ms>-<suffix>`, where the suffix is up
 * to seven base-36 characters derived from `Math.random()`.
 *
 * @returns {string} The generated task ID.
 */
function generateTaskId() {
	const timestamp = Date.now();
	const suffix = Math.random().toString(36).slice(2, 9);
	return ["task", timestamp, suffix].join("-");
}
15
/**
 * Tell whether a value is a response message.
 *
 * @param {unknown} message - Any value, typically `event.data` from a worker.
 * @returns {boolean} `true` only for non-null objects whose `type` property
 *   is exactly `"response"`.
 */
function isResponse(message) {
	if (message === null || typeof message !== "object") return false;
	if (!("type" in message)) return false;
	return message.type === "response";
}
21
/**
 * Tell whether a value is a progress update message.
 *
 * @param {unknown} message - Any value, typically `event.data` from a worker.
 * @returns {boolean} `true` only for non-null objects whose `type` property
 *   is exactly `"progress"`.
 */
function isProgress(message) {
	if (message === null || typeof message !== "object") return false;
	if (!("type" in message)) return false;
	return message.type === "progress";
}
27
/**
 * Wrap a payload in a request envelope with a freshly generated message ID.
 *
 * @param {string} requestType - Name of the operation being requested.
 * @param {unknown} data - Payload stored on the message unchanged.
 * @returns {{type: "request", id: string, requestType: string, data: unknown}}
 *   The request message.
 */
function createRequest(requestType, data) {
	const id = generateMessageId();
	return { type: "request", id, requestType, data };
}
38
/**
 * Convert a thrown value into a plain, structured-cloneable WorkerError.
 *
 * JavaScript allows throwing any value, so the input is not assumed to be an
 * `Error`: string `name`/`message`/`stack` properties are used when present,
 * otherwise the code falls back to `"Error"` and the message to
 * `String(error)`.
 *
 * @param {unknown} error - The thrown value (normally an `Error`).
 * @param {string} [code] - Explicit error code; defaults to the error's name.
 * @returns {{code: string, message: string, stack: (string|undefined)}}
 *   The serialisable error description.
 */
function createWorkerError(error, code) {
	const isObject = typeof error === "object" && error !== null;
	const name = isObject && typeof error.name === "string" ? error.name : "Error";
	const message = isObject && typeof error.message === "string" ? error.message : String(error);
	const stack = isObject && typeof error.stack === "string" ? error.stack : undefined;
	return {
		code: code ?? name,
		message,
		stack
	};
}
48
+
49
+ //#endregion
50
+ //#region src/worker/parsing-types.ts
51
/**
 * Tell whether a value is a parsing progress message.
 *
 * @param {unknown} message - Any value, typically `event.data` from a worker.
 * @returns {boolean} `true` only for non-null objects whose `type` property
 *   is exactly `"parsingProgress"`.
 */
function isParsingProgress(message) {
	if (message === null || typeof message !== "object") return false;
	if (!("type" in message)) return false;
	return message.type === "parsingProgress";
}
57
/**
 * Tell whether a value is a response to one of the parsing requests.
 *
 * @param {unknown} message - Any value, typically `event.data` from a worker.
 * @returns {boolean} `true` only for non-null objects with
 *   `type === "response"` and a `requestType` of `"parseDocument"`,
 *   `"extractText"` or `"cancelParsing"`.
 */
function isParsingResponse(message) {
	if (message === null || typeof message !== "object") return false;
	if (!("type" in message) || message.type !== "response") return false;
	if (!("requestType" in message)) return false;
	switch (message.requestType) {
		case "parseDocument":
		case "extractText":
		case "cancelParsing":
			return true;
		default:
			return false;
	}
}
67
/**
 * Build a parsing progress message stamped with the current time.
 *
 * @param {string} taskId - ID of the task the progress belongs to.
 * @param {unknown} progress - Progress payload, stored unchanged.
 * @returns {{type: "parsingProgress", taskId: string, progress: unknown, timestamp: number}}
 *   The progress message; `timestamp` is `Date.now()` at creation.
 */
function createParsingProgress(taskId, progress) {
	const timestamp = Date.now();
	return { type: "parsingProgress", taskId, progress, timestamp };
}
78
+
79
+ //#endregion
80
+ //#region src/worker/parsing-utils.ts
81
/**
 * Identify the JavaScript runtime the code is executing in.
 *
 * Checks run in a fixed order: a `Bun` global, then a `Deno` global, then
 * `process.versions.node`, and only afterwards the browser signals (a page
 * with `window` and `document`, or a worker scope exposing `importScripts`).
 *
 * @returns {"bun"|"deno"|"node"|"browser"|"unknown"} The detected runtime.
 */
function detectEnvironment() {
	if (typeof globalThis !== "undefined") {
		if ("Bun" in globalThis) return "bun";
		if ("Deno" in globalThis) return "deno";
		if (typeof globalThis.process?.versions?.node === "string") return "node";
	}
	const inPage = typeof window !== "undefined" && typeof document !== "undefined";
	const inWorkerScope = typeof self !== "undefined" && typeof self.importScripts === "function";
	return inPage || inWorkerScope ? "browser" : "unknown";
}
92
/**
 * Report whether a Web Worker can be created in the current environment.
 *
 * Browser, Bun and Deno are supported when a global `Worker` constructor
 * exists. Node and unknown environments are always reported as unsupported.
 *
 * @returns {boolean} `true` when `new Worker(...)` is expected to be usable.
 */
function isWorkerSupported() {
	const environment = detectEnvironment();
	if (environment === "browser" || environment === "bun" || environment === "deno") {
		return typeof Worker !== "undefined";
	}
	return false;
}
104
/**
 * Report whether the code is currently running inside a Web Worker.
 *
 * A worker scope is recognised by a `self` global exposing `importScripts`
 * while no `window` global exists.
 *
 * @returns {boolean} `true` inside a worker scope, `false` otherwise.
 */
function isWorkerContext() {
	if (typeof window !== "undefined") return false;
	if (typeof self === "undefined") return false;
	return typeof self.importScripts === "function";
}
110
/**
 * Construct the parsing `Worker`, turning failures into descriptive errors.
 *
 * @param {object} options
 * @param {string|URL} options.workerUrl - Location of the bundled worker script (required).
 * @param {string} [options.name] - Worker name; defaults to `parsing-worker-<epoch ms>`.
 * @param {boolean} [options.module=true] - Load the script as an ES module
 *   (`type: "module"`) rather than a classic script.
 * @returns {Worker} The newly created worker.
 * @throws {Error} If workers are unsupported here, `workerUrl` is missing, or
 *   the `Worker` constructor throws (the original error is attached as `cause`).
 */
function createWorkerInstance(options) {
	if (!isWorkerSupported()) {
		const environment = detectEnvironment();
		throw new Error(`Web Workers are not supported in ${environment} environment. Use the synchronous parsing API instead.`);
	}
	const { workerUrl, name: requestedName, module: useModule = true } = options;
	if (!workerUrl) {
		throw new Error("Worker URL is required. Provide workerUrl pointing to the bundled parsing worker script.");
	}
	const workerOptions = {
		type: useModule ? "module" : "classic",
		name: requestedName ?? `parsing-worker-${Date.now()}`
	};
	try {
		return new Worker(workerUrl, workerOptions);
	} catch (error) {
		const reason = error instanceof Error ? error.message : String(error);
		throw new Error(`Failed to create parsing worker: ${reason}`, { cause: error });
	}
}
129
/**
 * Collect the ArrayBuffers inside `data` that can be transferred (zero-copy)
 * to a worker instead of being copied.
 *
 * Plain objects and arrays are walked recursively, with cycles guarded by a
 * visited set. A buffer is collected when it appears directly, or when a typed
 * array / DataView spans it entirely (`byteOffset === 0` and equal byte
 * length). Views covering only part of a buffer are skipped, so memory outside
 * the view is not handed over. `SharedArrayBuffer`-backed views are never
 * collected.
 *
 * Each buffer is returned at most once, even when it is reachable through
 * several views or both directly and through a view. A transfer list
 * containing duplicates makes `postMessage` throw.
 *
 * @param {unknown} data - Arbitrary message payload.
 * @returns {ArrayBuffer[]} Unique transferable buffers in discovery order.
 */
function extractTransferables(data) {
	// Set keeps insertion order and removes duplicate buffers.
	const buffers = /* @__PURE__ */ new Set();
	const visited = /* @__PURE__ */ new WeakSet();
	const visit = (value) => {
		if (value === null || typeof value !== "object") return;
		if (visited.has(value)) return;
		visited.add(value);
		if (value instanceof ArrayBuffer) {
			buffers.add(value);
			return;
		}
		if (ArrayBuffer.isView(value)) {
			const { buffer } = value;
			const coversWholeBuffer = value.byteOffset === 0 && value.byteLength === buffer.byteLength;
			if (buffer instanceof ArrayBuffer && coversWholeBuffer) buffers.add(buffer);
			// Never descend into a view's numeric elements.
			return;
		}
		const children = Array.isArray(value) ? value : Object.values(value);
		for (const child of children) visit(child);
	};
	visit(data);
	return [...buffers];
}
159
/**
 * Build an identifier for a parsing message.
 *
 * The result has the form `parse-<epoch ms>-<suffix>`, where the suffix is up
 * to seven base-36 characters derived from `Math.random()`.
 *
 * @returns {string} The generated message ID.
 */
function generateParsingMessageId() {
	const timestamp = Date.now();
	const suffix = Math.random().toString(36).slice(2, 9);
	return ["parse", timestamp, suffix].join("-");
}
165
/**
 * Build an identifier for a parsing task.
 *
 * The result has the form `parsing-task-<epoch ms>-<suffix>`, where the suffix
 * is up to seven base-36 characters derived from `Math.random()`.
 *
 * @returns {string} The generated task ID.
 */
function generateParsingTaskId() {
	const timestamp = Date.now();
	const suffix = Math.random().toString(36).slice(2, 9);
	return ["parsing-task", timestamp, suffix].join("-");
}
171
/**
 * Default timeouts for parsing operations, in milliseconds.
 *
 * `small`, `medium` and `large` are the values `calculateParsingTimeout`
 * returns for documents under 1 MB, under 10 MB, and everything else.
 * NOTE(review): `init` and `textPerPage` are not referenced in this bundle
 * chunk; their names suggest the init handshake and per-page text extraction,
 * but confirm against the callers.
 */
const DEFAULT_PARSING_TIMEOUTS = {
	init: 10_000,
	small: 30_000,
	medium: 60_000,
	large: 300_000,
	textPerPage: 5_000
};
181
/**
 * Pick a parsing timeout that scales with the document size.
 *
 * @param {number} sizeBytes - Document size in bytes.
 * @returns {number} Timeout in milliseconds: the `small` default below 1 MiB,
 *   `medium` below 10 MiB, otherwise `large`. A size that is not a comparable
 *   number (e.g. `NaN`) also yields `large`.
 */
function calculateParsingTimeout(sizeBytes) {
	const BYTES_PER_MB = 1024 * 1024;
	const sizeMB = sizeBytes / BYTES_PER_MB;
	const { small, medium, large } = DEFAULT_PARSING_TIMEOUTS;
	return sizeMB < 1 ? small : sizeMB < 10 ? medium : large;
}
190
/**
 * Create a promise together with the functions that settle it from outside.
 *
 * @returns {{promise: Promise<unknown>, resolve: Function, reject: Function, isPending: boolean}}
 *   `promise` is settled by calling `resolve(value)` or `reject(reason)`.
 *   `isPending` is a live getter that turns `false` as soon as either
 *   function has been called.
 */
function createDeferred() {
	let settled = false;
	let resolve;
	let reject;
	// The executor runs synchronously, so both functions are assigned before
	// the object below is built.
	const promise = new Promise((fulfill, fail) => {
		resolve = (value) => {
			settled = true;
			fulfill(value);
		};
		reject = (reason) => {
			settled = true;
			fail(reason);
		};
	});
	return {
		promise,
		resolve,
		reject,
		get isPending() {
			return !settled;
		}
	};
}
215
+
216
+ //#endregion
217
+ //#region src/worker/parsing-worker-host.ts
218
+ /**
219
+ * Main thread interface for parsing worker communication.
220
+ *
221
+ * ParsingWorkerHost manages the lifecycle of a parsing worker and provides
222
+ * a Promise-based API for document parsing operations. It handles:
223
+ * - Worker creation and initialization
224
+ * - Message passing with request/response correlation
225
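+ * - Progress event dispatch to per-task and global callbacks (the host itself does not throttle; the reporting interval is passed to the worker as `progressInterval`)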
226
+ * - Cancellation support
227
+ * - Graceful shutdown and cleanup
228
+ */
229
/**
 * ParsingWorkerHost manages a Web Worker for PDF parsing operations.
 *
 * The host owns the worker lifecycle (`idle` → `initializing` → `ready` /
 * `busy` → `error` / `terminated`). It correlates every posted request with
 * its response by message ID, enforces a timeout per request, and forwards
 * progress messages to the per-task and global progress callbacks.
 *
 * @example
 * ```typescript
 * const host = new ParsingWorkerHost({
 *   workerUrl: '/parsing-worker.js',
 *   onProgress: (progress) => console.log(`${progress.percent}%`),
 * });
 *
 * await host.initialize();
 *
 * const result = await host.parse(pdfBytes);
 * console.log(`Parsed ${result.info.pageCount} pages`);
 *
 * const text = await host.extractText(result.documentId);
 * console.log(text.pages[0].text);
 *
 * await host.terminate();
 * ```
 */
var ParsingWorkerHost = class {
	/** The live Worker, or `null` before initialization / after disposal. */
	_worker = null;
	/** Lifecycle state; change it only through `_setState`. */
	_state = "idle";
	/** Constructor options with defaults applied. */
	_options;
	/** In-flight requests keyed by message ID. */
	_pendingRequests = /* @__PURE__ */ new Map();
	/** Per-task progress callbacks keyed by task ID. */
	_taskProgressHandlers = /* @__PURE__ */ new Map();
	/** Promise of the current or last successful initialization attempt. */
	_initPromise = null;

	/**
	 * @param {object} options
	 * @param {string|URL} options.workerUrl - Location of the worker script.
	 * @param {string} [options.name] - Worker name; defaults to `parsing-worker-host-<epoch ms>`.
	 * @param {boolean} [options.verbose=false] - Forwarded to the worker in the `init` request.
	 * @param {number} [options.initTimeout=10000] - Timeout for the `init` request (ms).
	 * @param {number} [options.defaultTimeout=60000] - Timeout used by `extractText` when none is given (ms).
	 * @param {Function} [options.onProgress] - Called with every progress payload.
	 * @param {Function} [options.onError] - Called with a worker error description.
	 * @param {Function} [options.onStateChange] - Called as `(newState, previousState)`.
	 */
	constructor(options) {
		this._options = {
			workerUrl: options.workerUrl,
			name: options.name ?? `parsing-worker-host-${Date.now()}`,
			verbose: options.verbose ?? false,
			initTimeout: options.initTimeout ?? 1e4,
			defaultTimeout: options.defaultTimeout ?? 6e4,
			onProgress: options.onProgress,
			onError: options.onError,
			onStateChange: options.onStateChange
		};
	}

	/**
	 * Current worker state.
	 */
	get state() {
		return this._state;
	}

	/**
	 * Whether the worker is ready to accept requests (`ready` or `busy`).
	 */
	get isReady() {
		return this._state === "ready" || this._state === "busy";
	}

	/**
	 * Whether the worker has been terminated.
	 */
	get isTerminated() {
		return this._state === "terminated";
	}

	/**
	 * Number of pending requests.
	 */
	get pendingCount() {
		return this._pendingRequests.size;
	}

	/**
	 * Worker name.
	 */
	get name() {
		return this._options.name;
	}

	/**
	 * Initialize the worker.
	 *
	 * Creates the Web Worker instance and waits for its `init` response.
	 * Calls made while an attempt is in flight, or after it succeeded, share
	 * that attempt. A failed attempt is forgotten, so a later call retries
	 * instead of replaying the old rejection.
	 *
	 * @throws Error if the host is terminated, workers are not supported,
	 *   creation fails, or initialization times out
	 */
	async initialize() {
		if (this._initPromise) return this._initPromise;
		if (this._state === "terminated") throw new Error("Cannot initialize a terminated worker");
		const attempt = this._doInitialize();
		this._initPromise = attempt;
		// Reset here, not inside _doInitialize: a synchronous failure there
		// runs before the assignment above and would be overwritten.
		attempt.catch(() => {
			if (this._initPromise === attempt) this._initPromise = null;
		});
		return attempt;
	}

	/** Create the worker, wire its handlers and perform the `init` handshake. */
	async _doInitialize() {
		this._setState("initializing");
		try {
			this._worker = createWorkerInstance({
				workerUrl: this._options.workerUrl,
				name: this._options.name,
				module: true
			});
			this._worker.onmessage = (event) => this._handleMessage(event);
			this._worker.onerror = (event) => this._handleError(event);
			await this._sendRequest("init", {
				verbose: this._options.verbose,
				name: this._options.name
			}, this._options.initTimeout);
			this._setState("ready");
		} catch (error) {
			// terminate() during initialization has already cleaned up; do
			// not overwrite its final "terminated" state with "error".
			if (this._state !== "terminated") {
				this._setState("error");
				this._cleanup();
				// Dispose the half-initialized worker so it is not leaked.
				this._disposeWorker();
			}
			throw error;
		}
	}

	/**
	 * Terminate the worker.
	 *
	 * @param graceful - If true and the worker is ready, first send a
	 *   `terminate` request and wait for its response before killing the worker
	 * @param timeout - Timeout for that request in milliseconds
	 */
	async terminate(graceful = true, timeout = 5e3) {
		if (this._state === "terminated") return;
		if (graceful && this._worker && this.isReady) {
			try {
				// _sendRequest already rejects after `timeout` ms, so no
				// extra race timer is needed (and none is left dangling).
				await this._sendRequest("terminate", void 0, timeout);
			} catch {
				// Best effort: a failed or timed-out graceful shutdown falls
				// through to the forced termination below.
			}
		}
		this._forceTerminate();
	}

	/** Reject everything in flight, kill the worker and enter `terminated`. */
	_forceTerminate() {
		this._rejectAllPending(/* @__PURE__ */ new Error("Worker terminated"));
		this._taskProgressHandlers.clear();
		this._disposeWorker();
		this._setState("terminated");
		this._initPromise = null;
	}

	/** Drop all bookkeeping after a failed initialization. */
	_cleanup() {
		// Settle leftovers instead of silently dropping them, so no caller is
		// left awaiting a promise that can never complete.
		this._rejectAllPending(/* @__PURE__ */ new Error("Worker initialization failed"));
		this._taskProgressHandlers.clear();
	}

	/** Terminate and forget the underlying Worker, if any. */
	_disposeWorker() {
		if (!this._worker) return;
		this._worker.terminate();
		this._worker = null;
	}

	/** Clear the timers of all pending requests and reject them with `error`. */
	_rejectAllPending(error) {
		for (const pending of this._pendingRequests.values()) {
			if (pending.timeoutId) clearTimeout(pending.timeoutId);
			pending.deferred.reject(error);
		}
		this._pendingRequests.clear();
	}

	/**
	 * Parse a PDF document.
	 *
	 * When `bytes` spans its whole ArrayBuffer, that buffer is transferred to
	 * the worker and becomes detached on the calling side. A view over part of
	 * a larger buffer is copied, so the caller's other data stays usable.
	 *
	 * @param bytes - PDF file bytes
	 * @param options - Parse options (`timeout`, `onProgress`, `lenient`,
	 *   `password`, `bruteForceRecovery`, `progressInterval`, and an optional
	 *   caller-chosen `taskId`)
	 * @returns Parsed document information
	 * @throws Error if the operation is cancelled, fails, or times out
	 */
	async parse(bytes, options) {
		await this._ensureInitialized();
		// Honour a caller-supplied task ID so parseCancellable() can cancel
		// the task the worker actually knows about.
		const taskId = options?.taskId ?? generateParsingTaskId();
		const timeout = options?.timeout ?? calculateParsingTimeout(bytes.length);
		if (options?.onProgress) this._taskProgressHandlers.set(taskId, options.onProgress);
		try {
			const response = await this._sendRequest("parseDocument", {
				bytes,
				taskId,
				options: {
					lenient: options?.lenient,
					password: options?.password,
					bruteForceRecovery: options?.bruteForceRecovery,
					progressInterval: options?.progressInterval
				}
			}, timeout, taskId, extractTransferables(bytes));
			if (response.status === "cancelled") throw new Error("Operation cancelled");
			if (response.status === "error" || !response.data) throw new Error(response.error?.message ?? "Failed to parse document");
			return response.data;
		} finally {
			this._taskProgressHandlers.delete(taskId);
		}
	}

	/**
	 * Parse a PDF document with cancellation support.
	 *
	 * @returns An object with the parse `promise`, the `taskId` used for the
	 *   request, and a `cancel()` function for that task. `cancel()` resolves
	 *   to `false` while the worker is not yet ready.
	 */
	parseCancellable(bytes, options) {
		const taskId = generateParsingTaskId();
		const promise = this.parse(bytes, {
			...options,
			taskId
		});
		return {
			promise,
			taskId,
			cancel: () => this.cancel(taskId)
		};
	}

	/**
	 * Extract text from a parsed document.
	 *
	 * @param documentId - Document ID from parse result
	 * @param options - Extraction options (`timeout`, `onProgress`, `pages`,
	 *   `includePositions`)
	 * @returns Extracted text per page
	 * @throws Error if the operation is cancelled, fails, or times out
	 */
	async extractText(documentId, options) {
		await this._ensureInitialized();
		const taskId = generateParsingTaskId();
		const timeout = options?.timeout ?? this._options.defaultTimeout;
		if (options?.onProgress) this._taskProgressHandlers.set(taskId, options.onProgress);
		try {
			const response = await this._sendRequest("extractText", {
				documentId,
				taskId,
				pageIndices: options?.pages,
				includePositions: options?.includePositions
			}, timeout, taskId);
			if (response.status === "cancelled") throw new Error("Operation cancelled");
			if (response.status === "error" || !response.data) throw new Error(response.error?.message ?? "Failed to extract text");
			return response.data;
		} finally {
			this._taskProgressHandlers.delete(taskId);
		}
	}

	/**
	 * Cancel an active parsing operation.
	 *
	 * @param taskId - Task ID to cancel
	 * @returns Whether the task was successfully cancelled; always a boolean,
	 *   and `false` when the worker is not ready or the request fails
	 */
	async cancel(taskId) {
		if (!this.isReady) return false;
		try {
			const response = await this._sendRequest("cancelParsing", { taskId }, 5e3);
			return response.status === "success" && Boolean(response.data?.wasCancelled);
		} catch {
			// Cancellation is best effort; report failure instead of throwing.
			return false;
		}
	}

	/** Initialize on demand; refuse to work on a terminated host. */
	async _ensureInitialized() {
		if (this._state === "terminated") throw new Error("Worker has been terminated");
		if (!this.isReady) await this.initialize();
	}

	/**
	 * Post a request to the worker and return a promise for its response.
	 *
	 * The promise rejects if no response arrives within `timeout` ms, if
	 * `postMessage` throws, or if the worker reports an error response.
	 */
	_sendRequest(requestType, data, timeout, taskId, transferables) {
		if (this._state === "terminated") return Promise.reject(/* @__PURE__ */ new Error("Worker has been terminated"));
		if (!this._worker) return Promise.reject(/* @__PURE__ */ new Error("Worker not initialized"));
		const messageId = generateMessageId();
		const actualTaskId = taskId ?? generateTaskId();
		const deferred = createDeferred();
		const timeoutId = setTimeout(() => {
			// The entry is already gone if a response or failure came first.
			if (!this._pendingRequests.delete(messageId)) return;
			this._updateBusyState();
			deferred.reject(/* @__PURE__ */ new Error(`Request timeout after ${timeout}ms: ${requestType}`));
		}, timeout);
		this._pendingRequests.set(messageId, {
			messageId,
			taskId: actualTaskId,
			deferred,
			timeoutId
		});
		if (this._state === "ready") this._setState("busy");
		const request = {
			type: "request",
			id: messageId,
			requestType,
			data
		};
		try {
			if (transferables && transferables.length > 0) this._worker.postMessage(request, transferables);
			else this._worker.postMessage(request);
		} catch (error) {
			clearTimeout(timeoutId);
			this._pendingRequests.delete(messageId);
			this._updateBusyState();
			return Promise.reject(error);
		}
		return deferred.promise;
	}

	/** Route an incoming worker message to the progress or response handler. */
	_handleMessage(event) {
		const message = event.data;
		if (isParsingProgress(message)) this._handleProgress(message);
		else if (isResponse(message)) this._handleResponse(message);
	}

	/** Settle the pending request matching `response.id`, if still waiting. */
	_handleResponse(response) {
		const pending = this._pendingRequests.get(response.id);
		if (!pending) return;
		if (pending.timeoutId) clearTimeout(pending.timeoutId);
		this._pendingRequests.delete(response.id);
		this._updateBusyState();
		if (response.status === "error" && response.error) pending.deferred.reject(new Error(response.error.message));
		else pending.deferred.resolve(response);
	}

	/** Forward a progress payload to the task handler, then the global one. */
	_handleProgress(progress) {
		const taskHandler = this._taskProgressHandlers.get(progress.taskId);
		if (taskHandler) taskHandler(progress.progress);
		if (this._options.onProgress) this._options.onProgress(progress.progress);
	}

	/** Handle the worker's `error` event: notify and fail everything in flight. */
	_handleError(event) {
		const error = {
			code: "INTERNAL_ERROR",
			message: event.message ?? "Unknown worker error",
			recoverable: false
		};
		if (this._options.onError) this._options.onError(error);
		// Reject during initialization too, so a worker that fails to load
		// makes initialize() fail at once instead of after the init timeout.
		this._rejectAllPending(new Error(error.message));
		// While initializing, _doInitialize's catch block owns the state.
		if (this._state !== "initializing") this._setState("error");
	}

	/** Return from `busy` to `ready` once nothing is in flight. */
	_updateBusyState() {
		if (this._state === "busy" && this._pendingRequests.size === 0) this._setState("ready");
	}

	/** Change state and notify `onStateChange`; no-op when unchanged. */
	_setState(newState) {
		const previousState = this._state;
		if (previousState === newState) return;
		this._state = newState;
		if (this._options.onStateChange) this._options.onStateChange(newState, previousState);
	}
};
542
/**
 * Factory for `ParsingWorkerHost`.
 *
 * @param {object} options - Passed unchanged to the `ParsingWorkerHost` constructor.
 * @returns {ParsingWorkerHost} A new host; it is not initialized yet.
 */
function createParsingWorkerHost(options) {
	const host = new ParsingWorkerHost(options);
	return host;
}
548
+
549
+ //#endregion
550
// CommonJS export table. Each name is exposed as an enumerable getter that
// returns the module-local binding. The entries are kept as literal per-name
// calls (rather than generated in a loop) so the export names stay visible to
// tools that scan the source statically.
Object.defineProperty(exports, 'DEFAULT_PARSING_TIMEOUTS', { enumerable: true, get: function () { return DEFAULT_PARSING_TIMEOUTS; } });
Object.defineProperty(exports, 'ParsingWorkerHost', { enumerable: true, get: function () { return ParsingWorkerHost; } });
Object.defineProperty(exports, 'calculateParsingTimeout', { enumerable: true, get: function () { return calculateParsingTimeout; } });
Object.defineProperty(exports, 'createDeferred', { enumerable: true, get: function () { return createDeferred; } });
Object.defineProperty(exports, 'createParsingProgress', { enumerable: true, get: function () { return createParsingProgress; } });
Object.defineProperty(exports, 'createParsingWorkerHost', { enumerable: true, get: function () { return createParsingWorkerHost; } });
Object.defineProperty(exports, 'createRequest', { enumerable: true, get: function () { return createRequest; } });
Object.defineProperty(exports, 'createWorkerError', { enumerable: true, get: function () { return createWorkerError; } });
Object.defineProperty(exports, 'detectEnvironment', { enumerable: true, get: function () { return detectEnvironment; } });
Object.defineProperty(exports, 'extractTransferables', { enumerable: true, get: function () { return extractTransferables; } });
Object.defineProperty(exports, 'generateParsingMessageId', { enumerable: true, get: function () { return generateParsingMessageId; } });
Object.defineProperty(exports, 'generateParsingTaskId', { enumerable: true, get: function () { return generateParsingTaskId; } });
Object.defineProperty(exports, 'generateTaskId', { enumerable: true, get: function () { return generateTaskId; } });
Object.defineProperty(exports, 'isProgress', { enumerable: true, get: function () { return isProgress; } });
Object.defineProperty(exports, 'isResponse', { enumerable: true, get: function () { return isResponse; } });
Object.defineProperty(exports, 'isWorkerContext', { enumerable: true, get: function () { return isWorkerContext; } });
Object.defineProperty(exports, 'isWorkerSupported', { enumerable: true, get: function () { return isWorkerSupported; } });
652
+ //# sourceMappingURL=parsing-worker-host-CBKQ4mss.cjs.map