pdf-oxide 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +218 -0
  2. package/binding.gyp +35 -0
  3. package/package.json +78 -0
  4. package/src/builders/annotation-builder.ts +367 -0
  5. package/src/builders/conversion-options-builder.ts +257 -0
  6. package/src/builders/index.ts +12 -0
  7. package/src/builders/metadata-builder.ts +317 -0
  8. package/src/builders/pdf-builder.ts +386 -0
  9. package/src/builders/search-options-builder.ts +151 -0
  10. package/src/document-editor-manager.ts +318 -0
  11. package/src/errors.ts +1629 -0
  12. package/src/form-field-manager.ts +666 -0
  13. package/src/hybrid-ml-manager.ts +283 -0
  14. package/src/index.ts +453 -0
  15. package/src/managers/accessibility-manager.ts +338 -0
  16. package/src/managers/annotation-manager.ts +439 -0
  17. package/src/managers/barcode-manager.ts +235 -0
  18. package/src/managers/batch-manager.ts +533 -0
  19. package/src/managers/cache-manager.ts +486 -0
  20. package/src/managers/compliance-manager.ts +375 -0
  21. package/src/managers/content-manager.ts +339 -0
  22. package/src/managers/document-utility-manager.ts +922 -0
  23. package/src/managers/dom-pdf-creator.ts +365 -0
  24. package/src/managers/editing-manager.ts +514 -0
  25. package/src/managers/enterprise-manager.ts +478 -0
  26. package/src/managers/extended-managers.ts +437 -0
  27. package/src/managers/extraction-manager.ts +583 -0
  28. package/src/managers/final-utilities.ts +429 -0
  29. package/src/managers/hybrid-ml-advanced.ts +479 -0
  30. package/src/managers/index.ts +239 -0
  31. package/src/managers/layer-manager.ts +500 -0
  32. package/src/managers/metadata-manager.ts +303 -0
  33. package/src/managers/ocr-manager.ts +756 -0
  34. package/src/managers/optimization-manager.ts +262 -0
  35. package/src/managers/outline-manager.ts +196 -0
  36. package/src/managers/page-manager.ts +289 -0
  37. package/src/managers/pattern-detection.ts +440 -0
  38. package/src/managers/rendering-manager.ts +863 -0
  39. package/src/managers/search-manager.ts +385 -0
  40. package/src/managers/security-manager.ts +345 -0
  41. package/src/managers/signature-manager.ts +1664 -0
  42. package/src/managers/streams.ts +618 -0
  43. package/src/managers/xfa-manager.ts +500 -0
  44. package/src/pdf-creator-manager.ts +494 -0
  45. package/src/properties.ts +522 -0
  46. package/src/result-accessors-manager.ts +867 -0
  47. package/src/tests/advanced-features.test.ts +414 -0
  48. package/src/tests/advanced.test.ts +266 -0
  49. package/src/tests/extended-managers.test.ts +316 -0
  50. package/src/tests/final-utilities.test.ts +455 -0
  51. package/src/tests/foundation.test.ts +315 -0
  52. package/src/tests/high-demand.test.ts +257 -0
  53. package/src/tests/specialized.test.ts +97 -0
  54. package/src/thumbnail-manager.ts +272 -0
  55. package/src/types/common.ts +142 -0
  56. package/src/types/document-types.ts +457 -0
  57. package/src/types/index.ts +6 -0
  58. package/src/types/manager-types.ts +284 -0
  59. package/src/types/native-bindings.ts +517 -0
  60. package/src/workers/index.ts +7 -0
  61. package/src/workers/pool.ts +274 -0
  62. package/src/workers/worker.ts +131 -0
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Worker Thread Pool Manager
3
+ * Enables non-blocking parallel PDF processing
4
+ */
5
+
6
+ import { Worker } from 'worker_threads';
7
+ import os from 'os';
8
+ import path from 'path';
9
+ import { fileURLToPath } from 'url';
10
+
11
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
+
13
/**
 * A unit of work submitted to the worker pool.
 *
 * The type parameter `T` is not referenced by any member; it only lets call
 * sites tie a task to the result type they expect back.
 */
export interface WorkerTask<T = any> {
  /** Which operation the worker should perform on the document. */
  operation:
    | 'extract'
    | 'search'
    | 'render'
    | 'analyze';

  /** Path of the PDF document the worker opens for this task. */
  documentPath: string;

  /**
   * Operation-specific arguments. The worker script reads `type`,
   * `pageIndex`, `options` and `query` from this bag depending on
   * `operation`.
   */
  params: Record<string, any>;
}
21
+
22
/**
 * Outcome of a task, as reported back by a worker.
 */
export interface WorkerResult<T = any> {
  /** `true` when the operation finished without throwing. */
  success: boolean;

  /** Operation output; set by the worker only on success. */
  data?: T;

  /**
   * Failure description; set by the worker only on failure. The worker script
   * sends either a plain `{ name, message, stack }` object or a string.
   */
  error?: Error | string;

  /** Elapsed time of the operation in milliseconds, measured inside the worker. */
  duration: number;
}
31
+
32
/**
 * Bookkeeping record for a submitted task: the task itself, the settle
 * callbacks of the promise handed to the caller, and the timer that enforces
 * the task's timeout.
 */
interface QueuedTask {
  /** The work item to post to a worker. */
  task: WorkerTask<any>;

  /** Fulfils the caller's promise with the worker's result. */
  resolve: (value: WorkerResult<any>) => void;

  /** Rejects the caller's promise. */
  reject: (error: Error) => void;

  /** Timeout timer handle; cleared once the task settles. */
  timeout: NodeJS.Timeout;
}
38
+
39
/**
 * Thread pool for parallel PDF processing.
 *
 * Each worker executes at most one task at a time. Tasks wait in a FIFO queue
 * until a worker is idle. A task's timeout covers both its time in the queue
 * and its execution time.
 *
 * Failure handling:
 * - A worker that crashes or exits unexpectedly is removed from the pool, the
 *   task it was executing (if any) is rejected, and a replacement worker is
 *   started lazily the next time a task needs one.
 * - A task that times out while executing causes its worker to be terminated
 *   and replaced, so a late result can never be attributed to another task.
 */
export class WorkerPool {
  /** Every live worker owned by the pool, idle or busy. */
  private workers: Worker[] = [];
  /** Live workers that currently have no task assigned. */
  private idleWorkers: Worker[] = [];
  /** Tasks waiting for a worker, in submission order. */
  private queue: QueuedTask[] = [];
  /** The task each busy worker is currently executing. */
  private readonly running = new Map<Worker, QueuedTask>();
  private terminated = false;
  private readonly defaultTimeout = 30000; // 30 seconds

  /**
   * Initialize the worker pool
   * @param poolSize - Number of worker threads to create (integer, 1-32)
   * @throws Error if `poolSize` is invalid or the workers cannot be started
   */
  constructor(private poolSize: number = 4) {
    this.validatePoolSize();
    this.initializeWorkers();
  }

  /** Number of workers that are executing a task right now. */
  private get activeCount(): number {
    return this.running.size;
  }

  /** Reject sizes that are not whole numbers in [1, 32] (this includes NaN). */
  private validatePoolSize(): void {
    if (
      !Number.isInteger(this.poolSize) ||
      this.poolSize < 1 ||
      this.poolSize > 32
    ) {
      throw new Error(
        `Pool size must be between 1 and 32, got ${this.poolSize}`
      );
    }
  }

  /** Start `poolSize` workers; on failure, stop the ones already started. */
  private initializeWorkers(): void {
    try {
      for (let i = 0; i < this.poolSize; i++) {
        this.idleWorkers.push(this.spawnWorker());
      }
    } catch (error) {
      // Do not leak the threads that did start before the failure.
      for (const worker of this.workers) {
        void this.shutDownWorker(worker);
      }
      this.cleanup();
      throw new Error(
        `Failed to initialize worker pool: ${
          error instanceof Error ? error.message : String(error)
        }`
      );
    }
  }

  /**
   * Start one worker thread, attach its permanent listeners and register it
   * in `workers`. The caller decides whether it starts idle or busy.
   */
  private spawnWorker(): Worker {
    const worker = new Worker(path.join(__dirname, 'worker.js'));
    const id = worker.threadId;

    // Idle workers must not keep the process alive on their own. While a task
    // is pending, its (ref'd) timeout timer keeps the event loop running.
    worker.unref();

    worker.on('message', (result: WorkerResult<any>) => {
      this.handleWorkerMessage(worker, result);
    });

    worker.on('error', (error: Error) => {
      console.error(`Worker ${id} error:`, error);
      this.handleWorkerFailure(worker, error);
    });

    worker.on('exit', (code) => {
      // Expected exits: pool shutdown, or a worker we already retired.
      if (this.terminated || !this.workers.includes(worker)) return;
      if (code !== 0) {
        console.warn(`Worker ${id} exited with code ${code}`);
      }
      this.handleWorkerFailure(
        worker,
        new Error(`Worker ${id} exited unexpectedly with code ${code}`)
      );
    });

    this.workers.push(worker);
    return worker;
  }

  /**
   * Run a task in the worker pool
   * @param task - The task to run
   * @param timeout - Optional timeout in milliseconds (1000-300000)
   * @returns Promise that resolves with the result reported by the worker.
   *   It rejects if the pool is terminated, the timeout is out of range, the
   *   task times out, the task cannot be posted, or its worker fails.
   */
  public async runTask<T = any>(
    task: WorkerTask<T>,
    timeout: number = this.defaultTimeout
  ): Promise<WorkerResult<T>> {
    if (this.terminated) {
      throw new Error('Worker pool has been terminated');
    }

    if (!Number.isFinite(timeout) || timeout < 1000 || timeout > 300000) {
      throw new Error('Timeout must be between 1 and 300 seconds');
    }

    return new Promise<WorkerResult<T>>((resolve, reject) => {
      const entry: QueuedTask = {
        task,
        resolve,
        reject,
        // The callback runs later, so `entry` is initialized by then.
        timeout: setTimeout(() => this.handleTimeout(entry, timeout), timeout),
      };

      this.queue.push(entry);
      this.processQueue();
    });
  }

  /** Hand queued tasks to workers until the queue or the idle capacity runs out. */
  private processQueue(): void {
    while (!this.terminated && this.queue.length > 0) {
      let worker: Worker | undefined;
      try {
        worker = this.acquireWorker();
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        if (this.workers.length === 0) {
          // Nothing can ever pick these tasks up; fail them now instead of
          // letting every caller wait for its timeout.
          this.rejectPending(
            () => new Error(`Failed to start worker: ${reason}`)
          );
        } else {
          console.warn('Failed to start replacement worker:', error);
        }
        return;
      }

      if (!worker) return; // every worker is busy

      const entry = this.queue.shift();
      if (!entry) {
        this.idleWorkers.push(worker);
        return;
      }

      this.running.set(worker, entry);

      try {
        worker.postMessage(entry.task);
      } catch (error) {
        // postMessage throws synchronously when the task cannot be cloned.
        this.running.delete(worker);
        this.idleWorkers.push(worker);
        clearTimeout(entry.timeout);
        entry.reject(error instanceof Error ? error : new Error(String(error)));
      }
    }
  }

  /**
   * Get a worker that is free to take a task: an idle one, or a newly started
   * one if the pool is below `poolSize` because workers were retired.
   * @returns a free worker, or `undefined` when all workers are busy
   * @throws whatever the `Worker` constructor throws
   */
  private acquireWorker(): Worker | undefined {
    const idle = this.idleWorkers.pop();
    if (idle) return idle;
    if (this.workers.length >= this.poolSize) return undefined;
    return this.spawnWorker();
  }

  /** Settle the task assigned to `worker` with the result it reported. */
  private handleWorkerMessage(worker: Worker, result: WorkerResult<any>): void {
    const entry = this.running.get(worker);
    if (!entry) return; // no task is assigned to this worker; ignore

    this.running.delete(worker);
    this.idleWorkers.push(worker);
    clearTimeout(entry.timeout);
    entry.resolve(result);
    this.processQueue();
  }

  /** Retire a crashed or exited worker and reject the task it was executing. */
  private handleWorkerFailure(worker: Worker, error: Error): void {
    if (this.terminated || !this.workers.includes(worker)) return;

    const entry = this.running.get(worker);
    this.retireWorker(worker);

    if (entry) {
      clearTimeout(entry.timeout);
      entry.reject(error);
    }
    this.processQueue();
  }

  /** Reject a task whose timeout elapsed, whether it is queued or executing. */
  private handleTimeout(entry: QueuedTask, timeout: number): void {
    const queuedAt = this.queue.indexOf(entry);
    if (queuedAt !== -1) {
      this.queue.splice(queuedAt, 1);
    } else {
      // The task is executing. Its worker may be stuck, and a late result
      // must not be mistaken for the next task's result: replace the worker.
      for (const [worker, current] of this.running) {
        if (current === entry) {
          this.retireWorker(worker);
          break;
        }
      }
    }

    entry.reject(
      new Error(
        `Worker task timeout after ${timeout}ms: ${entry.task.operation} on ${entry.task.documentPath}`
      )
    );
    this.processQueue();
  }

  /** Remove a worker from all pool bookkeeping and stop its thread. */
  private retireWorker(worker: Worker): void {
    this.workers = this.workers.filter((candidate) => candidate !== worker);
    this.idleWorkers = this.idleWorkers.filter(
      (candidate) => candidate !== worker
    );
    this.running.delete(worker);
    void this.shutDownWorker(worker);
  }

  /** Terminate one worker thread; failures are logged, never thrown. */
  private async shutDownWorker(worker: Worker): Promise<void> {
    try {
      await worker.terminate();
    } catch (error) {
      console.warn('Error terminating worker:', error);
    }
  }

  /** Reject every queued and every executing task with a fresh error. */
  private rejectPending(makeError: () => Error): void {
    const pending = [...this.queue, ...this.running.values()];
    this.queue = [];
    this.running.clear();

    for (const entry of pending) {
      clearTimeout(entry.timeout);
      entry.reject(makeError());
    }
  }

  /**
   * Terminate all workers
   *
   * Queued and executing tasks are rejected with "Worker pool terminated".
   * Calling this more than once is harmless.
   * @returns Promise that resolves when all workers are terminated
   */
  public async terminate(): Promise<void> {
    this.terminated = true;

    this.rejectPending(() => new Error('Worker pool terminated'));

    await Promise.all(
      this.workers.map((worker) => this.shutDownWorker(worker))
    );

    this.cleanup();
  }

  /** Drop all references to workers and tasks. */
  private cleanup(): void {
    this.workers = [];
    this.idleWorkers = [];
    this.queue = [];
    this.running.clear();
  }

  /**
   * Get current pool statistics
   * @returns configured size, number of busy workers, number of queued tasks,
   *   and whether `terminate()` has been called
   */
  public getStats(): {
    poolSize: number;
    activeWorkers: number;
    queuedTasks: number;
    terminated: boolean;
  } {
    return {
      poolSize: this.poolSize,
      activeWorkers: this.activeCount,
      queuedTasks: this.queue.length,
      terminated: this.terminated,
    };
  }
}
253
+
254
/**
 * Shared, module-level worker pool.
 * Sized to the number of CPUs reported by the OS: at least 1, at most 8.
 */
const hardwareConcurrency = Math.max(1, os.cpus().length);

const sharedPoolSize = hardwareConcurrency > 8 ? 8 : hardwareConcurrency;

export const workerPool = new WorkerPool(sharedPoolSize);
263
+
264
/**
 * Best-effort shutdown of the shared pool when the process exits.
 *
 * 'exit' listeners must be synchronous: the process ends as soon as they
 * return, so nothing awaited here would ever resume. Calling `terminate()`
 * still does its synchronous part (marks the pool terminated, rejects queued
 * tasks, and requests termination of every worker); its completion is
 * deliberately not awaited.
 */
process.on('exit', () => {
  if (!workerPool || workerPool.getStats().terminated) return;

  void workerPool.terminate().catch((error) => {
    console.error('Error during worker pool shutdown:', error);
  });
});
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Worker Thread Script
3
+ * Handles off-main-thread PDF processing tasks
4
+ */
5
+
6
+ import { parentPort, workerData } from 'worker_threads';
7
+ import type { WorkerTask, WorkerResult } from './pool.js';
8
+
9
+ // Types for operations - will be available at runtime via the PdfDocument
10
+ type PdfDocument = any;
11
+
12
/**
 * Execute one pool task and describe the outcome.
 *
 * The document is opened from `task.documentPath` inside the branch for the
 * requested operation. Any failure, including an unknown operation, is
 * caught and returned as a `success: false` result.
 *
 * @param task - The task received from the pool
 * @returns The operation output on success, or the serialized error on
 *   failure; both carry the elapsed time in milliseconds
 */
async function handleTask(task: WorkerTask<any>): Promise<WorkerResult<any>> {
  const begunAt = Date.now();
  const elapsed = (): number => Date.now() - begunAt;

  try {
    // Loaded on demand rather than through a top-level import, which is not
    // reliable in a worker context across all environments.
    const pdfModule = await import('../index.js');
    const PdfDocument = pdfModule.PdfDocument;

    if (!PdfDocument) {
      throw new Error('PdfDocument not available in worker');
    }

    let output: any;

    if (task.operation === 'extract') {
      const doc = PdfDocument.open(task.documentPath);
      const extraction = doc.extraction;

      // 'markdown' and 'html' select their extractor; anything else is plain text.
      const requested = task.params.type;
      const extractor =
        requested === 'markdown'
          ? 'extractMarkdown'
          : requested === 'html'
            ? 'extractHtml'
            : 'extractText';

      output = extraction[extractor](
        task.params.pageIndex,
        task.params.options
      );
    } else if (task.operation === 'search') {
      const doc = PdfDocument.open(task.documentPath);
      const search = doc.search;
      output = search.searchAll(task.params.query, task.params.options || {});
    } else if (task.operation === 'render') {
      const doc = PdfDocument.open(task.documentPath);
      const rendering = doc.rendering;
      output = rendering.renderPage(
        task.params.pageIndex,
        task.params.options || {}
      );
    } else if (task.operation === 'analyze') {
      const doc = PdfDocument.open(task.documentPath);

      // Managers and their methods are optional; absent ones fall back to
      // null / 0 / false.
      output = {
        pageCount: doc.pageCount,
        metadata: doc.metadata?.getMetadata?.() || null,
        outline: {
          count: doc.outline?.getOutlineCount?.() || 0,
          isFlat: doc.outline?.isFlat?.() || false,
        },
        layers: {
          count: doc.layers?.getLayerCount?.() || 0,
          visible: doc.layers?.getVisibleLayerCount?.() || 0,
        },
      };
    } else {
      throw new Error(`Unknown operation: ${task.operation}`);
    }

    return { success: true, data: output, duration: elapsed() };
  } catch (error) {
    const duration = elapsed();

    // Errors are flattened to a plain object; non-Error throwables become strings.
    const described =
      error instanceof Error
        ? { name: error.name, message: error.message, stack: error.stack }
        : String(error);

    return { success: false, error: described, duration };
  }
}
119
+
120
/**
 * Main worker message handler.
 *
 * Runs every task received from the parent and posts exactly one result back.
 * If the result itself cannot be posted (`postMessage` throws when a value is
 * not cloneable), a plain failure result is posted instead, so the parent is
 * not left waiting for a reply.
 *
 * When the script is not running as a worker thread (`parentPort` is null),
 * it logs an error and exits with code 1.
 */
if (parentPort) {
  const port = parentPort;

  const respond = async (task: WorkerTask<any>): Promise<void> => {
    const result = await handleTask(task);

    try {
      port.postMessage(result);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      const failure: WorkerResult<any> = {
        success: false,
        error: `Failed to send worker result: ${reason}`,
        duration: result.duration,
      };
      // Only primitives here, so this message is always cloneable.
      port.postMessage(failure);
    }
  };

  port.on('message', (task: WorkerTask<any>) => {
    void respond(task);
  });
} else {
  console.error('Worker script must be run as a Worker thread');
  process.exit(1);
}
+ }