pdf-oxide-fips 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/LICENSE-APACHE +176 -0
  2. package/LICENSE-MIT +25 -0
  3. package/README.md +218 -0
  4. package/lib/builders/annotation-builder.d.ts +198 -0
  5. package/lib/builders/annotation-builder.js +317 -0
  6. package/lib/builders/conversion-options-builder.d.ts +106 -0
  7. package/lib/builders/conversion-options-builder.js +214 -0
  8. package/lib/builders/document-builder.d.ts +381 -0
  9. package/lib/builders/document-builder.js +770 -0
  10. package/lib/builders/index.d.ts +13 -0
  11. package/lib/builders/index.js +13 -0
  12. package/lib/builders/metadata-builder.d.ts +201 -0
  13. package/lib/builders/metadata-builder.js +285 -0
  14. package/lib/builders/pdf-builder.d.ts +216 -0
  15. package/lib/builders/pdf-builder.js +350 -0
  16. package/lib/builders/search-options-builder.d.ts +73 -0
  17. package/lib/builders/search-options-builder.js +129 -0
  18. package/lib/builders/streaming-table.d.ts +64 -0
  19. package/lib/builders/streaming-table.js +140 -0
  20. package/lib/document-editor-manager.d.ts +139 -0
  21. package/lib/document-editor-manager.js +256 -0
  22. package/lib/document-editor.d.ts +124 -0
  23. package/lib/document-editor.js +318 -0
  24. package/lib/errors.d.ts +382 -0
  25. package/lib/errors.js +1115 -0
  26. package/lib/form-field-manager.d.ts +299 -0
  27. package/lib/form-field-manager.js +568 -0
  28. package/lib/hybrid-ml-manager.d.ts +142 -0
  29. package/lib/hybrid-ml-manager.js +208 -0
  30. package/lib/index.d.ts +205 -0
  31. package/lib/index.js +693 -0
  32. package/lib/managers/accessibility-manager.d.ts +148 -0
  33. package/lib/managers/accessibility-manager.js +234 -0
  34. package/lib/managers/annotation-manager.d.ts +219 -0
  35. package/lib/managers/annotation-manager.js +359 -0
  36. package/lib/managers/barcode-manager.d.ts +82 -0
  37. package/lib/managers/barcode-manager.js +263 -0
  38. package/lib/managers/batch-manager.d.ts +185 -0
  39. package/lib/managers/batch-manager.js +385 -0
  40. package/lib/managers/cache-manager.d.ts +181 -0
  41. package/lib/managers/cache-manager.js +384 -0
  42. package/lib/managers/compliance-manager.d.ts +103 -0
  43. package/lib/managers/compliance-manager.js +453 -0
  44. package/lib/managers/content-manager.d.ts +120 -0
  45. package/lib/managers/content-manager.js +294 -0
  46. package/lib/managers/document-utility-manager.d.ts +369 -0
  47. package/lib/managers/document-utility-manager.js +730 -0
  48. package/lib/managers/dom-pdf-creator.d.ts +104 -0
  49. package/lib/managers/dom-pdf-creator.js +299 -0
  50. package/lib/managers/editing-manager.d.ts +248 -0
  51. package/lib/managers/editing-manager.js +387 -0
  52. package/lib/managers/enterprise-manager.d.ts +192 -0
  53. package/lib/managers/enterprise-manager.js +307 -0
  54. package/lib/managers/extended-managers.d.ts +122 -0
  55. package/lib/managers/extended-managers.js +664 -0
  56. package/lib/managers/extraction-manager.d.ts +246 -0
  57. package/lib/managers/extraction-manager.js +482 -0
  58. package/lib/managers/final-utilities.d.ts +127 -0
  59. package/lib/managers/final-utilities.js +657 -0
  60. package/lib/managers/hybrid-ml-advanced.d.ts +136 -0
  61. package/lib/managers/hybrid-ml-advanced.js +722 -0
  62. package/lib/managers/index.d.ts +64 -0
  63. package/lib/managers/index.js +69 -0
  64. package/lib/managers/layer-manager.d.ts +203 -0
  65. package/lib/managers/layer-manager.js +401 -0
  66. package/lib/managers/metadata-manager.d.ts +148 -0
  67. package/lib/managers/metadata-manager.js +280 -0
  68. package/lib/managers/ocr-manager.d.ts +194 -0
  69. package/lib/managers/ocr-manager.js +582 -0
  70. package/lib/managers/optimization-manager.d.ts +102 -0
  71. package/lib/managers/optimization-manager.js +213 -0
  72. package/lib/managers/outline-manager.d.ts +101 -0
  73. package/lib/managers/outline-manager.js +169 -0
  74. package/lib/managers/page-manager.d.ts +142 -0
  75. package/lib/managers/page-manager.js +235 -0
  76. package/lib/managers/pattern-detection.d.ts +169 -0
  77. package/lib/managers/pattern-detection.js +322 -0
  78. package/lib/managers/rendering-manager.d.ts +353 -0
  79. package/lib/managers/rendering-manager.js +679 -0
  80. package/lib/managers/search-manager.d.ts +235 -0
  81. package/lib/managers/search-manager.js +329 -0
  82. package/lib/managers/security-manager.d.ts +161 -0
  83. package/lib/managers/security-manager.js +292 -0
  84. package/lib/managers/signature-manager.d.ts +738 -0
  85. package/lib/managers/signature-manager.js +1509 -0
  86. package/lib/managers/streams.d.ts +262 -0
  87. package/lib/managers/streams.js +477 -0
  88. package/lib/managers/xfa-manager.d.ts +227 -0
  89. package/lib/managers/xfa-manager.js +539 -0
  90. package/lib/native-loader.d.ts +7 -0
  91. package/lib/native-loader.js +62 -0
  92. package/lib/native.d.ts +16 -0
  93. package/lib/native.js +69 -0
  94. package/lib/pdf-creator-manager.d.ts +200 -0
  95. package/lib/pdf-creator-manager.js +381 -0
  96. package/lib/properties.d.ts +79 -0
  97. package/lib/properties.js +454 -0
  98. package/lib/result-accessors-manager.d.ts +346 -0
  99. package/lib/result-accessors-manager.js +706 -0
  100. package/lib/thumbnail-manager.d.ts +121 -0
  101. package/lib/thumbnail-manager.js +205 -0
  102. package/lib/timestamp.d.ts +54 -0
  103. package/lib/timestamp.js +115 -0
  104. package/lib/tsa-client.d.ts +44 -0
  105. package/lib/tsa-client.js +67 -0
  106. package/lib/types/common.d.ts +189 -0
  107. package/lib/types/common.js +17 -0
  108. package/lib/types/document-types.d.ts +352 -0
  109. package/lib/types/document-types.js +82 -0
  110. package/lib/types/index.d.ts +5 -0
  111. package/lib/types/index.js +5 -0
  112. package/lib/types/manager-types.d.ts +179 -0
  113. package/lib/types/manager-types.js +100 -0
  114. package/lib/types/native-bindings.d.ts +439 -0
  115. package/lib/types/native-bindings.js +7 -0
  116. package/lib/workers/index.d.ts +6 -0
  117. package/lib/workers/index.js +5 -0
  118. package/lib/workers/pool.d.ts +64 -0
  119. package/lib/workers/pool.js +192 -0
  120. package/lib/workers/worker.d.ts +5 -0
  121. package/lib/workers/worker.js +99 -0
  122. package/package.json +79 -0
  123. package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
  124. package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
  125. package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
  126. package/prebuilds/linux-x64/pdf_oxide.node +0 -0
  127. package/prebuilds/win32-x64/pdf_oxide.node +0 -0
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Batch Processing Manager for Parallel Document Operations
3
+ *
4
+ * Enables efficient parallel processing of multiple PDF documents with:
5
+ * - Configurable concurrency control
6
+ * - Real-time progress tracking with ETA calculation
7
+ * - Memory-aware backpressure handling
8
+ * - Per-document timeout support
9
+ * - Error resilience and detailed reporting
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * import { BatchManager } from 'pdf-oxide';
14
+ * import { PdfDocument } from 'pdf-oxide';
15
+ *
16
+ * const batch = new BatchManager([
17
+ * { path: 'doc1.pdf' },
18
+ * { path: 'doc2.pdf' },
19
+ * { path: 'doc3.pdf' }
20
+ * ]);
21
+ *
22
+ * // Extract text from multiple documents in parallel
23
+ * const results = await batch.extractTextBatch({
24
+ * maxParallel: 4,
25
+ * timeout: 30000,
26
+ * onProgress: (progress) => {
27
+ * console.log('Progress: ' + Math.round(progress.progress * 100) + '%');
28
+ * console.log('ETA: ' + progress.eta + 'ms');
29
+ * }
30
+ * });
31
+ *
32
+ * results.forEach(result => {
33
+ * if (result.success) {
34
+ * console.log(result.document.path + ': ' + result.data.length + ' chars');
35
+ * } else {
36
+ * console.error(result.document.path + ': ' + result.error.message);
37
+ * }
38
+ * });
39
+ * ```
40
+ */
41
+ /**
42
+ * Describes one PDF file queued for batch processing.
+ *
+ * NOTE(review): the BatchManager implementation shipped in this package only
+ * reads `path` (validated in the constructor). `id`, `priority` and `metadata`
+ * travel along on each result's `document` field but are not otherwise
+ * consulted; documents are started in array order. Confirm whether priority
+ * ordering is expected to be applied by the caller.
43
+ */
44
+ export interface BatchDocument {
45
+ /** File path to the PDF document; must be a non-empty string or the BatchManager constructor throws */
46
+ path: string;
47
+ /** Optional unique identifier */
48
+ id?: string;
49
+ /** Priority (1-10, default 5); see the review note above, the value is not read by BatchManager */
50
+ priority?: number;
51
+ /** Optional caller-defined metadata, passed through unchanged */
52
+ metadata?: Record<string, any>;
53
+ }
54
+ /**
55
+ * Snapshot handed to `BatchOptions.onProgress` each time a document finishes
+ * (whether it succeeded, failed or timed out).
56
+ */
57
+ export interface BatchProgress {
58
+ /** Total documents in batch */
59
+ total: number;
60
+ /** Number of documents whose result reported `success: true` so far */
61
+ completed: number;
62
+ /** Number of documents that failed so far (failure result, thrown error or timeout) */
63
+ failed: number;
64
+ /** Count of documents finished so far (`completed + failed`); despite the name this is not the index of an in-flight document */
65
+ current: number;
66
+ /** Fraction of documents finished, successes and failures combined (0.0-1.0) */
67
+ progress: number;
68
+ /** Estimated time remaining in milliseconds: remaining documents multiplied by the average elapsed time per finished document, never negative */
69
+ eta: number;
70
+ /** Number of operations still in flight at the moment of the callback */
71
+ activeOperations: number;
72
+ /** Batch start time (milliseconds since epoch, from Date.now()) */
73
+ startTime: number;
74
+ /** Elapsed time since start (milliseconds) */
75
+ elapsedTime: number;
76
+ }
77
+ /**
78
+ * Outcome of processing a single document in a batch.
+ *
+ * Exactly one of `data` / `error` is expected to be populated, selected by
+ * `success`.
79
+ */
80
+ export interface BatchResult<T = any> {
81
+ /** The document that was processed (the same object that was passed to the BatchManager constructor) */
82
+ document: BatchDocument;
83
+ /** Whether the operation succeeded */
84
+ success: boolean;
85
+ /** The result data if successful */
86
+ data?: T;
87
+ /** Error if operation failed; non-Error throwables are wrapped as `new Error(String(value))` */
88
+ error?: Error;
89
+ /** Time to process this document (milliseconds), measured by the batch scheduler from task start until the processor settled or timed out */
90
+ duration: number;
91
+ }
92
+ /**
93
+ * Options accepted by every batch method of BatchManager.
94
+ */
95
+ export interface BatchOptions {
96
+ /** Maximum number of parallel operations (default: `os.cpus().length`; a value of 0 also selects the default) */
97
+ maxParallel?: number;
98
+ /**
+ * Timeout per document in milliseconds (default: 30000; 0 also selects the default).
+ * On expiry the document is reported as failed with the message
+ * "Timeout after <n>ms"; the underlying operation is raced against the timer,
+ * not cancelled.
+ */
99
+ timeout?: number;
100
+ /** Progress callback invoked once after each document finishes, successfully or not */
101
+ onProgress?: (progress: BatchProgress) => void;
102
+ /** Backpressure configuration for memory management */
103
+ backpressure?: {
104
+ /** Heap usage threshold in MB (default: 500), compared against `process.memoryUsage().heapUsed`; while exceeded the scheduler pauses 100 ms before starting more work */
105
+ maxMemoryMB?: number;
106
+ /** Interval to check memory in ms (default: 1000). NOTE(review): this value is not read by the BatchManager implementation in this package; memory is sampled once per scheduling pass */
107
+ checkInterval?: number;
108
+ };
109
+ }
110
+ /**
111
+ * Aggregate figures returned by `BatchManager.getStatistics()`.
+ *
+ * `total` is set at construction; the remaining fields are written when a
+ * batch run finishes (and `peakMemory` while it runs), so they are zero
+ * before the first run.
112
+ */
113
+ export interface BatchStatistics {
114
+ /** Number of documents handed to the constructor */
115
+ total: number;
116
+ /** Documents whose result reported success in the most recent run */
117
+ completed: number;
118
+ /** Documents that failed in the most recent run */
119
+ failed: number;
120
+ /** Wall-clock time of the most recent run (milliseconds) */
121
+ totalTime: number;
122
+ /** `totalTime / completed`, i.e. wall-clock milliseconds per successful document; 0 when nothing succeeded */
123
+ averageTime: number;
124
+ /** Successful documents per second over the most recent run; 0 when `totalTime` is 0 */
125
+ throughput: number;
126
+ /** Highest `heapUsed` value (MB) sampled by the scheduler across runs of this manager */
127
+ peakMemory: number;
128
+ }
129
+ /**
130
+ * Batch processor for parallel document operations.
+ *
+ * Each batch method opens every document via `PdfDocument.open(path)`, runs
+ * one operation on it and resolves with one BatchResult per document.
+ * Per-document failures are reported through `success: false` rather than by
+ * rejecting the returned promise.
131
+ */
132
+ export declare class BatchManager {
133
+ private documents;
134
+ private stats;
135
+ /**
136
+ * Creates a new BatchManager
137
+ * @param documents - Array of documents to process
138
+ * @throws Error if `documents` is not a non-empty array, or if any entry lacks a non-empty string `path`
139
+ */
140
+ constructor(documents: BatchDocument[]);
141
+ /**
142
+ * Get current statistics
+ * @returns A shallow copy of the internal statistics object
143
+ */
144
+ getStatistics(): BatchStatistics;
145
+ /**
146
+ * Process documents in a queue with concurrency control, per-document
+ * timeout, progress reporting and a heap-usage check before scheduling.
147
+ * @private
148
+ */
149
+ private processQueue;
150
+ /**
151
+ * Extract text from multiple documents in parallel
+ * (calls `extractAllText()` on each document's extraction manager).
152
+ * @param options - Batch processing options
153
+ * @returns Array of extraction results
154
+ */
155
+ extractTextBatch(options?: BatchOptions): Promise<BatchResult<string>[]>;
156
+ /**
157
+ * Extract markdown from multiple documents in parallel.
+ * Pages are extracted one by one and joined with a "---" separator line
+ * surrounded by blank lines.
158
+ * @param options - Batch processing options
159
+ * @returns Array of extraction results
160
+ */
161
+ extractMarkdownBatch(options?: BatchOptions): Promise<BatchResult<string>[]>;
162
+ /**
163
+ * Extract HTML from multiple documents in parallel.
+ * Each page's HTML is wrapped in `<div class="page page-N">` (N is 1-based)
+ * inside a single `<html><body>` envelope.
164
+ * @param options - Batch processing options
165
+ * @returns Array of extraction results
166
+ */
167
+ extractHtmlBatch(options?: BatchOptions): Promise<BatchResult<string>[]>;
168
+ /**
169
+ * Search for a term in multiple documents in parallel
170
+ * @param searchTerm - Term to search for
171
+ * @param options - Batch processing options
172
+ * @returns Array of search results; `data` lists only pages with at least one match, where `page` is the zero-based page index and `count` the number of matches on it
+ * @throws Error if `searchTerm` is empty or not a string
173
+ */
174
+ searchBatch(searchTerm: string, options?: BatchOptions): Promise<BatchResult<Array<{
175
+ page: number;
176
+ count: number;
177
+ }>>[]>;
178
+ /**
179
+ * Generic batch processor for custom operations.
+ * The document is opened before `processor` is called and closed after it
+ * resolves; the resolved value becomes the result's `data`.
180
+ * @param processor - Function to process each document; receives the batch entry and the opened document
181
+ * @param options - Batch processing options
182
+ * @returns Array of results
183
+ */
184
+ processBatch<T>(processor: (doc: BatchDocument, pdfDoc: any) => Promise<T>, options?: BatchOptions): Promise<BatchResult<T>[]>;
185
+ }
@@ -0,0 +1,385 @@
1
+ /**
2
+ * Batch Processing Manager for Parallel Document Operations
3
+ *
4
+ * Enables efficient parallel processing of multiple PDF documents with:
5
+ * - Configurable concurrency control
6
+ * - Real-time progress tracking with ETA calculation
7
+ * - Memory-aware backpressure handling
8
+ * - Per-document timeout support
9
+ * - Error resilience and detailed reporting
10
+ *
11
+ * @example
12
+ * ```typescript
13
+ * import { BatchManager } from 'pdf-oxide';
14
+ * import { PdfDocument } from 'pdf-oxide';
15
+ *
16
+ * const batch = new BatchManager([
17
+ * { path: 'doc1.pdf' },
18
+ * { path: 'doc2.pdf' },
19
+ * { path: 'doc3.pdf' }
20
+ * ]);
21
+ *
22
+ * // Extract text from multiple documents in parallel
23
+ * const results = await batch.extractTextBatch({
24
+ * maxParallel: 4,
25
+ * timeout: 30000,
26
+ * onProgress: (progress) => {
27
+ * console.log('Progress: ' + Math.round(progress.progress * 100) + '%');
28
+ * console.log('ETA: ' + progress.eta + 'ms');
29
+ * }
30
+ * });
31
+ *
32
+ * results.forEach(result => {
33
+ * if (result.success) {
34
+ * console.log(result.document.path + ': ' + result.data.length + ' chars');
35
+ * } else {
36
+ * console.error(result.document.path + ': ' + result.error.message);
37
+ * }
38
+ * });
39
+ * ```
40
+ */
41
+ import os from 'os';
42
/**
 * Normalises any thrown value into an Error so results always expose `.message`.
 */
function toError(value) {
    return value instanceof Error ? value : new Error(String(value));
}
/**
 * Resolves the concurrency limit for a batch run.
 *
 * Falsy, non-numeric, NaN or non-positive requests fall back to the CPU count
 * (at least 1), so the scheduler can always start work. Fractional requests are
 * rounded up, which matches what an `active < limit` comparison would allow.
 */
function resolveMaxParallel(requested) {
    const fallback = os.cpus().length || 1;
    const limit = Number(requested);
    return limit > 0 ? Math.ceil(limit) : fallback;
}
/**
 * Calls a manager factory method on an opened document and fails loudly when
 * the factory is missing or returns nothing.
 */
function requireManager(pdfDoc, factoryName, label) {
    const manager = pdfDoc[factoryName]?.();
    if (!manager) {
        throw new Error(`Failed to create ${label} manager`);
    }
    return manager;
}
/**
 * Opens `doc.path`, runs `work` on the opened document and wraps the outcome
 * in a batch result. The document is closed whether `work` succeeds or throws.
 * Any error (import, open, work, close) is reported as a failure result
 * instead of being thrown.
 */
async function processOpenedDocument(doc, work) {
    try {
        // Dynamic import to avoid circular dependencies
        const { PdfDocument } = await import('../index.js');
        const pdfDoc = PdfDocument.open(doc.path);
        let data;
        try {
            data = await work(pdfDoc);
        }
        finally {
            // Release the native handle even when `work` throws.
            if (typeof pdfDoc?.close === 'function') {
                pdfDoc.close();
            }
        }
        return { document: doc, success: true, data, duration: 0 };
    }
    catch (error) {
        return { document: doc, success: false, error: toError(error), duration: 0 };
    }
}
/**
 * Batch processor for parallel document operations.
 *
 * Per-document failures are reported through `success: false` results; the
 * batch methods themselves only reject on programming errors (for example a
 * throwing `onProgress` callback).
 */
export class BatchManager {
    /**
     * Creates a new BatchManager
     * @param documents - Array of documents to process
     * @throws Error if documents array is empty or invalid
     */
    constructor(documents) {
        this.stats = {
            total: 0,
            completed: 0,
            failed: 0,
            totalTime: 0,
            averageTime: 0,
            throughput: 0,
            peakMemory: 0,
        };
        if (!Array.isArray(documents) || documents.length === 0) {
            throw new Error('Documents array must not be empty');
        }
        for (const doc of documents) {
            // `doc?.` turns null/undefined entries into the documented error
            // instead of a raw TypeError.
            if (typeof doc?.path !== 'string' || doc.path === '') {
                throw new Error('Each document must have a valid path property');
            }
        }
        // Copy the array so later mutation by the caller cannot desynchronise
        // the queue from `stats.total`; the entries themselves are shared.
        this.documents = [...documents];
        this.stats.total = this.documents.length;
    }
    /**
     * Get current statistics
     * @returns A shallow copy, so callers cannot mutate internal counters
     */
    getStatistics() {
        return { ...this.stats };
    }
    /**
     * Process documents in a queue with concurrency control
     *
     * @param processor - Called as `processor(doc, index)`; must resolve to a
     *   batch result object. Its `duration` is overwritten with the measured time.
     * @param options - Batch processing options
     * @returns One result per document, in the order of the input documents
     * @private
     */
    async processQueue(processor, options = {}) {
        const maxParallel = resolveMaxParallel(options.maxParallel);
        const timeout = options.timeout || 30000;
        const backpressure = options.backpressure || {
            maxMemoryMB: 500,
            checkInterval: 1000,
        };
        const memoryLimitMB = backpressure.maxMemoryMB || 500;
        const total = this.documents.length;
        const results = new Array(total);
        const startTime = Date.now();
        let completed = 0;
        let failed = 0;
        let active = 0;
        // Progress tracking helper
        const reportProgress = () => {
            if (!options.onProgress) {
                return;
            }
            const elapsedTime = Date.now() - startTime;
            const finished = completed + failed;
            const avgTimePerDoc = finished > 0 ? elapsedTime / finished : 0;
            const eta = finished > 0 ? (total - finished) * avgTimePerDoc : 0;
            options.onProgress({
                total,
                completed,
                failed,
                current: finished,
                progress: total > 0 ? finished / total : 0,
                eta: Math.max(0, eta),
                activeOperations: active,
                startTime,
                elapsedTime,
            });
        };
        // Memory monitoring helper
        const checkMemory = async () => {
            const memUsageMB = process.memoryUsage().heapUsed / 1024 / 1024;
            this.stats.peakMemory = Math.max(this.stats.peakMemory, memUsageMB);
            if (memUsageMB > memoryLimitMB) {
                // Wait a bit for garbage collection
                await new Promise((resolve) => setTimeout(resolve, 100));
            }
        };
        // Races the processor against a timer and always clears the timer, so
        // finished documents do not leave live timers holding the process open.
        const runWithTimeout = async (doc, docIndex) => {
            let timer;
            try {
                return await Promise.race([
                    processor(doc, docIndex),
                    new Promise((_, reject) => {
                        timer = setTimeout(() => reject(new Error(`Timeout after ${timeout}ms`)), timeout);
                    }),
                ]);
            }
            finally {
                clearTimeout(timer);
            }
        };
        // Runs one document and records its outcome at its original index.
        const runTask = async (doc, docIndex) => {
            const docStartTime = Date.now();
            try {
                const result = await runWithTimeout(doc, docIndex);
                result.duration = Date.now() - docStartTime;
                results[docIndex] = result;
                if (result.success) {
                    completed++;
                }
                else {
                    failed++;
                }
            }
            catch (error) {
                results[docIndex] = {
                    document: doc,
                    success: false,
                    error: toError(error),
                    duration: Date.now() - docStartTime,
                };
                failed++;
            }
            finally {
                active--;
                reportProgress();
            }
        };
        // Each task removes itself from this set when it settles, so the set
        // holds exactly the unfinished tasks and the loop cannot exit early.
        const inFlight = new Set();
        let index = 0;
        while (index < total || inFlight.size > 0) {
            // Check memory before starting new operations
            await checkMemory();
            // Start new operations while under concurrency limit
            while (active < maxParallel && index < total) {
                const docIndex = index++;
                active++;
                const tracked = runTask(this.documents[docIndex], docIndex)
                    .finally(() => inFlight.delete(tracked));
                inFlight.add(tracked);
            }
            // Wait for at least one operation to complete
            if (inFlight.size > 0) {
                await Promise.race(inFlight);
            }
        }
        // Update final statistics
        const totalTime = Date.now() - startTime;
        this.stats.totalTime = totalTime;
        this.stats.completed = completed;
        this.stats.failed = failed;
        this.stats.averageTime = completed > 0 ? totalTime / completed : 0;
        this.stats.throughput = totalTime > 0 ? (completed / totalTime) * 1000 : 0;
        return results;
    }
    /**
     * Extract text from multiple documents in parallel
     * @param options - Batch processing options
     * @returns Array of extraction results
     */
    async extractTextBatch(options = {}) {
        return this.processQueue((doc) => processOpenedDocument(doc, (pdfDoc) => {
            const extractionMgr = requireManager(pdfDoc, 'createExtractionManager', 'extraction');
            return extractionMgr.extractAllText();
        }), options);
    }
    /**
     * Extract markdown from multiple documents in parallel
     *
     * Pages are joined with a horizontal-rule separator.
     * @param options - Batch processing options
     * @returns Array of extraction results
     */
    async extractMarkdownBatch(options = {}) {
        return this.processQueue((doc) => processOpenedDocument(doc, (pdfDoc) => {
            const extractionMgr = requireManager(pdfDoc, 'createExtractionManager', 'extraction');
            const pageCount = pdfDoc.pageCount || 0;
            const pages = [];
            for (let i = 0; i < pageCount; i++) {
                pages.push(extractionMgr.extractMarkdown(i) || '');
            }
            return pages.join('\n\n---\n\n');
        }), options);
    }
    /**
     * Extract HTML from multiple documents in parallel
     *
     * Each page is wrapped in `<div class="page page-N">` (N is 1-based).
     * @param options - Batch processing options
     * @returns Array of extraction results
     */
    async extractHtmlBatch(options = {}) {
        return this.processQueue((doc) => processOpenedDocument(doc, (pdfDoc) => {
            const extractionMgr = requireManager(pdfDoc, 'createExtractionManager', 'extraction');
            const pageCount = pdfDoc.pageCount || 0;
            let html = '<html><body>';
            for (let i = 0; i < pageCount; i++) {
                html += `<div class="page page-${i + 1}">`;
                html += extractionMgr.extractHtml(i) || '';
                html += '</div>';
            }
            html += '</body></html>';
            return html;
        }), options);
    }
    /**
     * Search for a term in multiple documents in parallel
     * @param searchTerm - Term to search for
     * @param options - Batch processing options
     * @returns Array of search results; only pages with matches are listed,
     *   identified by zero-based page index
     * @throws Error if searchTerm is empty or not a string
     */
    async searchBatch(searchTerm, options = {}) {
        if (!searchTerm || typeof searchTerm !== 'string') {
            throw new Error('Search term must be a non-empty string');
        }
        return this.processQueue((doc) => processOpenedDocument(doc, (pdfDoc) => {
            const searchMgr = requireManager(pdfDoc, 'createSearchManager', 'search');
            const pageCount = pdfDoc.pageCount || 0;
            const hits = [];
            for (let i = 0; i < pageCount; i++) {
                const matches = searchMgr.search(searchTerm, i) || [];
                if (matches.length > 0) {
                    hits.push({ page: i, count: matches.length });
                }
            }
            return hits;
        }), options);
    }
    /**
     * Generic batch processor for custom operations
     * @param processor - Function to process each document; receives the batch
     *   entry and the opened document, which is closed once it settles
     * @param options - Batch processing options
     * @returns Array of results
     */
    async processBatch(processor, options = {}) {
        return this.processQueue((doc) => processOpenedDocument(doc, (pdfDoc) => processor(doc, pdfDoc)), options);
    }
}