pdf-oxide-fips 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE-APACHE +176 -0
  2. package/LICENSE-MIT +25 -0
  3. package/README.md +218 -0
  4. package/lib/builders/annotation-builder.d.ts +198 -0
  5. package/lib/builders/annotation-builder.js +317 -0
  6. package/lib/builders/conversion-options-builder.d.ts +106 -0
  7. package/lib/builders/conversion-options-builder.js +214 -0
  8. package/lib/builders/document-builder.d.ts +381 -0
  9. package/lib/builders/document-builder.js +770 -0
  10. package/lib/builders/index.d.ts +13 -0
  11. package/lib/builders/index.js +13 -0
  12. package/lib/builders/metadata-builder.d.ts +201 -0
  13. package/lib/builders/metadata-builder.js +285 -0
  14. package/lib/builders/pdf-builder.d.ts +216 -0
  15. package/lib/builders/pdf-builder.js +350 -0
  16. package/lib/builders/search-options-builder.d.ts +73 -0
  17. package/lib/builders/search-options-builder.js +129 -0
  18. package/lib/builders/streaming-table.d.ts +64 -0
  19. package/lib/builders/streaming-table.js +140 -0
  20. package/lib/document-editor-manager.d.ts +139 -0
  21. package/lib/document-editor-manager.js +256 -0
  22. package/lib/document-editor.d.ts +124 -0
  23. package/lib/document-editor.js +318 -0
  24. package/lib/errors.d.ts +382 -0
  25. package/lib/errors.js +1115 -0
  26. package/lib/form-field-manager.d.ts +299 -0
  27. package/lib/form-field-manager.js +568 -0
  28. package/lib/hybrid-ml-manager.d.ts +142 -0
  29. package/lib/hybrid-ml-manager.js +208 -0
  30. package/lib/index.d.ts +205 -0
  31. package/lib/index.js +693 -0
  32. package/lib/managers/accessibility-manager.d.ts +148 -0
  33. package/lib/managers/accessibility-manager.js +234 -0
  34. package/lib/managers/annotation-manager.d.ts +219 -0
  35. package/lib/managers/annotation-manager.js +359 -0
  36. package/lib/managers/barcode-manager.d.ts +82 -0
  37. package/lib/managers/barcode-manager.js +263 -0
  38. package/lib/managers/batch-manager.d.ts +185 -0
  39. package/lib/managers/batch-manager.js +385 -0
  40. package/lib/managers/cache-manager.d.ts +181 -0
  41. package/lib/managers/cache-manager.js +384 -0
  42. package/lib/managers/compliance-manager.d.ts +103 -0
  43. package/lib/managers/compliance-manager.js +453 -0
  44. package/lib/managers/content-manager.d.ts +120 -0
  45. package/lib/managers/content-manager.js +294 -0
  46. package/lib/managers/document-utility-manager.d.ts +369 -0
  47. package/lib/managers/document-utility-manager.js +730 -0
  48. package/lib/managers/dom-pdf-creator.d.ts +104 -0
  49. package/lib/managers/dom-pdf-creator.js +299 -0
  50. package/lib/managers/editing-manager.d.ts +248 -0
  51. package/lib/managers/editing-manager.js +387 -0
  52. package/lib/managers/enterprise-manager.d.ts +192 -0
  53. package/lib/managers/enterprise-manager.js +307 -0
  54. package/lib/managers/extended-managers.d.ts +122 -0
  55. package/lib/managers/extended-managers.js +664 -0
  56. package/lib/managers/extraction-manager.d.ts +246 -0
  57. package/lib/managers/extraction-manager.js +482 -0
  58. package/lib/managers/final-utilities.d.ts +127 -0
  59. package/lib/managers/final-utilities.js +657 -0
  60. package/lib/managers/hybrid-ml-advanced.d.ts +136 -0
  61. package/lib/managers/hybrid-ml-advanced.js +722 -0
  62. package/lib/managers/index.d.ts +64 -0
  63. package/lib/managers/index.js +69 -0
  64. package/lib/managers/layer-manager.d.ts +203 -0
  65. package/lib/managers/layer-manager.js +401 -0
  66. package/lib/managers/metadata-manager.d.ts +148 -0
  67. package/lib/managers/metadata-manager.js +280 -0
  68. package/lib/managers/ocr-manager.d.ts +194 -0
  69. package/lib/managers/ocr-manager.js +582 -0
  70. package/lib/managers/optimization-manager.d.ts +102 -0
  71. package/lib/managers/optimization-manager.js +213 -0
  72. package/lib/managers/outline-manager.d.ts +101 -0
  73. package/lib/managers/outline-manager.js +169 -0
  74. package/lib/managers/page-manager.d.ts +142 -0
  75. package/lib/managers/page-manager.js +235 -0
  76. package/lib/managers/pattern-detection.d.ts +169 -0
  77. package/lib/managers/pattern-detection.js +322 -0
  78. package/lib/managers/rendering-manager.d.ts +353 -0
  79. package/lib/managers/rendering-manager.js +679 -0
  80. package/lib/managers/search-manager.d.ts +235 -0
  81. package/lib/managers/search-manager.js +329 -0
  82. package/lib/managers/security-manager.d.ts +161 -0
  83. package/lib/managers/security-manager.js +292 -0
  84. package/lib/managers/signature-manager.d.ts +738 -0
  85. package/lib/managers/signature-manager.js +1509 -0
  86. package/lib/managers/streams.d.ts +262 -0
  87. package/lib/managers/streams.js +477 -0
  88. package/lib/managers/xfa-manager.d.ts +227 -0
  89. package/lib/managers/xfa-manager.js +539 -0
  90. package/lib/native-loader.d.ts +7 -0
  91. package/lib/native-loader.js +62 -0
  92. package/lib/native.d.ts +16 -0
  93. package/lib/native.js +69 -0
  94. package/lib/pdf-creator-manager.d.ts +200 -0
  95. package/lib/pdf-creator-manager.js +381 -0
  96. package/lib/properties.d.ts +79 -0
  97. package/lib/properties.js +454 -0
  98. package/lib/result-accessors-manager.d.ts +346 -0
  99. package/lib/result-accessors-manager.js +706 -0
  100. package/lib/thumbnail-manager.d.ts +121 -0
  101. package/lib/thumbnail-manager.js +205 -0
  102. package/lib/timestamp.d.ts +54 -0
  103. package/lib/timestamp.js +115 -0
  104. package/lib/tsa-client.d.ts +44 -0
  105. package/lib/tsa-client.js +67 -0
  106. package/lib/types/common.d.ts +189 -0
  107. package/lib/types/common.js +17 -0
  108. package/lib/types/document-types.d.ts +352 -0
  109. package/lib/types/document-types.js +82 -0
  110. package/lib/types/index.d.ts +5 -0
  111. package/lib/types/index.js +5 -0
  112. package/lib/types/manager-types.d.ts +179 -0
  113. package/lib/types/manager-types.js +100 -0
  114. package/lib/types/native-bindings.d.ts +439 -0
  115. package/lib/types/native-bindings.js +7 -0
  116. package/lib/workers/index.d.ts +6 -0
  117. package/lib/workers/index.js +5 -0
  118. package/lib/workers/pool.d.ts +64 -0
  119. package/lib/workers/pool.js +192 -0
  120. package/lib/workers/worker.d.ts +5 -0
  121. package/lib/workers/worker.js +99 -0
  122. package/package.json +79 -0
  123. package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
  124. package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
  125. package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
  126. package/prebuilds/linux-x64/pdf_oxide.node +0 -0
  127. package/prebuilds/win32-x64/pdf_oxide.node +0 -0
@@ -0,0 +1,482 @@
1
+ /**
2
+ * Manager for content extraction from PDF documents
3
+ *
4
+ * Caching is handled automatically at the Rust FFI layer, eliminating
5
+ * the need for duplicate cache implementations in the binding.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { ExtractionManager, ConversionOptionsBuilder } from 'pdf_oxide';
10
+ *
11
+ * const doc = PdfDocument.open('document.pdf');
12
+ * const extractionManager = new ExtractionManager(doc);
13
+ *
14
+ * // Extract text from a single page
15
+ * const text = extractionManager.extractText(0);
16
+ * console.log(text);
17
+ *
18
+ * // Extract all text
19
+ * const allText = extractionManager.extractAllText();
20
+ *
21
+ * // Extract with custom options
22
+ * const options = ConversionOptionsBuilder.highQuality().build();
23
+ * const markdown = extractionManager.extractMarkdown(0, options);
24
+ * ```
25
+ */
26
+ export class ExtractionManager {
27
+ /**
28
+ * Creates a new ExtractionManager for the given document
29
+ * @param document - The PDF document
30
+ * @throws Error if document is null or undefined
31
+ */
32
+ constructor(document) {
33
+ if (!document) {
34
+ throw new Error('Document is required');
35
+ }
36
+ this._document = document;
37
+ }
38
+ /**
39
+ * Extracts text from a single page.
40
+ *
41
+ * The native layer produces UTF-8 bytes, which Node decodes into a JS
42
+ * `string` (UTF-16 code units internally). As a result,
43
+ * `text.length` reports UTF-16 code units, not bytes — so a 648-byte
44
+ * UTF-8 string containing two accented letters reads as 646 in JS. Use
45
+ * `Buffer.byteLength(text, 'utf8')` if you need the byte count (e.g. to
46
+ * compare against Go's `len(string)` or Rust's `String::len()`).
47
+ *
48
+ * Results are automatically cached at the FFI layer.
49
+ *
50
+ * @param pageIndex - Zero-based page index
51
+ * @param options - Conversion options
52
+ * @returns Extracted text (UTF-16 code units)
53
+ * @throws Error if page index is invalid
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * const text = manager.extractText(0);
58
+ * console.log(`Page 1: ${text.length} UTF-16 code units`);
59
+ * console.log(` ${Buffer.byteLength(text, 'utf8')} UTF-8 bytes`);
60
+ * ```
61
+ */
62
+ extractText(pageIndex, options) {
63
+ if (typeof pageIndex !== 'number' || pageIndex < 0) {
64
+ throw new Error('Page index must be a non-negative number');
65
+ }
66
+ if (pageIndex >= this._document.pageCount) {
67
+ throw new Error(`Page index ${pageIndex} out of range`);
68
+ }
69
+ try {
70
+ return this._document.extractText(pageIndex);
71
+ }
72
+ catch (error) {
73
+ throw new Error(`Failed to extract text from page ${pageIndex}: ${error.message}`);
74
+ }
75
+ }
76
+ /**
77
+ * Extracts text from all pages
78
+ * @param options - Conversion options
79
+ * @returns All extracted text concatenated
80
+ *
81
+ * @example
82
+ * ```typescript
83
+ * const allText = manager.extractAllText();
84
+ * console.log(`Total characters: ${allText.length}`);
85
+ * ```
86
+ */
87
+ extractAllText(options) {
88
+ try {
89
+ const parts = [];
90
+ for (let i = 0; i < this._document.pageCount; i++) {
91
+ parts.push(this.extractText(i, options));
92
+ }
93
+ return parts.join('\n');
94
+ }
95
+ catch (error) {
96
+ throw new Error(`Failed to extract all text: ${error.message}`);
97
+ }
98
+ }
99
+ /**
100
+ * Extracts text from a range of pages
101
+ * @param startPageIndex - Zero-based start page index
102
+ * @param endPageIndex - Zero-based end page index (inclusive)
103
+ * @param options - Conversion options
104
+ * @returns Extracted text from pages in range
105
+ *
106
+ * @example
107
+ * ```typescript
108
+ * const text = manager.extractTextRange(0, 10);
109
+ * console.log(`Text from pages 1-11: ${text}`);
110
+ * ```
111
+ */
112
+ extractTextRange(startPageIndex, endPageIndex, options) {
113
+ if (typeof startPageIndex !== 'number' || startPageIndex < 0) {
114
+ throw new Error('Start page index must be a non-negative number');
115
+ }
116
+ if (typeof endPageIndex !== 'number' || endPageIndex < startPageIndex) {
117
+ throw new Error('End page index must be >= start page index');
118
+ }
119
+ if (endPageIndex >= this._document.pageCount) {
120
+ throw new Error(`End page index ${endPageIndex} out of range`);
121
+ }
122
+ try {
123
+ const parts = [];
124
+ for (let i = startPageIndex; i <= endPageIndex; i++) {
125
+ parts.push(this.extractText(i, options));
126
+ }
127
+ return parts.join('\n');
128
+ }
129
+ catch (error) {
130
+ throw new Error(`Failed to extract text range: ${error.message}`);
131
+ }
132
+ }
133
+ /**
134
+ * Extracts text from specific page indices (non-contiguous)
135
+ * @param pageIndices - Array of zero-based page indices
136
+ * @param options - Conversion options
137
+ * @returns Extracted text from specified pages concatenated with newlines
138
+ * @throws Error if page indices are invalid
139
+ *
140
+ * @example
141
+ * ```typescript
142
+ * const text = manager.extractTextBatch([0, 2, 5]); // Extract pages 1, 3, 6
143
+ * console.log(text);
144
+ * ```
145
+ */
146
+ extractTextBatch(pageIndices, options) {
147
+ if (!Array.isArray(pageIndices)) {
148
+ throw new Error('Page indices must be an array');
149
+ }
150
+ if (pageIndices.length === 0) {
151
+ return '';
152
+ }
153
+ try {
154
+ const parts = [];
155
+ for (const pageIndex of pageIndices) {
156
+ if (typeof pageIndex !== 'number' ||
157
+ pageIndex < 0 ||
158
+ pageIndex >= this._document.pageCount) {
159
+ throw new Error(`Invalid page index: ${pageIndex}`);
160
+ }
161
+ parts.push(this.extractText(pageIndex, options));
162
+ }
163
+ return parts.join('\n');
164
+ }
165
+ catch (error) {
166
+ throw new Error(`Failed to extract text batch: ${error.message}`);
167
+ }
168
+ }
169
+ /**
170
+ * Extracts text from pages as an array (one entry per page)
171
+ * @param startPageIndex - Zero-based start page index
172
+ * @param endPageIndex - Zero-based end page index (inclusive)
173
+ * @param options - Conversion options
174
+ * @returns Array of extracted text, one per page
175
+ *
176
+ * @example
177
+ * ```typescript
178
+ * const pages = manager.extractTextArray(0, 5);
179
+ * pages.forEach((text, i) => console.log(`Page ${i}: ${text.length} chars`));
180
+ * ```
181
+ */
182
+ extractTextArray(startPageIndex, endPageIndex, options) {
183
+ if (typeof startPageIndex !== 'number' || startPageIndex < 0) {
184
+ throw new Error('Start page index must be a non-negative number');
185
+ }
186
+ if (typeof endPageIndex !== 'number' || endPageIndex < startPageIndex) {
187
+ throw new Error('End page index must be >= start page index');
188
+ }
189
+ if (endPageIndex >= this._document.pageCount) {
190
+ throw new Error(`End page index ${endPageIndex} out of range`);
191
+ }
192
+ try {
193
+ const results = [];
194
+ for (let i = startPageIndex; i <= endPageIndex; i++) {
195
+ results.push(this.extractText(i, options));
196
+ }
197
+ return results;
198
+ }
199
+ catch (error) {
200
+ throw new Error(`Failed to extract text array: ${error.message}`);
201
+ }
202
+ }
203
+ /**
204
+ * Extracts page as Markdown.
205
+ * Results are automatically cached at the FFI layer.
206
+ * @param pageIndex - Zero-based page index
207
+ * @param options - Conversion options
208
+ * @returns Page content as Markdown
209
+ * @throws Error if page index is invalid
210
+ *
211
+ * @example
212
+ * ```typescript
213
+ * const markdown = manager.extractMarkdown(0);
214
+ * console.log(markdown); // Markdown formatted content
215
+ * ```
216
+ */
217
+ extractMarkdown(pageIndex, options) {
218
+ if (typeof pageIndex !== 'number' || pageIndex < 0) {
219
+ throw new Error('Page index must be a non-negative number');
220
+ }
221
+ if (pageIndex >= this._document.pageCount) {
222
+ throw new Error(`Page index ${pageIndex} out of range`);
223
+ }
224
+ try {
225
+ return this._document.toMarkdown(pageIndex, options);
226
+ }
227
+ catch (error) {
228
+ throw new Error(`Failed to extract markdown from page ${pageIndex}: ${error.message}`);
229
+ }
230
+ }
231
+ /**
232
+ * Extracts all pages as Markdown
233
+ * @param options - Conversion options
234
+ * @returns All pages as Markdown
235
+ *
236
+ * @example
237
+ * ```typescript
238
+ * const markdown = manager.extractAllMarkdown();
239
+ * // Write to file
240
+ * fs.writeFileSync('output.md', markdown);
241
+ * ```
242
+ */
243
+ extractAllMarkdown(options) {
244
+ try {
245
+ const parts = [];
246
+ for (let i = 0; i < this._document.pageCount; i++) {
247
+ const heading = `\n## Page ${i + 1}\n`;
248
+ const content = this.extractMarkdown(i, options);
249
+ parts.push(heading + content);
250
+ }
251
+ return parts.join('\n');
252
+ }
253
+ catch (error) {
254
+ throw new Error(`Failed to extract all markdown: ${error.message}`);
255
+ }
256
+ }
257
+ /**
258
+ * Extracts markdown from a range of pages
259
+ * @param startPageIndex - Zero-based start page index
260
+ * @param endPageIndex - Zero-based end page index (inclusive)
261
+ * @param options - Conversion options
262
+ * @returns Extracted markdown from pages in range
263
+ */
264
+ extractMarkdownRange(startPageIndex, endPageIndex, options) {
265
+ if (typeof startPageIndex !== 'number' || startPageIndex < 0) {
266
+ throw new Error('Start page index must be a non-negative number');
267
+ }
268
+ if (typeof endPageIndex !== 'number' || endPageIndex < startPageIndex) {
269
+ throw new Error('End page index must be >= start page index');
270
+ }
271
+ if (endPageIndex >= this._document.pageCount) {
272
+ throw new Error(`End page index ${endPageIndex} out of range`);
273
+ }
274
+ try {
275
+ const parts = [];
276
+ for (let i = startPageIndex; i <= endPageIndex; i++) {
277
+ const heading = `\n## Page ${i + 1}\n`;
278
+ const content = this.extractMarkdown(i, options);
279
+ parts.push(heading + content);
280
+ }
281
+ return parts.join('\n');
282
+ }
283
+ catch (error) {
284
+ throw new Error(`Failed to extract markdown range: ${error.message}`);
285
+ }
286
+ }
287
+ /**
288
+ * Gets word count for a page
289
+ * @param pageIndex - Zero-based page index
290
+ * @returns Estimated word count
291
+ */
292
+ getPageWordCount(pageIndex) {
293
+ const text = this.extractText(pageIndex);
294
+ return text.trim().split(/\s+/).length;
295
+ }
296
+ /**
297
+ * Gets total word count for all pages
298
+ * @returns Total word count across all pages
299
+ */
300
+ getTotalWordCount() {
301
+ const allText = this.extractAllText();
302
+ return allText
303
+ .trim()
304
+ .split(/\s+/)
305
+ .filter((word) => word.length > 0).length;
306
+ }
307
+ /**
308
+ * Gets character count for a page
309
+ * @param pageIndex - Zero-based page index
310
+ * @returns Character count (including whitespace)
311
+ */
312
+ getPageCharacterCount(pageIndex) {
313
+ const text = this.extractText(pageIndex);
314
+ return text.length;
315
+ }
316
+ /**
317
+ * Gets total character count for all pages
318
+ * @returns Total character count
319
+ */
320
+ getTotalCharacterCount() {
321
+ let total = 0;
322
+ for (let i = 0; i < this._document.pageCount; i++) {
323
+ total += this.getPageCharacterCount(i);
324
+ }
325
+ return total;
326
+ }
327
+ /**
328
+ * Gets line count for a page
329
+ * @param pageIndex - Zero-based page index
330
+ * @returns Estimated line count
331
+ */
332
+ getPageLineCount(pageIndex) {
333
+ const text = this.extractText(pageIndex);
334
+ return text.split('\n').length;
335
+ }
336
+ /**
337
+ * Gets statistics for extracted content
338
+ * @returns Content statistics object
339
+ *
340
+ * @example
341
+ * ```typescript
342
+ * const stats = manager.getContentStatistics();
343
+ * console.log(`Total pages: ${stats.pageCount}`);
344
+ * console.log(`Total words: ${stats.wordCount}`);
345
+ * console.log(`Average page length: ${stats.averagePageLength}`);
346
+ * ```
347
+ */
348
+ getContentStatistics() {
349
+ try {
350
+ const pageCount = this._document.pageCount;
351
+ const totalWords = this.getTotalWordCount();
352
+ const totalCharacters = this.getTotalCharacterCount();
353
+ return {
354
+ pageCount,
355
+ wordCount: totalWords,
356
+ characterCount: totalCharacters,
357
+ averageWordsPerPage: Math.round(totalWords / pageCount),
358
+ averageCharactersPerPage: Math.round(totalCharacters / pageCount),
359
+ };
360
+ }
361
+ catch (error) {
362
+ throw new Error(`Failed to get content statistics: ${error.message}`);
363
+ }
364
+ }
365
+ /**
366
+ * Searches for text across all pages and returns matching snippets
367
+ * @param searchText - Text to search for
368
+ * @param contextLength - Characters of context around match
369
+ * @returns Array of match objects with page and snippet
370
+ *
371
+ * @example
372
+ * ```typescript
373
+ * const matches = manager.searchContent('keyword', 50);
374
+ * matches.forEach(match => {
375
+ * console.log(`Page ${match.pageIndex + 1}: ...${match.snippet}...`);
376
+ * });
377
+ * ```
378
+ */
379
+ searchContent(searchText, contextLength = 100) {
380
+ if (!searchText || typeof searchText !== 'string') {
381
+ throw new Error('Search text must be a non-empty string');
382
+ }
383
+ const results = [];
384
+ const searchRegex = new RegExp(searchText, 'gi');
385
+ for (let i = 0; i < this._document.pageCount; i++) {
386
+ try {
387
+ const text = this.extractText(i);
388
+ let match;
389
+ while ((match = searchRegex.exec(text)) !== null) {
390
+ const start = Math.max(0, match.index - contextLength);
391
+ const end = Math.min(text.length, match.index + searchText.length + contextLength);
392
+ const snippet = text.substring(start, end);
393
+ results.push({
394
+ pageIndex: i,
395
+ pageNumber: i + 1,
396
+ matchIndex: match.index,
397
+ snippet: snippet.replace(/\n/g, ' '),
398
+ matchText: match[0],
399
+ });
400
+ }
401
+ // Reset regex for next iteration
402
+ searchRegex.lastIndex = 0;
403
+ }
404
+ catch (e) {
405
+ // Skip pages that fail extraction
406
+ }
407
+ }
408
+ return results;
409
+ }
410
+ /**
411
+ * Extract text from a page in a worker thread (non-blocking)
412
+ * @param documentPath - Path to the PDF document
413
+ * @param pageIndex - Page index to extract from
414
+ * @param options - Optional extraction options
415
+ * @param timeout - Optional timeout in milliseconds
416
+ * @returns Promise resolving to extracted text
417
+ */
418
+ async extractTextInWorker(documentPath, pageIndex, options, timeout) {
419
+ const { workerPool } = await import('../workers/index.js');
420
+ const result = await workerPool.runTask({
421
+ operation: 'extract',
422
+ documentPath,
423
+ params: {
424
+ type: 'text',
425
+ pageIndex,
426
+ options: options || {},
427
+ },
428
+ }, timeout);
429
+ if (!result.success) {
430
+ throw new Error(`Worker extraction failed: ${result.error instanceof Error ? result.error.message : String(result.error)}`);
431
+ }
432
+ return result.data;
433
+ }
434
+ /**
435
+ * Extract markdown from a page in a worker thread (non-blocking)
436
+ * @param documentPath - Path to the PDF document
437
+ * @param pageIndex - Page index to extract from
438
+ * @param options - Optional extraction options
439
+ * @param timeout - Optional timeout in milliseconds
440
+ * @returns Promise resolving to extracted markdown
441
+ */
442
+ async extractMarkdownInWorker(documentPath, pageIndex, options, timeout) {
443
+ const { workerPool } = await import('../workers/index.js');
444
+ const result = await workerPool.runTask({
445
+ operation: 'extract',
446
+ documentPath,
447
+ params: {
448
+ type: 'markdown',
449
+ pageIndex,
450
+ options: options || {},
451
+ },
452
+ }, timeout);
453
+ if (!result.success) {
454
+ throw new Error(`Worker extraction failed: ${result.error instanceof Error ? result.error.message : String(result.error)}`);
455
+ }
456
+ return result.data;
457
+ }
458
+ /**
459
+ * Extract HTML from a page in a worker thread (non-blocking)
460
+ * @param documentPath - Path to the PDF document
461
+ * @param pageIndex - Page index to extract from
462
+ * @param options - Optional extraction options
463
+ * @param timeout - Optional timeout in milliseconds
464
+ * @returns Promise resolving to extracted HTML
465
+ */
466
+ async extractHtmlInWorker(documentPath, pageIndex, options, timeout) {
467
+ const { workerPool } = await import('../workers/index.js');
468
+ const result = await workerPool.runTask({
469
+ operation: 'extract',
470
+ documentPath,
471
+ params: {
472
+ type: 'html',
473
+ pageIndex,
474
+ options: options || {},
475
+ },
476
+ }, timeout);
477
+ if (!result.success) {
478
+ throw new Error(`Worker extraction failed: ${result.error instanceof Error ? result.error.message : String(result.error)}`);
479
+ }
480
+ return result.data;
481
+ }
482
+ }
@@ -0,0 +1,127 @@
1
+ import { EventEmitter } from 'events';
/**
 * Kinds of document event, identified by a lower-case snake_case string.
 *
 * Used as the `eventType` of {@link DocumentEvent} and as the key for the
 * listener methods declared on `EventManager`.
 */
export declare enum EventType {
    /** Value `"page_loaded"`. */
    PAGE_LOADED = "page_loaded",
    /** Value `"page_rendered"`. */
    PAGE_RENDERED = "page_rendered",
    /** Value `"content_parsed"`. */
    CONTENT_PARSED = "content_parsed",
    /** Value `"search_completed"`. */
    SEARCH_COMPLETED = "search_completed",
    /** Value `"error_occurred"`. */
    ERROR_OCCURRED = "error_occurred",
    /** Value `"processing_started"`. */
    PROCESSING_STARTED = "processing_started",
    /** Value `"processing_completed"`. */
    PROCESSING_COMPLETED = "processing_completed"
}
/**
 * Identifiers for the encryption algorithms selectable through
 * `EncryptionSettings.algorithm`.
 *
 * NOTE(review): the names suggest AES/RC4 with the key size in bits —
 * confirm against the implementation.
 */
export declare enum EncryptionAlgorithm {
    /** Value `"aes_128"`. */
    AES_128 = "aes_128",
    /** Value `"aes_256"`. */
    AES_256 = "aes_256",
    /** Value `"rc4_40"`. */
    RC4_40 = "rc4_40",
    /** Value `"rc4_128"`. */
    RC4_128 = "rc4_128"
}
/**
 * Numeric compression presets used by `CompressionSettings.level`.
 *
 * NOTE(review): the 0/3/6/9 values look like a zlib-style effort scale —
 * confirm against the implementation.
 */
export declare enum CompressionLevel {
    /** Numeric value 0. */
    NONE = 0,
    /** Numeric value 3. */
    FAST = 3,
    /** Numeric value 6. */
    BALANCED = 6,
    /** Numeric value 9. */
    BEST = 9
}
/**
 * Shape of an event record passed to `EventManager.emitEvent` and returned
 * by `EventManager.getEventHistory` / `EventManager.waitForEvent`.
 */
export interface DocumentEvent {
    /** Which kind of event this record describes. */
    eventType: EventType;
    /** Event time as a number. NOTE(review): unit (ms vs s since epoch) is not visible here — confirm. */
    timestamp: number;
    /** Free-form payload attached to the event. */
    data: Record<string, any>;
    /** Optional page index associated with the event. */
    pageIndex?: number;
}
/**
 * Options accepted by `EncryptionManager.encryptDocument` and
 * `EncryptionManager.changeEncryption`. All fields are required.
 */
export interface EncryptionSettings {
    /** Algorithm to encrypt with. */
    algorithm: EncryptionAlgorithm;
    /** User password string. */
    userPassword: string;
    /** Owner password string. */
    ownerPassword: string;
    /** Permission flag for printing. */
    allowPrinting: boolean;
    /** Permission flag for copying. */
    allowCopying: boolean;
    /** Permission flag for modification. */
    allowModification: boolean;
}
/**
 * Options accepted by the `CompressionManager` compress/estimate methods.
 * All fields are required.
 */
export interface CompressionSettings {
    /** Compression preset. */
    level: CompressionLevel;
    /** Flag selecting image compression. */
    compressImages: boolean;
    /** Flag selecting stream compression. */
    compressStreams: boolean;
    /** Flag selecting font compression. */
    compressFonts: boolean;
    /** Flag selecting duplicate removal. */
    removeDuplicates: boolean;
}
/**
 * Declared `EventEmitter` subclass exposing a promise-based listener API
 * keyed by {@link EventType}.
 *
 * NOTE(review): this is a declaration only. The member summaries below are
 * inferred from names and signatures; confirm them against the implementation.
 */
export declare class EventManager extends EventEmitter {
    private document;
    private eventListeners;
    /** @param document - Document handle (untyped in this declaration). */
    constructor(document: any);
    /** Presumably registers `handler` for `eventType`; resolves to a boolean status. */
    addEventListener(eventType: EventType, handler: (event: unknown) => void): Promise<boolean>;
    /** Presumably unregisters `handler` for `eventType`; resolves to a boolean status. */
    removeEventListener(eventType: EventType, handler: (event: unknown) => void): Promise<boolean>;
    /** Presumably dispatches `event` to listeners; resolves to a boolean status. */
    emitEvent(event: DocumentEvent): Promise<boolean>;
    /** Resolves to a boolean for the given event type. */
    hasListener(eventType: EventType): Promise<boolean>;
    /** Resolves to a number for the given event type. */
    getListenerCount(eventType: EventType): Promise<number>;
    /** `eventType` is optional; presumably all listeners are cleared when it is omitted — confirm. */
    clearListeners(eventType?: EventType): Promise<boolean>;
    /** Resolves to an array of event records. */
    getEventHistory(): Promise<Array<DocumentEvent>>;
    /** Presumably toggles event logging; resolves to a boolean status. */
    enableEventLogging(enabled: boolean): Promise<boolean>;
    /** Resolves to a free-form statistics object. */
    getEventStatistics(): Promise<Record<string, any>>;
    /**
     * Resolves to an event record or `null`.
     * @param timeoutSec - Optional timeout; the name suggests seconds — confirm.
     */
    waitForEvent(eventType: EventType, timeoutSec?: number): Promise<DocumentEvent | null>;
}
/**
 * Declared `EventEmitter` subclass exposing promise-based encryption
 * operations for a document.
 *
 * NOTE(review): this is a declaration only. The member summaries below are
 * inferred from names and signatures; confirm them against the implementation.
 */
export declare class EncryptionManager extends EventEmitter {
    private document;
    private encryptionSettings;
    /** @param document - Document handle (untyped in this declaration). */
    constructor(document: any);
    /** Takes full encryption settings; resolves to a boolean status. */
    encryptDocument(settings: EncryptionSettings): Promise<boolean>;
    /** Takes a password; resolves to a boolean status. */
    decryptDocument(password: string): Promise<boolean>;
    /** Takes replacement settings; resolves to a boolean status. */
    changeEncryption(newSettings: EncryptionSettings): Promise<boolean>;
    /** Resolves to an algorithm name string or `null`. */
    getEncryptionAlgorithm(): Promise<string | null>;
    /** Resolves to a boolean. */
    isDocumentEncrypted(): Promise<boolean>;
    /** Takes the owner password; resolves to a boolean status. */
    removeEncryption(ownerPassword: string): Promise<boolean>;
    /** Takes a password; resolves to a boolean status. */
    setUserPassword(password: string): Promise<boolean>;
    /** Takes a password; resolves to a boolean status. */
    setOwnerPassword(password: string): Promise<boolean>;
    /** Takes a password; resolves to a boolean. */
    validatePassword(password: string): Promise<boolean>;
    /** Resolves to a map of permission name to boolean. */
    getPermissions(): Promise<Record<string, boolean>>;
    /** Takes a map of permission name to boolean; resolves to a boolean status. */
    setPermissions(permissions: Record<string, boolean>): Promise<boolean>;
    /** Takes an output path; resolves to a boolean status. */
    exportCertificate(outputPath: string): Promise<boolean>;
    /** Takes a certificate path; resolves to a boolean status. */
    importCertificate(certPath: string): Promise<boolean>;
    /** Resolves to a free-form status object. */
    encryptionStatus(): Promise<Record<string, any>>;
}
/**
 * Declared `EventEmitter` subclass exposing promise-based compression
 * operations for a document.
 *
 * NOTE(review): this is a declaration only. The member summaries below are
 * inferred from names and signatures; confirm them against the implementation.
 */
export declare class CompressionManager extends EventEmitter {
    private document;
    private compressionSettings;
    /** @param document - Document handle (untyped in this declaration). */
    constructor(document: any);
    /** Takes compression settings; resolves to a boolean status. */
    compressDocument(settings: CompressionSettings): Promise<boolean>;
    /** @param quality - Optional number; range and default are not visible here — confirm. */
    compressImages(quality?: number): Promise<boolean>;
    /** Resolves to a boolean status. */
    compressStreams(): Promise<boolean>;
    /** Takes a page index and settings; resolves to a boolean status. */
    compressPage(pageIndex: number, settings: CompressionSettings): Promise<boolean>;
    /** Resolves to a number or `null`. */
    getCompressionRatio(): Promise<number | null>;
    /** Takes settings; resolves to a number or `null`. */
    estimateCompression(settings: CompressionSettings): Promise<number | null>;
    /** Resolves to a boolean status. */
    decompressDocument(): Promise<boolean>;
    /** Resolves to a boolean. */
    isCompressed(): Promise<boolean>;
    /** Resolves to a free-form report object. */
    getCompressionReport(): Promise<Record<string, any>>;
    /** Resolves to a boolean status. */
    optimizeForWeb(): Promise<boolean>;
    /** Resolves to a boolean status. */
    optimizeForPrint(): Promise<boolean>;
}
/**
 * Declared `EventEmitter` subclass exposing promise-based operations on
 * custom annotations, which are addressed by string ids.
 *
 * NOTE(review): this is a declaration only. The member summaries below are
 * inferred from names and signatures; confirm them against the implementation.
 */
export declare class CustomAnnotationManager extends EventEmitter {
    private document;
    private customAnnotations;
    /** @param document - Document handle (untyped in this declaration). */
    constructor(document: any);
    /** Resolves to a string (presumably the new annotation id) or `null`. */
    createCustomAnnotation(annotationType: string, properties: Record<string, any>): Promise<string | null>;
    /** Takes a type name and a handler function; resolves to a boolean status. */
    registerAnnotationType(typeName: string, handler: (...args: unknown[]) => unknown): Promise<boolean>;
    /** Takes an annotation id and properties; resolves to a boolean status. */
    modifyAnnotation(annotationId: string, properties: Record<string, any>): Promise<boolean>;
    /** Takes an annotation id; resolves to a boolean status. */
    deleteCustomAnnotation(annotationId: string): Promise<boolean>;
    /** Takes a page index; resolves to an array of free-form annotation objects. */
    getCustomAnnotations(pageIndex: number): Promise<Array<Record<string, any>>>;
    /** Takes an annotation id and a visibility flag; resolves to a boolean status. */
    setAnnotationVisibility(annotationId: string, visible: boolean): Promise<boolean>;
    /** Takes an output path; resolves to a boolean status. */
    exportAnnotations(outputPath: string): Promise<boolean>;
    /** Takes an input path; resolves to a boolean status. */
    importAnnotations(inputPath: string): Promise<boolean>;
    /** Takes an annotation id and a free-form style object; resolves to a boolean status. */
    applyAnnotationStyle(annotationId: string, style: Record<string, any>): Promise<boolean>;
    /** Takes an annotation id; resolves to a free-form metadata object or `null`. */
    getAnnotationMetadata(annotationId: string): Promise<Record<string, any> | null>;
    /** Takes an annotation id and reply text; resolves to a boolean status. */
    replyToAnnotation(annotationId: string, replyText: string): Promise<boolean>;
    /** Takes an annotation id; resolves to an array of strings. */
    getAnnotationReplies(annotationId: string): Promise<Array<string>>;
    /** Resolves to a boolean status. */
    flattenAnnotations(): Promise<boolean>;
    /** Takes a target format name; accepted values are not visible here — confirm. */
    convertAnnotations(targetFormat: string): Promise<boolean>;
}
/**
 * Declared `EventEmitter` subclass exposing promise-based access-control
 * and content-security operations for a document.
 *
 * NOTE(review): this is a declaration only. The member summaries below are
 * inferred from names and signatures; confirm them against the implementation.
 */
export declare class ContentSecurityManager extends EventEmitter {
    private document;
    private accessPolicies;
    /** @param document - Document handle (untyped in this declaration). */
    constructor(document: any);
    /** Takes a policy name and free-form restrictions; resolves to a boolean status. */
    setAccessControl(policyName: string, restrictions: Record<string, any>): Promise<boolean>;
    /** Takes a role and an action name; resolves to a boolean. */
    validateAccess(userRole: string, action: string): Promise<boolean>;
    /** Takes a map of right name to boolean; resolves to a boolean status. */
    applyDigitalRights(rights: Record<string, boolean>): Promise<boolean>;
    /** Both flags are optional; their defaults are not visible here — confirm. */
    sanitizeContent(removeScripts?: boolean, removeEmbedded?: boolean): Promise<boolean>;
    /** Resolves to an array of free-form finding objects. */
    detectSuspiciousContent(): Promise<Array<Record<string, any>>>;
    /** Resolves to an array of free-form log entries. */
    getAccessLog(): Promise<Array<Record<string, any>>>;
    /** Takes a date as a string; the expected format is not visible here — confirm. */
    setExpirationDate(expirationDate: string): Promise<boolean>;
    /** Takes watermark text; resolves to a boolean status. */
    enableWatermarking(watermarkText: string): Promise<boolean>;
    /** Takes an enable flag; resolves to a boolean status. */
    trackDocumentUsage(enabled: boolean): Promise<boolean>;
    /** Resolves to a free-form audit object. */
    getSecurityAudit(): Promise<Record<string, any>>;
}