pdf-oxide-fips 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE-APACHE +176 -0
  2. package/LICENSE-MIT +25 -0
  3. package/README.md +218 -0
  4. package/lib/builders/annotation-builder.d.ts +198 -0
  5. package/lib/builders/annotation-builder.js +317 -0
  6. package/lib/builders/conversion-options-builder.d.ts +106 -0
  7. package/lib/builders/conversion-options-builder.js +214 -0
  8. package/lib/builders/document-builder.d.ts +381 -0
  9. package/lib/builders/document-builder.js +770 -0
  10. package/lib/builders/index.d.ts +13 -0
  11. package/lib/builders/index.js +13 -0
  12. package/lib/builders/metadata-builder.d.ts +201 -0
  13. package/lib/builders/metadata-builder.js +285 -0
  14. package/lib/builders/pdf-builder.d.ts +216 -0
  15. package/lib/builders/pdf-builder.js +350 -0
  16. package/lib/builders/search-options-builder.d.ts +73 -0
  17. package/lib/builders/search-options-builder.js +129 -0
  18. package/lib/builders/streaming-table.d.ts +64 -0
  19. package/lib/builders/streaming-table.js +140 -0
  20. package/lib/document-editor-manager.d.ts +139 -0
  21. package/lib/document-editor-manager.js +256 -0
  22. package/lib/document-editor.d.ts +124 -0
  23. package/lib/document-editor.js +318 -0
  24. package/lib/errors.d.ts +382 -0
  25. package/lib/errors.js +1115 -0
  26. package/lib/form-field-manager.d.ts +299 -0
  27. package/lib/form-field-manager.js +568 -0
  28. package/lib/hybrid-ml-manager.d.ts +142 -0
  29. package/lib/hybrid-ml-manager.js +208 -0
  30. package/lib/index.d.ts +205 -0
  31. package/lib/index.js +693 -0
  32. package/lib/managers/accessibility-manager.d.ts +148 -0
  33. package/lib/managers/accessibility-manager.js +234 -0
  34. package/lib/managers/annotation-manager.d.ts +219 -0
  35. package/lib/managers/annotation-manager.js +359 -0
  36. package/lib/managers/barcode-manager.d.ts +82 -0
  37. package/lib/managers/barcode-manager.js +263 -0
  38. package/lib/managers/batch-manager.d.ts +185 -0
  39. package/lib/managers/batch-manager.js +385 -0
  40. package/lib/managers/cache-manager.d.ts +181 -0
  41. package/lib/managers/cache-manager.js +384 -0
  42. package/lib/managers/compliance-manager.d.ts +103 -0
  43. package/lib/managers/compliance-manager.js +453 -0
  44. package/lib/managers/content-manager.d.ts +120 -0
  45. package/lib/managers/content-manager.js +294 -0
  46. package/lib/managers/document-utility-manager.d.ts +369 -0
  47. package/lib/managers/document-utility-manager.js +730 -0
  48. package/lib/managers/dom-pdf-creator.d.ts +104 -0
  49. package/lib/managers/dom-pdf-creator.js +299 -0
  50. package/lib/managers/editing-manager.d.ts +248 -0
  51. package/lib/managers/editing-manager.js +387 -0
  52. package/lib/managers/enterprise-manager.d.ts +192 -0
  53. package/lib/managers/enterprise-manager.js +307 -0
  54. package/lib/managers/extended-managers.d.ts +122 -0
  55. package/lib/managers/extended-managers.js +664 -0
  56. package/lib/managers/extraction-manager.d.ts +246 -0
  57. package/lib/managers/extraction-manager.js +482 -0
  58. package/lib/managers/final-utilities.d.ts +127 -0
  59. package/lib/managers/final-utilities.js +657 -0
  60. package/lib/managers/hybrid-ml-advanced.d.ts +136 -0
  61. package/lib/managers/hybrid-ml-advanced.js +722 -0
  62. package/lib/managers/index.d.ts +64 -0
  63. package/lib/managers/index.js +69 -0
  64. package/lib/managers/layer-manager.d.ts +203 -0
  65. package/lib/managers/layer-manager.js +401 -0
  66. package/lib/managers/metadata-manager.d.ts +148 -0
  67. package/lib/managers/metadata-manager.js +280 -0
  68. package/lib/managers/ocr-manager.d.ts +194 -0
  69. package/lib/managers/ocr-manager.js +582 -0
  70. package/lib/managers/optimization-manager.d.ts +102 -0
  71. package/lib/managers/optimization-manager.js +213 -0
  72. package/lib/managers/outline-manager.d.ts +101 -0
  73. package/lib/managers/outline-manager.js +169 -0
  74. package/lib/managers/page-manager.d.ts +142 -0
  75. package/lib/managers/page-manager.js +235 -0
  76. package/lib/managers/pattern-detection.d.ts +169 -0
  77. package/lib/managers/pattern-detection.js +322 -0
  78. package/lib/managers/rendering-manager.d.ts +353 -0
  79. package/lib/managers/rendering-manager.js +679 -0
  80. package/lib/managers/search-manager.d.ts +235 -0
  81. package/lib/managers/search-manager.js +329 -0
  82. package/lib/managers/security-manager.d.ts +161 -0
  83. package/lib/managers/security-manager.js +292 -0
  84. package/lib/managers/signature-manager.d.ts +738 -0
  85. package/lib/managers/signature-manager.js +1509 -0
  86. package/lib/managers/streams.d.ts +262 -0
  87. package/lib/managers/streams.js +477 -0
  88. package/lib/managers/xfa-manager.d.ts +227 -0
  89. package/lib/managers/xfa-manager.js +539 -0
  90. package/lib/native-loader.d.ts +7 -0
  91. package/lib/native-loader.js +62 -0
  92. package/lib/native.d.ts +16 -0
  93. package/lib/native.js +69 -0
  94. package/lib/pdf-creator-manager.d.ts +200 -0
  95. package/lib/pdf-creator-manager.js +381 -0
  96. package/lib/properties.d.ts +79 -0
  97. package/lib/properties.js +454 -0
  98. package/lib/result-accessors-manager.d.ts +346 -0
  99. package/lib/result-accessors-manager.js +706 -0
  100. package/lib/thumbnail-manager.d.ts +121 -0
  101. package/lib/thumbnail-manager.js +205 -0
  102. package/lib/timestamp.d.ts +54 -0
  103. package/lib/timestamp.js +115 -0
  104. package/lib/tsa-client.d.ts +44 -0
  105. package/lib/tsa-client.js +67 -0
  106. package/lib/types/common.d.ts +189 -0
  107. package/lib/types/common.js +17 -0
  108. package/lib/types/document-types.d.ts +352 -0
  109. package/lib/types/document-types.js +82 -0
  110. package/lib/types/index.d.ts +5 -0
  111. package/lib/types/index.js +5 -0
  112. package/lib/types/manager-types.d.ts +179 -0
  113. package/lib/types/manager-types.js +100 -0
  114. package/lib/types/native-bindings.d.ts +439 -0
  115. package/lib/types/native-bindings.js +7 -0
  116. package/lib/workers/index.d.ts +6 -0
  117. package/lib/workers/index.js +5 -0
  118. package/lib/workers/pool.d.ts +64 -0
  119. package/lib/workers/pool.js +192 -0
  120. package/lib/workers/worker.d.ts +5 -0
  121. package/lib/workers/worker.js +99 -0
  122. package/package.json +79 -0
  123. package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
  124. package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
  125. package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
  126. package/prebuilds/linux-x64/pdf_oxide.node +0 -0
  127. package/prebuilds/win32-x64/pdf_oxide.node +0 -0
@@ -0,0 +1,582 @@
1
+ /**
2
+ * OcrManager - Canonical OCR Manager (merged from 3 implementations)
3
+ *
4
+ * Consolidates:
5
+ * - src/ocr-manager.ts (simple API with setLanguage, extractText, analyzePage)
6
+ * - src/managers/ocr-compliance-cache.ts OCRManager (engine lifecycle)
7
+ * - src/managers/ocr-manager-typed.ts OCRManager (full TypeScript, FFI-wired)
8
+ *
9
+ * Provides optical character recognition operations with complete type safety,
10
+ * proper error handling, and full FFI integration.
11
+ */
12
+ import { promises as fs } from 'fs';
13
+ import { dirname } from 'path';
14
+ import { BaseManager, OcrLanguage, } from '../types/manager-types.js';
15
+ // Re-export types for convenience
16
+ export { OcrLanguage };
17
/**
 * Named presets for the OCR detection accuracy/speed tradeoff.
 * Each member maps a PascalCase key to its lowercase string value.
 */
export var OcrDetectionMode;
(function (mode) {
    mode["Accurate"] = "accurate";
    mode["Fast"] = "fast";
    mode["Balanced"] = "balanced";
})(OcrDetectionMode || (OcrDetectionMode = {}));
26
+ /**
27
+ * Canonical OcrManager - Comprehensive OCR with full TypeScript support
28
+ *
29
+ * Features:
30
+ * - Full text recognition with confidence scoring
31
+ * - Batch page processing with skip optimization
32
+ * - Text region detection with coordinates
33
+ * - Multi-language support
34
+ * - Comprehensive event emission
35
+ * - Automatic resource cleanup
36
+ * - Legacy API compatibility (setLanguage, extractText, analyzePage, etc.)
37
+ */
38
+ export class OcrManager extends BaseManager {
39
+ constructor(document, options) {
40
+ super(document, options);
41
+ this.ocrEngine = null;
42
+ this.currentLanguage = OcrLanguage.ENGLISH;
43
+ this.preprocessingType = 'auto';
44
+ try {
45
+ this.native = require('../../index.node');
46
+ }
47
+ catch {
48
+ this.native = null;
49
+ }
50
+ }
51
+ // ==========================================================================
52
+ // Engine Lifecycle (from typed version)
53
+ // ==========================================================================
54
+ /**
55
+ * Initialize OCR engine with specified configuration
56
+ */
57
+ async initializeEngine(detectionThreshold = 0.5, recognitionThreshold = 0.5, maxSideLen = 960, useGpu = false, gpuDeviceId = 0) {
58
+ try {
59
+ this.recordOperation();
60
+ if (this.ocrEngine) {
61
+ return true;
62
+ }
63
+ this.ocrEngine = await this.document?.createOcrEngine(detectionThreshold, recognitionThreshold, maxSideLen, useGpu, gpuDeviceId);
64
+ if (this.ocrEngine) {
65
+ this.emit('ocr-engine-initialized', {
66
+ useGpu,
67
+ gpuDeviceId,
68
+ detectionThreshold,
69
+ recognitionThreshold,
70
+ });
71
+ return true;
72
+ }
73
+ return false;
74
+ }
75
+ catch (error) {
76
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
77
+ throw error;
78
+ }
79
+ }
80
+ /**
81
+ * Destroy OCR engine and free resources
82
+ */
83
+ async destroyOcrEngine() {
84
+ try {
85
+ this.recordOperation();
86
+ if (this.ocrEngine) {
87
+ await this.document?.destroyOcrEngine(this.ocrEngine);
88
+ this.ocrEngine = null;
89
+ this.emit('ocr-engine-destroyed', { timestamp: Date.now() });
90
+ }
91
+ }
92
+ catch (error) {
93
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
94
+ throw error;
95
+ }
96
+ }
97
+ // ==========================================================================
98
+ // Core Recognition (from typed version)
99
+ // ==========================================================================
100
+ /**
101
+ * Check if page needs OCR processing
102
+ */
103
+ async pageNeedsOcr(pageIndex) {
104
+ try {
105
+ this.recordOperation();
106
+ return (await this.document?.pageNeedsOcr(pageIndex)) || false;
107
+ }
108
+ catch (error) {
109
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
110
+ throw error;
111
+ }
112
+ }
113
+ /**
114
+ * Recognize text on a page with full confidence scoring
115
+ */
116
+ async recognizePage(pageIndex) {
117
+ try {
118
+ this.recordOperation();
119
+ if (!this.ocrEngine) {
120
+ throw new Error('OCR engine not initialized. Call initializeEngine() first.');
121
+ }
122
+ const text = await this.document?.recognizePage(pageIndex, this.ocrEngine);
123
+ this.emit('page-recognized', {
124
+ pageIndex,
125
+ textLength: text?.length || 0,
126
+ timestamp: Date.now(),
127
+ });
128
+ return text || '';
129
+ }
130
+ catch (error) {
131
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
132
+ throw error;
133
+ }
134
+ }
135
+ /**
136
+ * Get OCR confidence score for a page
137
+ */
138
+ async getOcrConfidence(pageIndex) {
139
+ try {
140
+ this.recordOperation();
141
+ if (!this.ocrEngine) {
142
+ return 0;
143
+ }
144
+ return (await this.document?.getOcrConfidence(pageIndex)) || 0;
145
+ }
146
+ catch (error) {
147
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
148
+ throw error;
149
+ }
150
+ }
151
+ /**
152
+ * Detect text regions on a page with bounding boxes
153
+ */
154
+ async detectTextRegions(pageIndex) {
155
+ try {
156
+ this.recordOperation();
157
+ if (!this.ocrEngine) {
158
+ return [];
159
+ }
160
+ const regions = await this.document?.detectTextRegions(pageIndex, this.ocrEngine);
161
+ return regions || [];
162
+ }
163
+ catch (error) {
164
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
165
+ throw error;
166
+ }
167
+ }
168
+ // ==========================================================================
169
+ // Language Configuration (from typed + root versions)
170
+ // ==========================================================================
171
+ /**
172
+ * Set OCR language for recognition (FFI-wired)
173
+ */
174
+ async setOcrLanguage(language) {
175
+ try {
176
+ this.recordOperation();
177
+ if (!this.ocrEngine) {
178
+ throw new Error('OCR engine not initialized');
179
+ }
180
+ const result = await this.document?.setOcrLanguage(this.ocrEngine, language);
181
+ if (result) {
182
+ this.currentLanguage = language || OcrLanguage.ENGLISH;
183
+ this.emit('language-changed', {
184
+ language,
185
+ timestamp: Date.now(),
186
+ });
187
+ }
188
+ return !!result;
189
+ }
190
+ catch (error) {
191
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
192
+ throw error;
193
+ }
194
+ }
195
+ /**
196
+ * Sets the OCR language (convenience alias for setOcrLanguage)
197
+ * From root-level OCRManager
198
+ */
199
+ setLanguage(language) {
200
+ this.currentLanguage = language;
201
+ this.invalidateCache('ocr');
202
+ this.emit('languageChanged', language);
203
+ }
204
+ /**
205
+ * Gets the current OCR language
206
+ * From root-level OCRManager
207
+ */
208
+ getLanguage() {
209
+ return this.currentLanguage;
210
+ }
211
+ /**
212
+ * Get available OCR languages
213
+ */
214
+ async getAvailableLanguages() {
215
+ try {
216
+ this.recordOperation();
217
+ const languages = (await this.document?.getAvailableLanguages()) || Object.values(OcrLanguage);
218
+ return languages;
219
+ }
220
+ catch (error) {
221
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
222
+ throw error;
223
+ }
224
+ }
225
+ // ==========================================================================
226
+ // Processing & Export (from typed version)
227
+ // ==========================================================================
228
+ /**
229
+ * Preprocess page before OCR for better recognition
230
+ */
231
+ async preprocessPage(pageIndex, preprocessingType = 'auto') {
232
+ try {
233
+ this.recordOperation();
234
+ const result = await this.document?.preprocessPage(pageIndex, preprocessingType);
235
+ this.preprocessingType = preprocessingType;
236
+ this.emit('page-preprocessed', {
237
+ pageIndex,
238
+ type: preprocessingType,
239
+ timestamp: Date.now(),
240
+ });
241
+ return !!result;
242
+ }
243
+ catch (error) {
244
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
245
+ throw error;
246
+ }
247
+ }
248
+ /**
249
+ * Export OCR text to file
250
+ */
251
+ async exportOcrText(pageIndex, filePath, format = 'txt') {
252
+ try {
253
+ this.recordOperation();
254
+ const text = await this.recognizePage(pageIndex);
255
+ await fs.mkdir(dirname(filePath), { recursive: true });
256
+ let content;
257
+ switch (format) {
258
+ case 'json':
259
+ content = JSON.stringify({ pageIndex, text, timestamp: Date.now() }, null, 2);
260
+ break;
261
+ case 'xml':
262
+ content = `<?xml version="1.0"?>\n<page index="${pageIndex}">\n${text
263
+ .split('\n')
264
+ .map((line) => ` <line>${line}</line>`)
265
+ .join('\n')}\n</page>`;
266
+ break;
267
+ default:
268
+ content = text;
269
+ }
270
+ await fs.writeFile(filePath, content, 'utf8');
271
+ this.emit('text-exported', {
272
+ pageIndex,
273
+ filePath,
274
+ format,
275
+ size: content.length,
276
+ timestamp: Date.now(),
277
+ });
278
+ return true;
279
+ }
280
+ catch (error) {
281
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
282
+ throw error;
283
+ }
284
+ }
285
+ // ==========================================================================
286
+ // Statistics & Batch (from typed version)
287
+ // ==========================================================================
288
+ /**
289
+ * Get comprehensive OCR statistics for a page
290
+ */
291
+ async getOcrStatistics(pageIndex) {
292
+ try {
293
+ this.recordOperation();
294
+ const text = await this.recognizePage(pageIndex);
295
+ const confidence = await this.getOcrConfidence(pageIndex);
296
+ const regions = await this.detectTextRegions(pageIndex);
297
+ return {
298
+ pageIndex,
299
+ text,
300
+ confidence,
301
+ regionCount: regions.length,
302
+ };
303
+ }
304
+ catch (error) {
305
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
306
+ throw error;
307
+ }
308
+ }
309
+ /**
310
+ * Batch recognize multiple pages
311
+ */
312
+ async batchRecognizePages(startPage, endPage) {
313
+ try {
314
+ this.recordOperation();
315
+ const results = new Map();
316
+ for (let i = startPage; i <= endPage; i++) {
317
+ const text = await this.recognizePage(i);
318
+ results.set(i, text);
319
+ }
320
+ this.emit('batch-recognized', {
321
+ startPage,
322
+ endPage,
323
+ pageCount: endPage - startPage + 1,
324
+ totalCharacters: Array.from(results.values()).reduce((s, t) => s + t.length, 0),
325
+ timestamp: Date.now(),
326
+ });
327
+ return results;
328
+ }
329
+ catch (error) {
330
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
331
+ throw error;
332
+ }
333
+ }
334
+ /**
335
+ * Extract OCR text with aggregated statistics from page range (FFI-wired)
336
+ */
337
+ async extractPageRange(startPage, endPage, skipNonScanned = true) {
338
+ try {
339
+ this.recordOperation();
340
+ if (!this.ocrEngine) {
341
+ throw new Error('OCR engine not initialized');
342
+ }
343
+ let totalSpans = 0;
344
+ let confidenceSum = 0;
345
+ let skippedPages = 0;
346
+ for (let pageIdx = startPage; pageIdx <= endPage; pageIdx++) {
347
+ try {
348
+ if (skipNonScanned) {
349
+ const needsOcr = await this.pageNeedsOcr(pageIdx);
350
+ if (!needsOcr) {
351
+ skippedPages++;
352
+ continue;
353
+ }
354
+ }
355
+ const text = await this.recognizePage(pageIdx);
356
+ const confidence = await this.getOcrConfidence(pageIdx);
357
+ const regions = await this.detectTextRegions(pageIdx);
358
+ totalSpans += Math.max(regions.length, text ? 1 : 0);
359
+ confidenceSum += confidence;
360
+ }
361
+ catch { }
362
+ }
363
+ const processedPages = endPage - startPage + 1 - skippedPages;
364
+ const avgConfidence = processedPages > 0 ? confidenceSum / processedPages : 0;
365
+ const result = {
366
+ startPage,
367
+ endPage,
368
+ totalPages: endPage - startPage + 1,
369
+ totalSpans,
370
+ averageConfidence: avgConfidence,
371
+ skippedPages,
372
+ };
373
+ this.emit('page-range-extracted', {
374
+ ...result,
375
+ timestamp: Date.now(),
376
+ });
377
+ this.setCached(`ocr-batch:${startPage}-${endPage}`, result);
378
+ return result;
379
+ }
380
+ catch (error) {
381
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
382
+ throw error;
383
+ }
384
+ }
385
+ // ==========================================================================
386
+ // Engine Status & Configuration (from typed version)
387
+ // ==========================================================================
388
+ /**
389
+ * Get OCR engine status and configuration
390
+ */
391
+ async getEngineStatus() {
392
+ try {
393
+ this.recordOperation();
394
+ if (!this.ocrEngine) {
395
+ return 'not_initialized';
396
+ }
397
+ return (await this.document?.getEngineStatus(this.ocrEngine)) || 'unknown';
398
+ }
399
+ catch (error) {
400
+ this.recordError(error instanceof Error ? error : new Error(String(error)));
401
+ throw error;
402
+ }
403
+ }
404
+ /**
405
+ * Get current OCR configuration
406
+ */
407
+ getConfiguration() {
408
+ return {
409
+ language: this.currentLanguage,
410
+ preprocessingType: this.preprocessingType,
411
+ engineInitialized: !!this.ocrEngine,
412
+ };
413
+ }
414
+ // ==========================================================================
415
+ // Methods from root-level OCRManager
416
+ // ==========================================================================
417
+ /**
418
+ * Extracts text from a page (convenience alias for recognizePage)
419
+ * From root-level OCRManager
420
+ */
421
+ async extractText(pageIndex, config) {
422
+ const cacheKey = `ocr:text:${pageIndex}:${this.currentLanguage}`;
423
+ const cached = this.getCached(cacheKey);
424
+ if (cached !== undefined) {
425
+ return cached;
426
+ }
427
+ let result = '';
428
+ if (this.document?.extractText) {
429
+ result = this.document.extractText(pageIndex) || '';
430
+ }
431
+ this.setCached(cacheKey, result);
432
+ this.emit('textExtracted', pageIndex, result.length);
433
+ return result;
434
+ }
435
+ /**
436
+ * Analyzes a page and returns detailed results
437
+ * From root-level OCRManager
438
+ */
439
+ async analyzePage(pageIndex, config) {
440
+ const cacheKey = `ocr:analysis:${pageIndex}:${this.currentLanguage}`;
441
+ const cached = this.getCached(cacheKey);
442
+ if (cached !== undefined) {
443
+ return cached;
444
+ }
445
+ let text = '';
446
+ let needsOcr = false;
447
+ if (this.document?.extractText) {
448
+ text = this.document.extractText(pageIndex) || '';
449
+ needsOcr = !text || text.trim().length < 10;
450
+ }
451
+ const result = {
452
+ pageIndex,
453
+ needsOcr,
454
+ confidence: needsOcr ? 0.0 : 0.95,
455
+ spanCount: text.split(' ').length || 0,
456
+ text,
457
+ };
458
+ this.setCached(cacheKey, result);
459
+ this.emit('pageAnalyzed', pageIndex, result);
460
+ return result;
461
+ }
462
+ /**
463
+ * Performs OCR analysis on all pages in the document
464
+ * From root-level OCRManager
465
+ */
466
+ async analyzeDocument(config) {
467
+ const cacheKey = `ocr:document:${this.currentLanguage}`;
468
+ const cached = this.getCached(cacheKey);
469
+ if (cached !== undefined) {
470
+ return cached;
471
+ }
472
+ const results = [];
473
+ const pageCount = this.document?.pageCount || 0;
474
+ for (let i = 0; i < pageCount; i++) {
475
+ const analysis = await this.analyzePage(i, config);
476
+ results.push(analysis);
477
+ this.emit('pageProcessed', i + 1, pageCount);
478
+ }
479
+ this.setCached(cacheKey, results);
480
+ this.emit('documentAnalyzed', results.length);
481
+ return results;
482
+ }
483
+ /**
484
+ * Extracts text spans with bounding boxes for a page
485
+ * From root-level OCRManager
486
+ */
487
+ async extractSpans(pageIndex, config) {
488
+ const cacheKey = `ocr:spans:${pageIndex}:${this.currentLanguage}`;
489
+ const cached = this.getCached(cacheKey);
490
+ if (cached !== undefined) {
491
+ return cached;
492
+ }
493
+ let spans = [];
494
+ if (this.native?.extract_spans) {
495
+ try {
496
+ const spansJson = this.native.extract_spans(pageIndex) ?? [];
497
+ spans = spansJson.length > 0 ? spansJson.map((json) => JSON.parse(json)) : [];
498
+ }
499
+ catch {
500
+ spans = [];
501
+ }
502
+ }
503
+ this.setCached(cacheKey, spans);
504
+ this.emit('spansExtracted', { page: pageIndex, count: spans.length });
505
+ return spans;
506
+ }
507
+ /**
508
+ * Checks if OCR is available/installed
509
+ * From root-level OCRManager
510
+ */
511
+ async isAvailable() {
512
+ const cacheKey = 'ocr:available';
513
+ const cached = this.getCached(cacheKey);
514
+ if (cached !== undefined) {
515
+ return cached;
516
+ }
517
+ const result = this.native ? true : false;
518
+ this.setCached(cacheKey, result);
519
+ return result;
520
+ }
521
+ /**
522
+ * Gets OCR engine version
523
+ * From root-level OCRManager
524
+ */
525
+ async getVersion() {
526
+ const cacheKey = 'ocr:version';
527
+ const cached = this.getCached(cacheKey);
528
+ if (cached !== undefined) {
529
+ return cached;
530
+ }
531
+ let version = '0.0.0';
532
+ if (this.native?.get_ocr_version) {
533
+ try {
534
+ version = this.native.get_ocr_version() ?? '0.0.0';
535
+ }
536
+ catch {
537
+ version = '0.0.0';
538
+ }
539
+ }
540
+ this.setCached(cacheKey, version);
541
+ return version;
542
+ }
543
+ // ==========================================================================
544
+ // Cache Operations (from root-level OCRManager)
545
+ // ==========================================================================
546
+ /**
547
+ * Clears the result cache
548
+ */
549
+ clearCache() {
550
+ this.invalidateCache();
551
+ this.emit('cacheCleared');
552
+ }
553
+ /**
554
+ * Gets cache statistics
555
+ */
556
+ getCacheStats() {
557
+ return {
558
+ cacheSize: this.cache.size,
559
+ entries: Array.from(this.cache.keys()),
560
+ };
561
+ }
562
+ // ==========================================================================
563
+ // Cleanup
564
+ // ==========================================================================
565
+ /**
566
+ * Cleanup on destroy
567
+ */
568
+ async destroy() {
569
+ try {
570
+ await this.destroyOcrEngine();
571
+ this.invalidateCache();
572
+ this.removeAllListeners();
573
+ this.initialized = false;
574
+ }
575
+ catch (error) {
576
+ console.error('Error during OCR manager cleanup:', error);
577
+ }
578
+ }
579
+ }
580
+ /** @deprecated Use OcrManager instead */
581
+ export const OCRManager = OcrManager;
582
+ export default OcrManager;
@@ -0,0 +1,102 @@
1
+ /**
2
+ * OptimizationManager - PDF Optimization Operations
3
+ *
4
+ * Provides document optimization capabilities including:
5
+ * - Font subsetting
6
+ * - Image downsampling
7
+ * - Object deduplication
8
+ * - Full optimization pipeline
9
+ *
10
+ * @since 1.0.0
11
+ */
12
+ import { EventEmitter } from 'events';
13
/**
 * Outcome reported by each optimization operation.
 */
export interface OptimizationResult {
    /** True when the operation completed successfully. */
    readonly success: boolean;
    /** Bytes removed from the document by the operation. */
    readonly bytesSaved: number;
    /** Document size in bytes before the operation. */
    readonly originalSize: number;
    /** Document size in bytes after the operation. */
    readonly optimizedSize: number;
    /** Compression ratio, in the range 0.0 - 1.0. */
    readonly compressionRatio: number;
}
28
/**
 * Event-emitting manager that shrinks a PDF document.
 *
 * Exposes font subsetting, image downsampling and object deduplication
 * as individual operations, plus a combined pipeline. Every operation
 * resolves to an {@link OptimizationResult}.
 *
 * @example
 * ```typescript
 * const optimizer = new OptimizationManager(document);
 *
 * // Drop glyphs that the document never uses
 * const fontResult = await optimizer.subsetFonts();
 * console.log(`Font subsetting saved ${fontResult.bytesSaved} bytes`);
 *
 * // Reduce image resolution to 150 DPI at JPEG quality 80
 * const imageResult = await optimizer.downsampleImages(150, 80);
 *
 * // Run everything in one call
 * const fullResult = await optimizer.optimizeFull(150, 80);
 * console.log(`Total savings: ${fullResult.bytesSaved} bytes`);
 * ```
 */
export declare class OptimizationManager extends EventEmitter {
    private document;
    private native;
    constructor(document: any);
    /**
     * Subsets every embedded font in the document.
     *
     * Unused glyphs are removed from embedded fonts, which reduces file
     * size while keeping the rendering of the characters actually used.
     *
     * @returns Optimization result with bytes saved
     * @throws OptimizationException if the operation fails
     */
    subsetFonts(): Promise<OptimizationResult>;
    /**
     * Downsamples the document's images to reduce file size.
     *
     * @param dpi - Target resolution in dots per inch (default: 150)
     * @param quality - JPEG quality used for recompression (1-100, default: 80)
     * @returns Optimization result with bytes saved
     * @throws OptimizationException if the operation fails
     */
    downsampleImages(dpi?: number, quality?: number): Promise<OptimizationResult>;
    /**
     * Deduplicates identical objects in the document.
     *
     * Fonts, images and other resources that occur more than once are
     * identified and merged.
     *
     * @returns Optimization result with bytes saved
     * @throws OptimizationException if the operation fails
     */
    deduplicate(): Promise<OptimizationResult>;
    /**
     * Runs the full optimization pipeline.
     *
     * Font subsetting, image downsampling and object deduplication are
     * combined into one operation for maximum file size reduction.
     *
     * @param dpi - Target image resolution in dots per inch (default: 150)
     * @param quality - JPEG quality used for recompression (1-100, default: 80)
     * @returns Optimization result with total bytes saved
     * @throws OptimizationException if the operation fails
     */
    optimizeFull(dpi?: number, quality?: number): Promise<OptimizationResult>;
    private parseOptimizationResult;
    private freeOptimizationResult;
    /**
     * Releases the resources held by this manager.
     */
    destroy(): void;
}
102
+ export default OptimizationManager;