pdf-oxide 0.3.24 → 0.3.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/builders/annotation-builder.d.ts +199 -0
- package/lib/builders/annotation-builder.d.ts.map +1 -0
- package/lib/builders/annotation-builder.js +318 -0
- package/lib/builders/annotation-builder.js.map +1 -0
- package/lib/builders/conversion-options-builder.d.ts +107 -0
- package/lib/builders/conversion-options-builder.d.ts.map +1 -0
- package/lib/builders/conversion-options-builder.js +215 -0
- package/lib/builders/conversion-options-builder.js.map +1 -0
- package/{src/builders/index.ts → lib/builders/index.d.ts} +1 -1
- package/lib/builders/index.d.ts.map +1 -0
- package/lib/builders/index.js +12 -0
- package/lib/builders/index.js.map +1 -0
- package/lib/builders/metadata-builder.d.ts +202 -0
- package/lib/builders/metadata-builder.d.ts.map +1 -0
- package/lib/builders/metadata-builder.js +286 -0
- package/lib/builders/metadata-builder.js.map +1 -0
- package/lib/builders/pdf-builder.d.ts +215 -0
- package/lib/builders/pdf-builder.d.ts.map +1 -0
- package/lib/builders/pdf-builder.js +339 -0
- package/lib/builders/pdf-builder.js.map +1 -0
- package/lib/builders/search-options-builder.d.ts +74 -0
- package/lib/builders/search-options-builder.d.ts.map +1 -0
- package/lib/builders/search-options-builder.js +130 -0
- package/lib/builders/search-options-builder.js.map +1 -0
- package/lib/document-editor-manager.d.ts +140 -0
- package/lib/document-editor-manager.d.ts.map +1 -0
- package/lib/document-editor-manager.js +257 -0
- package/lib/document-editor-manager.js.map +1 -0
- package/lib/errors.d.ts +383 -0
- package/lib/errors.d.ts.map +1 -0
- package/lib/errors.js +1117 -0
- package/lib/errors.js.map +1 -0
- package/lib/form-field-manager.d.ts +300 -0
- package/lib/form-field-manager.d.ts.map +1 -0
- package/lib/form-field-manager.js +567 -0
- package/lib/form-field-manager.js.map +1 -0
- package/lib/hybrid-ml-manager.d.ts +143 -0
- package/lib/hybrid-ml-manager.d.ts.map +1 -0
- package/lib/hybrid-ml-manager.js +209 -0
- package/lib/hybrid-ml-manager.js.map +1 -0
- package/lib/index.d.ts +23 -0
- package/lib/index.d.ts.map +1 -0
- package/lib/index.js +280 -0
- package/lib/index.js.map +1 -0
- package/lib/managers/accessibility-manager.d.ts +149 -0
- package/lib/managers/accessibility-manager.d.ts.map +1 -0
- package/lib/managers/accessibility-manager.js +224 -0
- package/lib/managers/accessibility-manager.js.map +1 -0
- package/lib/managers/annotation-manager.d.ts +220 -0
- package/lib/managers/annotation-manager.d.ts.map +1 -0
- package/lib/managers/annotation-manager.js +360 -0
- package/lib/managers/annotation-manager.js.map +1 -0
- package/lib/managers/barcode-manager.d.ts +80 -0
- package/lib/managers/barcode-manager.d.ts.map +1 -0
- package/lib/managers/barcode-manager.js +226 -0
- package/lib/managers/barcode-manager.js.map +1 -0
- package/lib/managers/batch-manager.d.ts +186 -0
- package/lib/managers/batch-manager.d.ts.map +1 -0
- package/lib/managers/batch-manager.js +389 -0
- package/lib/managers/batch-manager.js.map +1 -0
- package/lib/managers/cache-manager.d.ts +182 -0
- package/lib/managers/cache-manager.d.ts.map +1 -0
- package/lib/managers/cache-manager.js +387 -0
- package/lib/managers/cache-manager.js.map +1 -0
- package/lib/managers/compliance-manager.d.ts +104 -0
- package/lib/managers/compliance-manager.d.ts.map +1 -0
- package/lib/managers/compliance-manager.js +415 -0
- package/lib/managers/compliance-manager.js.map +1 -0
- package/lib/managers/content-manager.d.ts +121 -0
- package/lib/managers/content-manager.d.ts.map +1 -0
- package/lib/managers/content-manager.js +295 -0
- package/lib/managers/content-manager.js.map +1 -0
- package/lib/managers/document-utility-manager.d.ts +370 -0
- package/lib/managers/document-utility-manager.d.ts.map +1 -0
- package/lib/managers/document-utility-manager.js +731 -0
- package/lib/managers/document-utility-manager.js.map +1 -0
- package/lib/managers/dom-pdf-creator.d.ts +105 -0
- package/lib/managers/dom-pdf-creator.d.ts.map +1 -0
- package/lib/managers/dom-pdf-creator.js +300 -0
- package/lib/managers/dom-pdf-creator.js.map +1 -0
- package/lib/managers/editing-manager.d.ts +249 -0
- package/lib/managers/editing-manager.d.ts.map +1 -0
- package/lib/managers/editing-manager.js +388 -0
- package/lib/managers/editing-manager.js.map +1 -0
- package/lib/managers/enterprise-manager.d.ts +193 -0
- package/lib/managers/enterprise-manager.d.ts.map +1 -0
- package/lib/managers/enterprise-manager.js +305 -0
- package/lib/managers/enterprise-manager.js.map +1 -0
- package/lib/managers/extended-managers.d.ts +123 -0
- package/lib/managers/extended-managers.d.ts.map +1 -0
- package/lib/managers/extended-managers.js +658 -0
- package/lib/managers/extended-managers.js.map +1 -0
- package/lib/managers/extraction-manager.d.ts +247 -0
- package/lib/managers/extraction-manager.d.ts.map +1 -0
- package/lib/managers/extraction-manager.js +478 -0
- package/lib/managers/extraction-manager.js.map +1 -0
- package/lib/managers/final-utilities.d.ts +128 -0
- package/lib/managers/final-utilities.d.ts.map +1 -0
- package/lib/managers/final-utilities.js +653 -0
- package/lib/managers/final-utilities.js.map +1 -0
- package/lib/managers/hybrid-ml-advanced.d.ts +137 -0
- package/lib/managers/hybrid-ml-advanced.d.ts.map +1 -0
- package/lib/managers/hybrid-ml-advanced.js +707 -0
- package/lib/managers/hybrid-ml-advanced.js.map +1 -0
- package/lib/managers/index.d.ts +65 -0
- package/lib/managers/index.d.ts.map +1 -0
- package/lib/managers/index.js +70 -0
- package/lib/managers/index.js.map +1 -0
- package/lib/managers/layer-manager.d.ts +204 -0
- package/lib/managers/layer-manager.d.ts.map +1 -0
- package/lib/managers/layer-manager.js +403 -0
- package/lib/managers/layer-manager.js.map +1 -0
- package/lib/managers/metadata-manager.d.ts +149 -0
- package/lib/managers/metadata-manager.d.ts.map +1 -0
- package/lib/managers/metadata-manager.js +281 -0
- package/lib/managers/metadata-manager.js.map +1 -0
- package/lib/managers/ocr-manager.d.ts +195 -0
- package/lib/managers/ocr-manager.d.ts.map +1 -0
- package/lib/managers/ocr-manager.js +583 -0
- package/lib/managers/ocr-manager.js.map +1 -0
- package/lib/managers/optimization-manager.d.ts +103 -0
- package/lib/managers/optimization-manager.d.ts.map +1 -0
- package/lib/managers/optimization-manager.js +194 -0
- package/lib/managers/optimization-manager.js.map +1 -0
- package/lib/managers/outline-manager.d.ts +102 -0
- package/lib/managers/outline-manager.d.ts.map +1 -0
- package/lib/managers/outline-manager.js +170 -0
- package/lib/managers/outline-manager.js.map +1 -0
- package/lib/managers/page-manager.d.ts +143 -0
- package/lib/managers/page-manager.d.ts.map +1 -0
- package/lib/managers/page-manager.js +237 -0
- package/lib/managers/page-manager.js.map +1 -0
- package/lib/managers/pattern-detection.d.ts +170 -0
- package/lib/managers/pattern-detection.d.ts.map +1 -0
- package/lib/managers/pattern-detection.js +325 -0
- package/lib/managers/pattern-detection.js.map +1 -0
- package/lib/managers/rendering-manager.d.ts +354 -0
- package/lib/managers/rendering-manager.d.ts.map +1 -0
- package/lib/managers/rendering-manager.js +680 -0
- package/lib/managers/rendering-manager.js.map +1 -0
- package/lib/managers/search-manager.d.ts +236 -0
- package/lib/managers/search-manager.d.ts.map +1 -0
- package/lib/managers/search-manager.js +330 -0
- package/lib/managers/search-manager.js.map +1 -0
- package/lib/managers/security-manager.d.ts +162 -0
- package/lib/managers/security-manager.d.ts.map +1 -0
- package/lib/managers/security-manager.js +293 -0
- package/lib/managers/security-manager.js.map +1 -0
- package/lib/managers/signature-manager.d.ts +725 -0
- package/lib/managers/signature-manager.d.ts.map +1 -0
- package/lib/managers/signature-manager.js +1365 -0
- package/lib/managers/signature-manager.js.map +1 -0
- package/lib/managers/streams.d.ts +263 -0
- package/lib/managers/streams.d.ts.map +1 -0
- package/lib/managers/streams.js +472 -0
- package/lib/managers/streams.js.map +1 -0
- package/lib/managers/xfa-manager.d.ts +228 -0
- package/lib/managers/xfa-manager.d.ts.map +1 -0
- package/lib/managers/xfa-manager.js +490 -0
- package/lib/managers/xfa-manager.js.map +1 -0
- package/lib/pdf-creator-manager.d.ts +201 -0
- package/lib/pdf-creator-manager.d.ts.map +1 -0
- package/lib/pdf-creator-manager.js +379 -0
- package/lib/pdf-creator-manager.js.map +1 -0
- package/lib/properties.d.ts +80 -0
- package/lib/properties.d.ts.map +1 -0
- package/lib/properties.js +455 -0
- package/lib/properties.js.map +1 -0
- package/lib/result-accessors-manager.d.ts +347 -0
- package/lib/result-accessors-manager.d.ts.map +1 -0
- package/lib/result-accessors-manager.js +705 -0
- package/lib/result-accessors-manager.js.map +1 -0
- package/lib/thumbnail-manager.d.ts +122 -0
- package/lib/thumbnail-manager.d.ts.map +1 -0
- package/lib/thumbnail-manager.js +206 -0
- package/lib/thumbnail-manager.js.map +1 -0
- package/lib/types/common.d.ts +93 -0
- package/lib/types/common.d.ts.map +1 -0
- package/lib/types/common.js +5 -0
- package/lib/types/common.js.map +1 -0
- package/lib/types/document-types.d.ts +353 -0
- package/lib/types/document-types.d.ts.map +1 -0
- package/lib/types/document-types.js +83 -0
- package/lib/types/document-types.js.map +1 -0
- package/{src/types/index.ts → lib/types/index.d.ts} +1 -1
- package/lib/types/index.d.ts.map +1 -0
- package/lib/types/index.js +6 -0
- package/lib/types/index.js.map +1 -0
- package/lib/types/manager-types.d.ts +180 -0
- package/lib/types/manager-types.d.ts.map +1 -0
- package/lib/types/manager-types.js +99 -0
- package/lib/types/manager-types.js.map +1 -0
- package/lib/types/native-bindings.d.ts +440 -0
- package/lib/types/native-bindings.d.ts.map +1 -0
- package/lib/types/native-bindings.js +8 -0
- package/lib/types/native-bindings.js.map +1 -0
- package/{src/workers/index.ts → lib/workers/index.d.ts} +1 -1
- package/lib/workers/index.d.ts.map +1 -0
- package/lib/workers/index.js +6 -0
- package/lib/workers/index.js.map +1 -0
- package/lib/workers/pool.d.ts +65 -0
- package/lib/workers/pool.d.ts.map +1 -0
- package/lib/workers/pool.js +195 -0
- package/lib/workers/pool.js.map +1 -0
- package/lib/workers/worker.d.ts +6 -0
- package/lib/workers/worker.d.ts.map +1 -0
- package/lib/workers/worker.js +100 -0
- package/lib/workers/worker.js.map +1 -0
- package/package.json +12 -22
- package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
- package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
- package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
- package/prebuilds/linux-x64/pdf_oxide.node +0 -0
- package/prebuilds/win32-x64/pdf_oxide.node +0 -0
- package/binding.gyp +0 -35
- package/src/builders/annotation-builder.ts +0 -367
- package/src/builders/conversion-options-builder.ts +0 -257
- package/src/builders/metadata-builder.ts +0 -317
- package/src/builders/pdf-builder.ts +0 -386
- package/src/builders/search-options-builder.ts +0 -151
- package/src/document-editor-manager.ts +0 -318
- package/src/errors.ts +0 -1629
- package/src/form-field-manager.ts +0 -666
- package/src/hybrid-ml-manager.ts +0 -283
- package/src/index.ts +0 -453
- package/src/managers/accessibility-manager.ts +0 -338
- package/src/managers/annotation-manager.ts +0 -439
- package/src/managers/barcode-manager.ts +0 -235
- package/src/managers/batch-manager.ts +0 -533
- package/src/managers/cache-manager.ts +0 -486
- package/src/managers/compliance-manager.ts +0 -375
- package/src/managers/content-manager.ts +0 -339
- package/src/managers/document-utility-manager.ts +0 -922
- package/src/managers/dom-pdf-creator.ts +0 -365
- package/src/managers/editing-manager.ts +0 -514
- package/src/managers/enterprise-manager.ts +0 -478
- package/src/managers/extended-managers.ts +0 -437
- package/src/managers/extraction-manager.ts +0 -583
- package/src/managers/final-utilities.ts +0 -429
- package/src/managers/hybrid-ml-advanced.ts +0 -479
- package/src/managers/index.ts +0 -239
- package/src/managers/layer-manager.ts +0 -500
- package/src/managers/metadata-manager.ts +0 -303
- package/src/managers/ocr-manager.ts +0 -756
- package/src/managers/optimization-manager.ts +0 -262
- package/src/managers/outline-manager.ts +0 -196
- package/src/managers/page-manager.ts +0 -289
- package/src/managers/pattern-detection.ts +0 -440
- package/src/managers/rendering-manager.ts +0 -863
- package/src/managers/search-manager.ts +0 -385
- package/src/managers/security-manager.ts +0 -345
- package/src/managers/signature-manager.ts +0 -1664
- package/src/managers/streams.ts +0 -618
- package/src/managers/xfa-manager.ts +0 -500
- package/src/pdf-creator-manager.ts +0 -494
- package/src/properties.ts +0 -522
- package/src/result-accessors-manager.ts +0 -867
- package/src/tests/advanced-features.test.ts +0 -414
- package/src/tests/advanced.test.ts +0 -266
- package/src/tests/extended-managers.test.ts +0 -316
- package/src/tests/final-utilities.test.ts +0 -455
- package/src/tests/foundation.test.ts +0 -315
- package/src/tests/high-demand.test.ts +0 -257
- package/src/tests/specialized.test.ts +0 -97
- package/src/thumbnail-manager.ts +0 -272
- package/src/types/common.ts +0 -142
- package/src/types/document-types.ts +0 -457
- package/src/types/manager-types.ts +0 -284
- package/src/types/native-bindings.ts +0 -517
- package/src/workers/pool.ts +0 -274
- package/src/workers/worker.ts +0 -131
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Manager for content extraction from PDF documents
|
|
3
|
+
*
|
|
4
|
+
* Caching is handled automatically at the Rust FFI layer, eliminating
|
|
5
|
+
* the need for duplicate cache implementations in the binding.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { ExtractionManager, ConversionOptionsBuilder } from 'pdf_oxide';
|
|
10
|
+
*
|
|
11
|
+
* const doc = PdfDocument.open('document.pdf');
|
|
12
|
+
* const extractionManager = new ExtractionManager(doc);
|
|
13
|
+
*
|
|
14
|
+
* // Extract text from a single page
|
|
15
|
+
* const text = extractionManager.extractText(0);
|
|
16
|
+
* console.log(text);
|
|
17
|
+
*
|
|
18
|
+
* // Extract all text
|
|
19
|
+
* const allText = extractionManager.extractAllText();
|
|
20
|
+
*
|
|
21
|
+
* // Extract with custom options
|
|
22
|
+
* const options = ConversionOptionsBuilder.highQuality().build();
|
|
23
|
+
* const markdown = extractionManager.extractMarkdown(0, options);
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
export interface ContentStatistics {
|
|
27
|
+
pageCount: number;
|
|
28
|
+
wordCount: number;
|
|
29
|
+
characterCount: number;
|
|
30
|
+
averageWordsPerPage: number;
|
|
31
|
+
averageCharactersPerPage: number;
|
|
32
|
+
}
|
|
33
|
+
export interface SearchMatch {
|
|
34
|
+
pageIndex: number;
|
|
35
|
+
pageNumber: number;
|
|
36
|
+
matchIndex: number;
|
|
37
|
+
snippet: string;
|
|
38
|
+
matchText: string;
|
|
39
|
+
}
|
|
40
|
+
export declare class ExtractionManager {
|
|
41
|
+
private _document;
|
|
42
|
+
/**
|
|
43
|
+
* Creates a new ExtractionManager for the given document
|
|
44
|
+
* @param document - The PDF document
|
|
45
|
+
* @throws Error if document is null or undefined
|
|
46
|
+
*/
|
|
47
|
+
constructor(document: any);
|
|
48
|
+
/**
|
|
49
|
+
* Extracts text from a single page.
|
|
50
|
+
*
|
|
51
|
+
* The native layer produces UTF-8 bytes, which Node decodes into a JS
|
|
52
|
+
* `string` (UTF-16 code units internally). As a result,
|
|
53
|
+
* `text.length` reports UTF-16 code units, not bytes — so a 648-byte
|
|
54
|
+
* UTF-8 string containing two accented letters reads as 646 in JS. Use
|
|
55
|
+
* `Buffer.byteLength(text, 'utf8')` if you need the byte count (e.g. to
|
|
56
|
+
* compare against Go's `len(string)` or Rust's `String::len()`).
|
|
57
|
+
*
|
|
58
|
+
* Results are automatically cached at the FFI layer.
|
|
59
|
+
*
|
|
60
|
+
* @param pageIndex - Zero-based page index
|
|
61
|
+
* @param options - Conversion options
|
|
62
|
+
* @returns Extracted text (UTF-16 code units)
|
|
63
|
+
* @throws Error if page index is invalid
|
|
64
|
+
*
|
|
65
|
+
* @example
|
|
66
|
+
* ```typescript
|
|
67
|
+
* const text = manager.extractText(0);
|
|
68
|
+
* console.log(`Page 1: ${text.length} UTF-16 code units`);
|
|
69
|
+
* console.log(` ${Buffer.byteLength(text, 'utf8')} UTF-8 bytes`);
|
|
70
|
+
* ```
|
|
71
|
+
*/
|
|
72
|
+
extractText(pageIndex: number, options?: Record<string, any>): string;
|
|
73
|
+
/**
|
|
74
|
+
* Extracts text from all pages
|
|
75
|
+
* @param options - Conversion options
|
|
76
|
+
* @returns All extracted text concatenated
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* ```typescript
|
|
80
|
+
* const allText = manager.extractAllText();
|
|
81
|
+
* console.log(`Total characters: ${allText.length}`);
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
extractAllText(options?: Record<string, any>): string;
|
|
85
|
+
/**
|
|
86
|
+
* Extracts text from a range of pages
|
|
87
|
+
* @param startPageIndex - Zero-based start page index
|
|
88
|
+
* @param endPageIndex - Zero-based end page index (inclusive)
|
|
89
|
+
* @param options - Conversion options
|
|
90
|
+
* @returns Extracted text from pages in range
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* const text = manager.extractTextRange(0, 10);
|
|
95
|
+
* console.log(`Text from pages 1-11: ${text}`);
|
|
96
|
+
* ```
|
|
97
|
+
*/
|
|
98
|
+
extractTextRange(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string;
|
|
99
|
+
/**
|
|
100
|
+
* Extracts text from specific page indices (non-contiguous)
|
|
101
|
+
* @param pageIndices - Array of zero-based page indices
|
|
102
|
+
* @param options - Conversion options
|
|
103
|
+
* @returns Extracted text from specified pages concatenated with newlines
|
|
104
|
+
* @throws Error if page indices are invalid
|
|
105
|
+
*
|
|
106
|
+
* @example
|
|
107
|
+
* ```typescript
|
|
108
|
+
* const text = manager.extractTextBatch([0, 2, 5]); // Extract pages 1, 3, 6
|
|
109
|
+
* console.log(text);
|
|
110
|
+
* ```
|
|
111
|
+
*/
|
|
112
|
+
extractTextBatch(pageIndices: number[], options?: Record<string, any>): string;
|
|
113
|
+
/**
|
|
114
|
+
* Extracts text from pages as an array (one entry per page)
|
|
115
|
+
* @param startPageIndex - Zero-based start page index
|
|
116
|
+
* @param endPageIndex - Zero-based end page index (inclusive)
|
|
117
|
+
* @param options - Conversion options
|
|
118
|
+
* @returns Array of extracted text, one per page
|
|
119
|
+
*
|
|
120
|
+
* @example
|
|
121
|
+
* ```typescript
|
|
122
|
+
* const pages = manager.extractTextArray(0, 5);
|
|
123
|
+
* pages.forEach((text, i) => console.log(`Page ${i}: ${text.length} chars`));
|
|
124
|
+
* ```
|
|
125
|
+
*/
|
|
126
|
+
extractTextArray(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string[];
|
|
127
|
+
/**
|
|
128
|
+
* Extracts page as Markdown.
|
|
129
|
+
* Results are automatically cached at the FFI layer.
|
|
130
|
+
* @param pageIndex - Zero-based page index
|
|
131
|
+
* @param options - Conversion options
|
|
132
|
+
* @returns Page content as Markdown
|
|
133
|
+
* @throws Error if page index is invalid
|
|
134
|
+
*
|
|
135
|
+
* @example
|
|
136
|
+
* ```typescript
|
|
137
|
+
* const markdown = manager.extractMarkdown(0);
|
|
138
|
+
* console.log(markdown); // Markdown formatted content
|
|
139
|
+
* ```
|
|
140
|
+
*/
|
|
141
|
+
extractMarkdown(pageIndex: number, options?: Record<string, any>): string;
|
|
142
|
+
/**
|
|
143
|
+
* Extracts all pages as Markdown
|
|
144
|
+
* @param options - Conversion options
|
|
145
|
+
* @returns All pages as Markdown
|
|
146
|
+
*
|
|
147
|
+
* @example
|
|
148
|
+
* ```typescript
|
|
149
|
+
* const markdown = manager.extractAllMarkdown();
|
|
150
|
+
* // Write to file
|
|
151
|
+
* fs.writeFileSync('output.md', markdown);
|
|
152
|
+
* ```
|
|
153
|
+
*/
|
|
154
|
+
extractAllMarkdown(options?: Record<string, any>): string;
|
|
155
|
+
/**
|
|
156
|
+
* Extracts markdown from a range of pages
|
|
157
|
+
* @param startPageIndex - Zero-based start page index
|
|
158
|
+
* @param endPageIndex - Zero-based end page index (inclusive)
|
|
159
|
+
* @param options - Conversion options
|
|
160
|
+
* @returns Extracted markdown from pages in range
|
|
161
|
+
*/
|
|
162
|
+
extractMarkdownRange(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string;
|
|
163
|
+
/**
|
|
164
|
+
* Gets word count for a page
|
|
165
|
+
* @param pageIndex - Zero-based page index
|
|
166
|
+
* @returns Estimated word count
|
|
167
|
+
*/
|
|
168
|
+
getPageWordCount(pageIndex: number): number;
|
|
169
|
+
/**
|
|
170
|
+
* Gets total word count for all pages
|
|
171
|
+
* @returns Total word count across all pages
|
|
172
|
+
*/
|
|
173
|
+
getTotalWordCount(): number;
|
|
174
|
+
/**
|
|
175
|
+
* Gets character count for a page
|
|
176
|
+
* @param pageIndex - Zero-based page index
|
|
177
|
+
* @returns Character count (including whitespace)
|
|
178
|
+
*/
|
|
179
|
+
getPageCharacterCount(pageIndex: number): number;
|
|
180
|
+
/**
|
|
181
|
+
* Gets total character count for all pages
|
|
182
|
+
* @returns Total character count
|
|
183
|
+
*/
|
|
184
|
+
getTotalCharacterCount(): number;
|
|
185
|
+
/**
|
|
186
|
+
* Gets line count for a page
|
|
187
|
+
* @param pageIndex - Zero-based page index
|
|
188
|
+
* @returns Estimated line count
|
|
189
|
+
*/
|
|
190
|
+
getPageLineCount(pageIndex: number): number;
|
|
191
|
+
/**
|
|
192
|
+
* Gets statistics for extracted content
|
|
193
|
+
* @returns Content statistics object
|
|
194
|
+
*
|
|
195
|
+
* @example
|
|
196
|
+
* ```typescript
|
|
197
|
+
* const stats = manager.getContentStatistics();
|
|
198
|
+
* console.log(`Total pages: ${stats.pageCount}`);
|
|
199
|
+
* console.log(`Total words: ${stats.wordCount}`);
|
|
200
|
+
* console.log(`Average words per page: ${stats.averageWordsPerPage}`);
|
|
201
|
+
* ```
|
|
202
|
+
*/
|
|
203
|
+
getContentStatistics(): ContentStatistics;
|
|
204
|
+
/**
|
|
205
|
+
* Searches for text across all pages and returns matching snippets
|
|
206
|
+
* @param searchText - Text to search for
|
|
207
|
+
* @param contextLength - Characters of context around match
|
|
208
|
+
* @returns Array of match objects with page and snippet
|
|
209
|
+
*
|
|
210
|
+
* @example
|
|
211
|
+
* ```typescript
|
|
212
|
+
* const matches = manager.searchContent('keyword', 50);
|
|
213
|
+
* matches.forEach(match => {
|
|
214
|
+
* console.log(`Page ${match.pageIndex + 1}: ...${match.snippet}...`);
|
|
215
|
+
* });
|
|
216
|
+
* ```
|
|
217
|
+
*/
|
|
218
|
+
searchContent(searchText: string, contextLength?: number): SearchMatch[];
|
|
219
|
+
/**
|
|
220
|
+
* Extract text from a page in a worker thread (non-blocking)
|
|
221
|
+
* @param documentPath - Path to the PDF document
|
|
222
|
+
* @param pageIndex - Page index to extract from
|
|
223
|
+
* @param options - Optional extraction options
|
|
224
|
+
* @param timeout - Optional timeout in milliseconds
|
|
225
|
+
* @returns Promise resolving to extracted text
|
|
226
|
+
*/
|
|
227
|
+
extractTextInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
|
|
228
|
+
/**
|
|
229
|
+
* Extract markdown from a page in a worker thread (non-blocking)
|
|
230
|
+
* @param documentPath - Path to the PDF document
|
|
231
|
+
* @param pageIndex - Page index to extract from
|
|
232
|
+
* @param options - Optional extraction options
|
|
233
|
+
* @param timeout - Optional timeout in milliseconds
|
|
234
|
+
* @returns Promise resolving to extracted markdown
|
|
235
|
+
*/
|
|
236
|
+
extractMarkdownInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
|
|
237
|
+
/**
|
|
238
|
+
* Extract HTML from a page in a worker thread (non-blocking)
|
|
239
|
+
* @param documentPath - Path to the PDF document
|
|
240
|
+
* @param pageIndex - Page index to extract from
|
|
241
|
+
* @param options - Optional extraction options
|
|
242
|
+
* @param timeout - Optional timeout in milliseconds
|
|
243
|
+
* @returns Promise resolving to extracted HTML
|
|
244
|
+
*/
|
|
245
|
+
extractHtmlInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
|
|
246
|
+
}
|
|
247
|
+
//# sourceMappingURL=extraction-manager.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extraction-manager.d.ts","sourceRoot":"","sources":["../../src/managers/extraction-manager.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,MAAM,WAAW,iBAAiB;IAChC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,cAAc,EAAE,MAAM,CAAC;IACvB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,wBAAwB,EAAE,MAAM,CAAC;CAClC;AAED,MAAM,WAAW,WAAW;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAM;IAEvB;;;;OAIG;gBACS,QAAQ,EAAE,GAAG;IAOzB;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM;IAgBrE;;;;;;;;;;OAUG;IACH,cAAc,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM;IAYrD;;;;;;;;;;;;OAYG;IACH,gBAAgB,CACd,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC5B,MAAM;IAwBT;;;;;;;;;;;;OAYG;IACH,gBAAgB,CAAC,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM;IAuB9E;;;;;;;;;;;;OAYG;IACH,gBAAgB,CACd,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC5B,MAAM,EAAE;IAwBX;;;;;;;;;;;;;OAaG;IACH,eAAe,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM;IAgBzE;;;;;;;;;;;OAWG;IACH,kBAAkB,CAAC,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAAG,MAAM;IAczD;;;;;;OAMG;IACH,oBAAoB,CAClB,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,EACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC5B,MAAM;IA0BT;;;;OAIG;IACH,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAK3C;;;OAGG;IACH,iBAAiB,IAAI,MAAM;IAK3B;;;;OAIG;IACH,qBAAqB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAKhD;;;OAGG;IACH,sBAAsB,IAAI,MAAM;IAQhC;;;;OAIG;IACH,gBAAgB,CAAC,SAAS,EAAE,MAAM,GAAG,MAAM;IAK3C;;;;;;;;;;;OAWG;IACH,oBAAoB,IAAI,iBAAiB;IAkBzC;;;;;;;;;;;;;OAaG;IACH,aAAa,CAAC,UAAU,EAAE,MAAM,EAAE,aAAa,GAAE,MAAY,GAAG,WAAW,EAAE;IAqC7E;;;;;;;OAOG;IACG,mBAAmB,CACvB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,CAAC,M
AAM,EAAE,GAAG,CAAC,EAC7B,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;IA2BlB;;;;;;;OAOG;IACG,uBAAuB,CAC3B,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC7B,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;IA2BlB;;;;;;;OAOG;IACG,mBAAmB,CACvB,YAAY,EAAE,MAAM,EACpB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EAC7B,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,MAAM,CAAC;CA0BnB"}
|