pdf-oxide-fips 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE-APACHE +176 -0
  2. package/LICENSE-MIT +25 -0
  3. package/README.md +218 -0
  4. package/lib/builders/annotation-builder.d.ts +198 -0
  5. package/lib/builders/annotation-builder.js +317 -0
  6. package/lib/builders/conversion-options-builder.d.ts +106 -0
  7. package/lib/builders/conversion-options-builder.js +214 -0
  8. package/lib/builders/document-builder.d.ts +381 -0
  9. package/lib/builders/document-builder.js +770 -0
  10. package/lib/builders/index.d.ts +13 -0
  11. package/lib/builders/index.js +13 -0
  12. package/lib/builders/metadata-builder.d.ts +201 -0
  13. package/lib/builders/metadata-builder.js +285 -0
  14. package/lib/builders/pdf-builder.d.ts +216 -0
  15. package/lib/builders/pdf-builder.js +350 -0
  16. package/lib/builders/search-options-builder.d.ts +73 -0
  17. package/lib/builders/search-options-builder.js +129 -0
  18. package/lib/builders/streaming-table.d.ts +64 -0
  19. package/lib/builders/streaming-table.js +140 -0
  20. package/lib/document-editor-manager.d.ts +139 -0
  21. package/lib/document-editor-manager.js +256 -0
  22. package/lib/document-editor.d.ts +124 -0
  23. package/lib/document-editor.js +318 -0
  24. package/lib/errors.d.ts +382 -0
  25. package/lib/errors.js +1115 -0
  26. package/lib/form-field-manager.d.ts +299 -0
  27. package/lib/form-field-manager.js +568 -0
  28. package/lib/hybrid-ml-manager.d.ts +142 -0
  29. package/lib/hybrid-ml-manager.js +208 -0
  30. package/lib/index.d.ts +205 -0
  31. package/lib/index.js +693 -0
  32. package/lib/managers/accessibility-manager.d.ts +148 -0
  33. package/lib/managers/accessibility-manager.js +234 -0
  34. package/lib/managers/annotation-manager.d.ts +219 -0
  35. package/lib/managers/annotation-manager.js +359 -0
  36. package/lib/managers/barcode-manager.d.ts +82 -0
  37. package/lib/managers/barcode-manager.js +263 -0
  38. package/lib/managers/batch-manager.d.ts +185 -0
  39. package/lib/managers/batch-manager.js +385 -0
  40. package/lib/managers/cache-manager.d.ts +181 -0
  41. package/lib/managers/cache-manager.js +384 -0
  42. package/lib/managers/compliance-manager.d.ts +103 -0
  43. package/lib/managers/compliance-manager.js +453 -0
  44. package/lib/managers/content-manager.d.ts +120 -0
  45. package/lib/managers/content-manager.js +294 -0
  46. package/lib/managers/document-utility-manager.d.ts +369 -0
  47. package/lib/managers/document-utility-manager.js +730 -0
  48. package/lib/managers/dom-pdf-creator.d.ts +104 -0
  49. package/lib/managers/dom-pdf-creator.js +299 -0
  50. package/lib/managers/editing-manager.d.ts +248 -0
  51. package/lib/managers/editing-manager.js +387 -0
  52. package/lib/managers/enterprise-manager.d.ts +192 -0
  53. package/lib/managers/enterprise-manager.js +307 -0
  54. package/lib/managers/extended-managers.d.ts +122 -0
  55. package/lib/managers/extended-managers.js +664 -0
  56. package/lib/managers/extraction-manager.d.ts +246 -0
  57. package/lib/managers/extraction-manager.js +482 -0
  58. package/lib/managers/final-utilities.d.ts +127 -0
  59. package/lib/managers/final-utilities.js +657 -0
  60. package/lib/managers/hybrid-ml-advanced.d.ts +136 -0
  61. package/lib/managers/hybrid-ml-advanced.js +722 -0
  62. package/lib/managers/index.d.ts +64 -0
  63. package/lib/managers/index.js +69 -0
  64. package/lib/managers/layer-manager.d.ts +203 -0
  65. package/lib/managers/layer-manager.js +401 -0
  66. package/lib/managers/metadata-manager.d.ts +148 -0
  67. package/lib/managers/metadata-manager.js +280 -0
  68. package/lib/managers/ocr-manager.d.ts +194 -0
  69. package/lib/managers/ocr-manager.js +582 -0
  70. package/lib/managers/optimization-manager.d.ts +102 -0
  71. package/lib/managers/optimization-manager.js +213 -0
  72. package/lib/managers/outline-manager.d.ts +101 -0
  73. package/lib/managers/outline-manager.js +169 -0
  74. package/lib/managers/page-manager.d.ts +142 -0
  75. package/lib/managers/page-manager.js +235 -0
  76. package/lib/managers/pattern-detection.d.ts +169 -0
  77. package/lib/managers/pattern-detection.js +322 -0
  78. package/lib/managers/rendering-manager.d.ts +353 -0
  79. package/lib/managers/rendering-manager.js +679 -0
  80. package/lib/managers/search-manager.d.ts +235 -0
  81. package/lib/managers/search-manager.js +329 -0
  82. package/lib/managers/security-manager.d.ts +161 -0
  83. package/lib/managers/security-manager.js +292 -0
  84. package/lib/managers/signature-manager.d.ts +738 -0
  85. package/lib/managers/signature-manager.js +1509 -0
  86. package/lib/managers/streams.d.ts +262 -0
  87. package/lib/managers/streams.js +477 -0
  88. package/lib/managers/xfa-manager.d.ts +227 -0
  89. package/lib/managers/xfa-manager.js +539 -0
  90. package/lib/native-loader.d.ts +7 -0
  91. package/lib/native-loader.js +62 -0
  92. package/lib/native.d.ts +16 -0
  93. package/lib/native.js +69 -0
  94. package/lib/pdf-creator-manager.d.ts +200 -0
  95. package/lib/pdf-creator-manager.js +381 -0
  96. package/lib/properties.d.ts +79 -0
  97. package/lib/properties.js +454 -0
  98. package/lib/result-accessors-manager.d.ts +346 -0
  99. package/lib/result-accessors-manager.js +706 -0
  100. package/lib/thumbnail-manager.d.ts +121 -0
  101. package/lib/thumbnail-manager.js +205 -0
  102. package/lib/timestamp.d.ts +54 -0
  103. package/lib/timestamp.js +115 -0
  104. package/lib/tsa-client.d.ts +44 -0
  105. package/lib/tsa-client.js +67 -0
  106. package/lib/types/common.d.ts +189 -0
  107. package/lib/types/common.js +17 -0
  108. package/lib/types/document-types.d.ts +352 -0
  109. package/lib/types/document-types.js +82 -0
  110. package/lib/types/index.d.ts +5 -0
  111. package/lib/types/index.js +5 -0
  112. package/lib/types/manager-types.d.ts +179 -0
  113. package/lib/types/manager-types.js +100 -0
  114. package/lib/types/native-bindings.d.ts +439 -0
  115. package/lib/types/native-bindings.js +7 -0
  116. package/lib/workers/index.d.ts +6 -0
  117. package/lib/workers/index.js +5 -0
  118. package/lib/workers/pool.d.ts +64 -0
  119. package/lib/workers/pool.js +192 -0
  120. package/lib/workers/worker.d.ts +5 -0
  121. package/lib/workers/worker.js +99 -0
  122. package/package.json +79 -0
  123. package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
  124. package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
  125. package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
  126. package/prebuilds/linux-x64/pdf_oxide.node +0 -0
  127. package/prebuilds/win32-x64/pdf_oxide.node +0 -0
@@ -0,0 +1,246 @@
1
+ /**
2
+ * Manager for content extraction from PDF documents
3
+ *
4
+ * Caching is handled automatically at the Rust FFI layer, eliminating
5
+ * the need for duplicate cache implementations in the binding.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { ExtractionManager, ConversionOptionsBuilder } from 'pdf_oxide';
10
+ *
11
+ * const doc = PdfDocument.open('document.pdf');
12
+ * const extractionManager = new ExtractionManager(doc);
13
+ *
14
+ * // Extract text from a single page
15
+ * const text = extractionManager.extractText(0);
16
+ * console.log(text);
17
+ *
18
+ * // Extract all text
19
+ * const allText = extractionManager.extractAllText();
20
+ *
21
+ * // Extract with custom options
22
+ * const options = ConversionOptionsBuilder.highQuality().build();
23
+ * const markdown = extractionManager.extractMarkdown(0, options);
24
+ * ```
25
+ */
26
+ /** Aggregate content counts; this is the return type of ExtractionManager.getContentStatistics(). */ export interface ContentStatistics {
27
+ pageCount: number;
28
+ wordCount: number;
29
+ characterCount: number;
30
+ averageWordsPerPage: number; // presumably wordCount / pageCount — TODO confirm against the implementation
31
+ averageCharactersPerPage: number; // presumably characterCount / pageCount — TODO confirm against the implementation
32
+ }
33
+ /** One search hit; ExtractionManager.searchContent() returns an array of these. */ export interface SearchMatch {
34
+ pageIndex: number; // the searchContent() example prints `pageIndex + 1` as the page label, which suggests zero-based — TODO confirm
35
+ pageNumber: number; // presumably the one-based counterpart of pageIndex — TODO confirm
36
+ matchIndex: number; // NOTE(review): unclear from the declaration whether this is a character offset or a match ordinal — confirm
37
+ snippet: string; // presumably the match plus surrounding context (see searchContent's contextLength) — TODO confirm
38
+ matchText: string;
39
+ }
40
+ /** Manager for content extraction from PDF documents (its overview and usage example are in the comment at the top of this file). */ export declare class ExtractionManager {
41
+ private _document;
42
+ /**
43
+ * Creates a new ExtractionManager for the given document
44
+ * @param document - The PDF document
45
+ * @throws Error if document is null or undefined
46
+ */
47
+ constructor(document: any);
48
+ /**
49
+ * Extracts text from a single page.
50
+ *
51
+ * The native layer produces UTF-8 bytes, which Node decodes into a JS
52
+ * `string` (UTF-16 code units internally). As a result,
53
+ * `text.length` reports UTF-16 code units, not bytes — so a 648-byte
54
+ * UTF-8 string containing two accented letters reads as 646 in JS. Use
55
+ * `Buffer.byteLength(text, 'utf8')` if you need the byte count (e.g. to
56
+ * compare against Go's `len(string)` or Rust's `String::len()`).
57
+ *
58
+ * Results are automatically cached at the FFI layer.
59
+ *
60
+ * @param pageIndex - Zero-based page index
61
+ * @param options - Conversion options
62
+ * @returns Extracted text as a JS string (its `length` counts UTF-16 code units, not bytes)
63
+ * @throws Error if page index is invalid
64
+ *
65
+ * @example
66
+ * ```typescript
67
+ * const text = manager.extractText(0);
68
+ * console.log(`Page 1: ${text.length} UTF-16 code units`);
69
+ * console.log(` ${Buffer.byteLength(text, 'utf8')} UTF-8 bytes`);
70
+ * ```
71
+ */
72
+ extractText(pageIndex: number, options?: Record<string, any>): string;
73
+ /**
74
+ * Extracts text from all pages
75
+ * @param options - Conversion options
76
+ * @returns All extracted text concatenated
77
+ *
78
+ * @example
79
+ * ```typescript
80
+ * const allText = manager.extractAllText();
81
+ * console.log(`Total characters: ${allText.length}`);
82
+ * ```
83
+ */
84
+ extractAllText(options?: Record<string, any>): string;
85
+ /**
86
+ * Extracts text from a range of pages
87
+ * @param startPageIndex - Zero-based start page index
88
+ * @param endPageIndex - Zero-based end page index (inclusive)
89
+ * @param options - Conversion options
90
+ * @returns Extracted text from pages in range
91
+ *
92
+ * @example
93
+ * ```typescript
94
+ * const text = manager.extractTextRange(0, 10);
95
+ * console.log(`Text from pages 1-11: ${text}`);
96
+ * ```
97
+ */
98
+ extractTextRange(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string;
99
+ /**
100
+ * Extracts text from specific page indices (non-contiguous)
101
+ * @param pageIndices - Array of zero-based page indices
102
+ * @param options - Conversion options
103
+ * @returns Extracted text from specified pages concatenated with newlines
104
+ * @throws Error if page indices are invalid
105
+ *
106
+ * @example
107
+ * ```typescript
108
+ * const text = manager.extractTextBatch([0, 2, 5]); // Extract pages 1, 3, 6
109
+ * console.log(text);
110
+ * ```
111
+ */
112
+ extractTextBatch(pageIndices: number[], options?: Record<string, any>): string;
113
+ /**
114
+ * Extracts text from pages as an array (one entry per page)
115
+ * @param startPageIndex - Zero-based start page index
116
+ * @param endPageIndex - Zero-based end page index (inclusive)
117
+ * @param options - Conversion options
118
+ * @returns Array of extracted text, one per page
119
+ *
120
+ * @example
121
+ * ```typescript
122
+ * const pages = manager.extractTextArray(0, 5);
123
+ * pages.forEach((text, i) => console.log(`Page ${i}: ${text.length} chars`));
124
+ * ```
125
+ */
126
+ extractTextArray(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string[];
127
+ /**
128
+ * Extracts page as Markdown.
129
+ * Results are automatically cached at the FFI layer.
130
+ * @param pageIndex - Zero-based page index
131
+ * @param options - Conversion options
132
+ * @returns Page content as Markdown
133
+ * @throws Error if page index is invalid
134
+ *
135
+ * @example
136
+ * ```typescript
137
+ * const markdown = manager.extractMarkdown(0);
138
+ * console.log(markdown); // Markdown formatted content
139
+ * ```
140
+ */
141
+ extractMarkdown(pageIndex: number, options?: Record<string, any>): string;
142
+ /**
143
+ * Extracts all pages as Markdown
144
+ * @param options - Conversion options
145
+ * @returns All pages as Markdown
146
+ *
147
+ * @example
148
+ * ```typescript
149
+ * const markdown = manager.extractAllMarkdown();
150
+ * // Write to file
151
+ * fs.writeFileSync('output.md', markdown);
152
+ * ```
153
+ */
154
+ extractAllMarkdown(options?: Record<string, any>): string;
155
+ /**
156
+ * Extracts markdown from a range of pages
157
+ * @param startPageIndex - Zero-based start page index
158
+ * @param endPageIndex - Zero-based end page index (inclusive)
159
+ * @param options - Conversion options
160
+ * @returns Extracted markdown from pages in range
161
+ */
162
+ extractMarkdownRange(startPageIndex: number, endPageIndex: number, options?: Record<string, any>): string;
163
+ /**
164
+ * Gets word count for a page
165
+ * @param pageIndex - Zero-based page index
166
+ * @returns Estimated word count
167
+ */
168
+ getPageWordCount(pageIndex: number): number;
169
+ /**
170
+ * Gets total word count for all pages
171
+ * @returns Total word count across all pages
172
+ */
173
+ getTotalWordCount(): number;
174
+ /**
175
+ * Gets character count for a page
176
+ * @param pageIndex - Zero-based page index
177
+ * @returns Character count (including whitespace)
178
+ */
179
+ getPageCharacterCount(pageIndex: number): number;
180
+ /**
181
+ * Gets total character count for all pages
182
+ * @returns Total character count
183
+ */
184
+ getTotalCharacterCount(): number;
185
+ /**
186
+ * Gets line count for a page
187
+ * @param pageIndex - Zero-based page index
188
+ * @returns Estimated line count
189
+ */
190
+ getPageLineCount(pageIndex: number): number;
191
+ /**
192
+ * Gets statistics for extracted content
193
+ * @returns Content statistics object
194
+ *
195
+ * @example
196
+ * ```typescript
197
+ * const stats = manager.getContentStatistics();
198
+ * console.log(`Total pages: ${stats.pageCount}`);
199
+ * console.log(`Total words: ${stats.wordCount}`);
200
+ * console.log(`Average words per page: ${stats.averageWordsPerPage}`);
201
+ * ```
202
+ */
203
+ getContentStatistics(): ContentStatistics;
204
+ /**
205
+ * Searches for text across all pages and returns matching snippets
206
+ * @param searchText - Text to search for
207
+ * @param contextLength - Characters of context around match
208
+ * @returns Array of match objects with page and snippet
209
+ *
210
+ * @example
211
+ * ```typescript
212
+ * const matches = manager.searchContent('keyword', 50);
213
+ * matches.forEach(match => {
214
+ * console.log(`Page ${match.pageIndex + 1}: ...${match.snippet}...`);
215
+ * });
216
+ * ```
217
+ */
218
+ searchContent(searchText: string, contextLength?: number): SearchMatch[];
219
+ /**
220
+ * Extract text from a page in a worker thread (non-blocking)
221
+ * @param documentPath - Path to the PDF document
222
+ * @param pageIndex - Page index to extract from
223
+ * @param options - Optional extraction options
224
+ * @param timeout - Optional timeout in milliseconds
225
+ * @returns Promise resolving to extracted text
226
+ */
227
+ extractTextInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
228
+ /**
229
+ * Extract markdown from a page in a worker thread (non-blocking)
230
+ * @param documentPath - Path to the PDF document
231
+ * @param pageIndex - Page index to extract from
232
+ * @param options - Optional extraction options
233
+ * @param timeout - Optional timeout in milliseconds
234
+ * @returns Promise resolving to extracted markdown
235
+ */
236
+ extractMarkdownInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
237
+ /**
238
+ * Extract HTML from a page in a worker thread (non-blocking)
239
+ * @param documentPath - Path to the PDF document
240
+ * @param pageIndex - Page index to extract from
241
+ * @param options - Optional extraction options
242
+ * @param timeout - Optional timeout in milliseconds
243
+ * @returns Promise resolving to extracted HTML
244
+ */
245
+ extractHtmlInWorker(documentPath: string, pageIndex: number, options?: Record<string, any>, timeout?: number): Promise<string>;
246
+ }