pdf-oxide 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/README.md +218 -0
  2. package/binding.gyp +35 -0
  3. package/package.json +78 -0
  4. package/src/builders/annotation-builder.ts +367 -0
  5. package/src/builders/conversion-options-builder.ts +257 -0
  6. package/src/builders/index.ts +12 -0
  7. package/src/builders/metadata-builder.ts +317 -0
  8. package/src/builders/pdf-builder.ts +386 -0
  9. package/src/builders/search-options-builder.ts +151 -0
  10. package/src/document-editor-manager.ts +318 -0
  11. package/src/errors.ts +1629 -0
  12. package/src/form-field-manager.ts +666 -0
  13. package/src/hybrid-ml-manager.ts +283 -0
  14. package/src/index.ts +453 -0
  15. package/src/managers/accessibility-manager.ts +338 -0
  16. package/src/managers/annotation-manager.ts +439 -0
  17. package/src/managers/barcode-manager.ts +235 -0
  18. package/src/managers/batch-manager.ts +533 -0
  19. package/src/managers/cache-manager.ts +486 -0
  20. package/src/managers/compliance-manager.ts +375 -0
  21. package/src/managers/content-manager.ts +339 -0
  22. package/src/managers/document-utility-manager.ts +922 -0
  23. package/src/managers/dom-pdf-creator.ts +365 -0
  24. package/src/managers/editing-manager.ts +514 -0
  25. package/src/managers/enterprise-manager.ts +478 -0
  26. package/src/managers/extended-managers.ts +437 -0
  27. package/src/managers/extraction-manager.ts +583 -0
  28. package/src/managers/final-utilities.ts +429 -0
  29. package/src/managers/hybrid-ml-advanced.ts +479 -0
  30. package/src/managers/index.ts +239 -0
  31. package/src/managers/layer-manager.ts +500 -0
  32. package/src/managers/metadata-manager.ts +303 -0
  33. package/src/managers/ocr-manager.ts +756 -0
  34. package/src/managers/optimization-manager.ts +262 -0
  35. package/src/managers/outline-manager.ts +196 -0
  36. package/src/managers/page-manager.ts +289 -0
  37. package/src/managers/pattern-detection.ts +440 -0
  38. package/src/managers/rendering-manager.ts +863 -0
  39. package/src/managers/search-manager.ts +385 -0
  40. package/src/managers/security-manager.ts +345 -0
  41. package/src/managers/signature-manager.ts +1664 -0
  42. package/src/managers/streams.ts +618 -0
  43. package/src/managers/xfa-manager.ts +500 -0
  44. package/src/pdf-creator-manager.ts +494 -0
  45. package/src/properties.ts +522 -0
  46. package/src/result-accessors-manager.ts +867 -0
  47. package/src/tests/advanced-features.test.ts +414 -0
  48. package/src/tests/advanced.test.ts +266 -0
  49. package/src/tests/extended-managers.test.ts +316 -0
  50. package/src/tests/final-utilities.test.ts +455 -0
  51. package/src/tests/foundation.test.ts +315 -0
  52. package/src/tests/high-demand.test.ts +257 -0
  53. package/src/tests/specialized.test.ts +97 -0
  54. package/src/thumbnail-manager.ts +272 -0
  55. package/src/types/common.ts +142 -0
  56. package/src/types/document-types.ts +457 -0
  57. package/src/types/index.ts +6 -0
  58. package/src/types/manager-types.ts +284 -0
  59. package/src/types/native-bindings.ts +517 -0
  60. package/src/workers/index.ts +7 -0
  61. package/src/workers/pool.ts +274 -0
  62. package/src/workers/worker.ts +131 -0
@@ -0,0 +1,385 @@
1
+ /**
2
+ * Manager for text search operations in PDF documents
3
+ *
4
+ * Caching is handled automatically at the Rust FFI layer, eliminating
5
+ * the need for duplicate cache implementations in the binding.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { SearchManager, SearchOptionsBuilder } from 'pdf_oxide';
10
+ *
11
+ * const doc = PdfDocument.open('document.pdf');
12
+ * const searchManager = new SearchManager(doc);
13
+ *
14
+ * // Simple search
15
+ * const results = searchManager.searchAll('keyword');
16
+ *
17
+ * // Search with options
18
+ * const options = SearchOptionsBuilder.strict().build();
19
+ * const strictResults = searchManager.searchAll('keyword', options);
20
+ *
21
+ * // Count occurrences
22
+ * const count = searchManager.countAllOccurrences('keyword');
23
+ * ```
24
+ */
25
+
26
/**
 * A single match returned by a search.
 *
 * Every field is optional; the index signature allows additional fields
 * beyond the ones named here.
 */
export interface SearchResult {
  /** Matched text. */
  text?: string;
  /** Zero-based index of the page holding the match (set by `SearchManager.searchAll`). */
  pageIndex?: number;
  /** One-based page number, i.e. `pageIndex + 1` (set by `SearchManager.searchAll`). */
  pageNumber?: number;
  /** Position of the match — NOTE(review): unit/origin is not defined in this file; confirm against the native layer. */
  position?: number;
  /** Bounding box of the match — NOTE(review): key names are not defined in this file; confirm against the native layer. */
  boundingBox?: Record<string, number>;
  /** Any additional fields. */
  [key: string]: any;
}
33
+
34
/**
 * Aggregate figures for one search term, as produced by
 * `SearchManager.getSearchStatistics`.
 */
export interface SearchStatistics {
  /** The term that was searched for. */
  searchText: string;
  /** Total number of matches. */
  totalOccurrences: number;
  /** Number of distinct pages with at least one match. */
  pagesContaining: number;
  /** Zero-based index of the first page with a match, or -1 when there is none. */
  firstMatchPage: number;
  /** Zero-based index of the last page with a match, or -1 when there is none. */
  lastMatchPage: number;
  /** Zero-based indices of the matching pages, in ascending order. */
  pages: number[];
  /** Match count for each matching page, in the same order as `pages`. */
  occurrencesPerPage: {
    pageIndex: number;
    pageNumber: number;
    count: number;
  }[];
}
47
+
48
/**
 * Capability summary returned by `SearchManager.getCapabilities`.
 */
export interface SearchCapabilities {
  /** Reported support for case-sensitive matching. */
  caseSensitiveSearch: boolean;
  /** Reported support for whole-word matching. */
  wholeWordSearch: boolean;
  /** Reported support for regular-expression patterns. */
  regexSearch: boolean;
  /** Reported support for searching annotations. */
  annotationSearch: boolean;
  /**
   * Reported maximum number of results.
   * NOTE(review): nothing in this file enforces this limit; confirm where it applies.
   */
  maxResults: number;
  /** Result of probing the document with `SearchManager.isSearchable`. */
  isSearchable: boolean;
}
56
+
57
/**
 * Text-search helper bound to one document.
 *
 * The document is treated as an opaque handle: this class only reads its
 * `pageCount` property and calls its `search(text, pageIndex, options)`
 * method. Everything else (counting, statistics, first/last match) is
 * derived from those per-page results.
 */
export class SearchManager {
  // Untyped on purpose: the concrete document type is declared elsewhere.
  private readonly _document: any;

  /**
   * Creates a new SearchManager for the given document.
   * @param document - The PDF document
   * @throws Error if document is null, undefined or otherwise falsy
   */
  constructor(document: any) {
    if (!document) {
      throw new Error('Document is required');
    }
    this._document = document;
  }

  /**
   * Extracts a printable message from anything that may be thrown.
   * Non-Error values (strings, plain objects) would otherwise surface
   * as "undefined" in the wrapped error text.
   */
  private static _errorMessage(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /**
   * Throws unless `searchText` is a non-empty string.
   */
  private static _assertSearchText(searchText: unknown): void {
    if (!searchText || typeof searchText !== 'string') {
      throw new Error('Search text must be a non-empty string');
    }
  }

  /**
   * Scans pages in order and returns the first match, stamped with its
   * page index/number, without searching the remaining pages.
   * Errors are reported with the same prefix `searchAll` uses.
   */
  private _firstMatch(searchText: string, options?: Record<string, any>): SearchResult | null {
    SearchManager._assertSearchText(searchText);

    try {
      for (let i = 0; i < this._document.pageCount; i++) {
        const pageResults = this.search(searchText, i, options);
        if (pageResults.length > 0) {
          const first = pageResults[0] as SearchResult;
          first.pageIndex = i;
          first.pageNumber = i + 1;
          return first;
        }
      }
      return null;
    } catch (error) {
      throw new Error(`Search all failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Searches for text in a specific page.
   * @param searchText - Text to search for
   * @param pageIndex - Zero-based page index; must be a non-negative integer
   *   smaller than the document's page count
   * @param options - Search options (caseSensitive, wholeWords, useRegex, etc.),
   *   passed through to the document unchanged
   * @returns Array of search results (empty when the document returns nothing)
   * @throws Error if parameters are invalid or the underlying search throws
   *
   * @example
   * ```typescript
   * const results = manager.search('error', 0);
   * results.forEach(result => {
   *   console.log(`Found at position ${result.position}`);
   * });
   * ```
   */
  search(searchText: string, pageIndex: number, options?: Record<string, any>): SearchResult[] {
    SearchManager._assertSearchText(searchText);

    // Number.isInteger also rejects NaN, fractions and non-number values,
    // all of which slip past a plain `typeof` / `< 0` check.
    if (!Number.isInteger(pageIndex) || pageIndex < 0) {
      throw new Error('Page index must be a non-negative number');
    }

    if (pageIndex >= this._document.pageCount) {
      throw new Error(`Page index ${pageIndex} out of range`);
    }

    try {
      return this._document.search(searchText, pageIndex, options) || [];
    } catch (error) {
      throw new Error(`Search failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Searches for text across all pages.
   * Each returned result is stamped with `pageIndex` (zero-based) and
   * `pageNumber` (one-based).
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Array of search results in page order
   * @throws Error if searchText is invalid or any page search fails
   *
   * @example
   * ```typescript
   * const results = manager.searchAll('important');
   * console.log(`Found ${results.length} occurrences`);
   * ```
   */
  searchAll(searchText: string, options?: Record<string, any>): SearchResult[] {
    SearchManager._assertSearchText(searchText);

    const allResults: SearchResult[] = [];

    try {
      for (let i = 0; i < this._document.pageCount; i++) {
        // Push one by one: spreading a very large page result into push()
        // can exceed the engine's argument limit.
        for (const result of this.search(searchText, i, options)) {
          result.pageIndex = i;
          result.pageNumber = i + 1;
          allResults.push(result);
        }
      }

      return allResults;
    } catch (error) {
      throw new Error(`Search all failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Counts occurrences of text in a page.
   * @param searchText - Text to search for
   * @param pageIndex - Zero-based page index
   * @param options - Search options
   * @returns Number of occurrences found
   *
   * @example
   * ```typescript
   * const count = manager.countOccurrences('the', 0);
   * console.log(`"the" appears ${count} times on page 1`);
   * ```
   */
  countOccurrences(searchText: string, pageIndex: number, options?: Record<string, any>): number {
    return this.search(searchText, pageIndex, options).length;
  }

  /**
   * Counts occurrences of text across all pages.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Total occurrences
   *
   * @example
   * ```typescript
   * const totalCount = manager.countAllOccurrences('the');
   * console.log(`"the" appears ${totalCount} times in document`);
   * ```
   */
  countAllOccurrences(searchText: string, options?: Record<string, any>): number {
    return this.searchAll(searchText, options).length;
  }

  /**
   * Checks if text exists in a page.
   * @param searchText - Text to search for
   * @param pageIndex - Zero-based page index
   * @param options - Search options
   * @returns True if text found
   *
   * @example
   * ```typescript
   * if (manager.contains('error', 0)) {
   *   console.log('Page contains "error"');
   * }
   * ```
   */
  contains(searchText: string, pageIndex: number, options?: Record<string, any>): boolean {
    return this.search(searchText, pageIndex, options).length > 0;
  }

  /**
   * Checks if text exists anywhere in the document.
   * Stops at the first page that has a match.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns True if text found anywhere
   *
   * @example
   * ```typescript
   * if (manager.containsAnywhere('copyright')) {
   *   console.log('Document contains copyright notice');
   * }
   * ```
   */
  containsAnywhere(searchText: string, options?: Record<string, any>): boolean {
    return this._firstMatch(searchText, options) !== null;
  }

  /**
   * Gets pages containing the search text.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Ascending array of page indices (zero-based) containing the text
   *
   * @example
   * ```typescript
   * const pages = manager.getPagesContaining('error');
   * console.log(`"error" found on pages: ${pages.map(p => p + 1).join(', ')}`);
   * ```
   */
  getPagesContaining(searchText: string, options?: Record<string, any>): number[] {
    const pageSet = new Set<number>();
    for (const result of this.searchAll(searchText, options)) {
      pageSet.add(result.pageIndex ?? 0);
    }
    return [...pageSet].sort((a, b) => a - b);
  }

  /**
   * Gets statistics for search results.
   * `firstMatchPage` and `lastMatchPage` are zero-based and are -1 when
   * nothing matched.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Search statistics
   *
   * @example
   * ```typescript
   * const stats = manager.getSearchStatistics('error');
   * console.log(`Found ${stats.totalOccurrences} occurrences`);
   * console.log(`On ${stats.pagesContaining} pages`);
   * console.log(`First match on page ${stats.firstMatchPage + 1}`);
   * ```
   */
  getSearchStatistics(searchText: string, options?: Record<string, any>): SearchStatistics {
    const results = this.searchAll(searchText, options);

    // Tally matches per page in a single pass.
    const countsByPage = new Map<number, number>();
    for (const result of results) {
      const pageIdx = result.pageIndex ?? 0;
      countsByPage.set(pageIdx, (countsByPage.get(pageIdx) ?? 0) + 1);
    }

    const pages = [...countsByPage.keys()].sort((a, b) => a - b);

    return {
      searchText,
      totalOccurrences: results.length,
      pagesContaining: pages.length,
      firstMatchPage: pages[0] ?? -1,
      lastMatchPage: pages[pages.length - 1] ?? -1,
      pages,
      occurrencesPerPage: pages.map(p => ({
        pageIndex: p,
        pageNumber: p + 1,
        count: countsByPage.get(p) ?? 0,
      })),
    };
  }

  /**
   * Searches all pages with a regular expression.
   *
   * Only the pattern source is forwarded as the search text. When a RegExp
   * carries the `i` flag, `caseSensitive: false` is added to the options
   * unless the caller set `caseSensitive` explicitly; other flags are not
   * forwarded.
   * NOTE(review): `caseSensitive` is the option name given in the `search`
   * documentation — confirm the native layer honours it in regex mode.
   * @param pattern - Regular expression or pattern string
   * @param options - Search options (`useRegex` is always forced to true)
   * @returns Array of search results
   * @throws Error if the pattern is empty or the search fails
   *
   * @example
   * ```typescript
   * const results = manager.searchRegex(/error\d+/i);
   * // Finds "error1", "ERROR2", "Error3", etc.
   * ```
   */
  searchRegex(pattern: RegExp | string, options: Record<string, any> = {}): SearchResult[] {
    const isRegExp = pattern instanceof RegExp;
    const regexStr = isRegExp ? (pattern as RegExp).source : pattern;

    if (!regexStr || typeof regexStr !== 'string') {
      throw new Error('Pattern must be a valid regular expression');
    }

    // Caller-supplied options win over the flag-derived default;
    // useRegex is applied last so it can never be overridden.
    const flagDefaults: Record<string, any> =
      isRegExp && (pattern as RegExp).ignoreCase ? { caseSensitive: false } : {};
    const searchOptions = {
      ...flagDefaults,
      ...options,
      useRegex: true,
    };

    try {
      return this.searchAll(regexStr, searchOptions);
    } catch (error) {
      throw new Error(`Regex search failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Finds the first occurrence of text.
   * Stops at the first page that has a match.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns First search result or null if not found
   *
   * @example
   * ```typescript
   * const first = manager.findFirst('chapter');
   * if (first) {
   *   console.log(`First "chapter" found on page ${first.pageNumber}`);
   * }
   * ```
   */
  findFirst(searchText: string, options?: Record<string, any>): SearchResult | null {
    return this._firstMatch(searchText, options);
  }

  /**
   * Finds the last occurrence of text.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Last search result or null if not found
   */
  findLast(searchText: string, options?: Record<string, any>): SearchResult | null {
    const results = this.searchAll(searchText, options);
    return results[results.length - 1] ?? null;
  }

  /**
   * Returns every occurrence of the text so a caller can highlight them.
   * The document is not modified; this is the same result as `searchAll`.
   * @param searchText - Text to find
   * @param options - Search options
   * @returns All matches, in page order
   *
   * @example
   * ```typescript
   * const highlights = manager.highlightMatches('important');
   * // Use results for UI highlighting
   * ```
   */
  highlightMatches(searchText: string, options?: Record<string, any>): SearchResult[] {
    return this.searchAll(searchText, options);
  }

  /**
   * Checks if the document is searchable by running a probe search for
   * the literal text "test" over every page.
   * A document with zero pages reports true because no page search runs.
   * @returns True if the probe search completed without throwing
   */
  isSearchable(): boolean {
    try {
      this.searchAll('test');
      return true;
    } catch {
      // Deliberate best-effort: any failure simply means "not searchable".
      return false;
    }
  }

  /**
   * Gets the search capabilities summary.
   * All values except `isSearchable` are fixed constants; `isSearchable`
   * runs the probe search, so this call searches the whole document.
   * @returns Search capabilities information
   */
  getCapabilities(): SearchCapabilities {
    return {
      caseSensitiveSearch: true,
      wholeWordSearch: true,
      regexSearch: true,
      annotationSearch: true,
      maxResults: 1000,
      isSearchable: this.isSearchable(),
    };
  }
}