pdf-oxide 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -0
- package/binding.gyp +35 -0
- package/package.json +78 -0
- package/src/builders/annotation-builder.ts +367 -0
- package/src/builders/conversion-options-builder.ts +257 -0
- package/src/builders/index.ts +12 -0
- package/src/builders/metadata-builder.ts +317 -0
- package/src/builders/pdf-builder.ts +386 -0
- package/src/builders/search-options-builder.ts +151 -0
- package/src/document-editor-manager.ts +318 -0
- package/src/errors.ts +1629 -0
- package/src/form-field-manager.ts +666 -0
- package/src/hybrid-ml-manager.ts +283 -0
- package/src/index.ts +453 -0
- package/src/managers/accessibility-manager.ts +338 -0
- package/src/managers/annotation-manager.ts +439 -0
- package/src/managers/barcode-manager.ts +235 -0
- package/src/managers/batch-manager.ts +533 -0
- package/src/managers/cache-manager.ts +486 -0
- package/src/managers/compliance-manager.ts +375 -0
- package/src/managers/content-manager.ts +339 -0
- package/src/managers/document-utility-manager.ts +922 -0
- package/src/managers/dom-pdf-creator.ts +365 -0
- package/src/managers/editing-manager.ts +514 -0
- package/src/managers/enterprise-manager.ts +478 -0
- package/src/managers/extended-managers.ts +437 -0
- package/src/managers/extraction-manager.ts +583 -0
- package/src/managers/final-utilities.ts +429 -0
- package/src/managers/hybrid-ml-advanced.ts +479 -0
- package/src/managers/index.ts +239 -0
- package/src/managers/layer-manager.ts +500 -0
- package/src/managers/metadata-manager.ts +303 -0
- package/src/managers/ocr-manager.ts +756 -0
- package/src/managers/optimization-manager.ts +262 -0
- package/src/managers/outline-manager.ts +196 -0
- package/src/managers/page-manager.ts +289 -0
- package/src/managers/pattern-detection.ts +440 -0
- package/src/managers/rendering-manager.ts +863 -0
- package/src/managers/search-manager.ts +385 -0
- package/src/managers/security-manager.ts +345 -0
- package/src/managers/signature-manager.ts +1664 -0
- package/src/managers/streams.ts +618 -0
- package/src/managers/xfa-manager.ts +500 -0
- package/src/pdf-creator-manager.ts +494 -0
- package/src/properties.ts +522 -0
- package/src/result-accessors-manager.ts +867 -0
- package/src/tests/advanced-features.test.ts +414 -0
- package/src/tests/advanced.test.ts +266 -0
- package/src/tests/extended-managers.test.ts +316 -0
- package/src/tests/final-utilities.test.ts +455 -0
- package/src/tests/foundation.test.ts +315 -0
- package/src/tests/high-demand.test.ts +257 -0
- package/src/tests/specialized.test.ts +97 -0
- package/src/thumbnail-manager.ts +272 -0
- package/src/types/common.ts +142 -0
- package/src/types/document-types.ts +457 -0
- package/src/types/index.ts +6 -0
- package/src/types/manager-types.ts +284 -0
- package/src/types/native-bindings.ts +517 -0
- package/src/workers/index.ts +7 -0
- package/src/workers/pool.ts +274 -0
- package/src/workers/worker.ts +131 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Manager for text search operations in PDF documents
|
|
3
|
+
*
|
|
4
|
+
* Caching is handled automatically at the Rust FFI layer, eliminating
|
|
5
|
+
* the need for duplicate cache implementations in the binding.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
 * import { PdfDocument, SearchManager, SearchOptionsBuilder } from 'pdf_oxide';
|
|
10
|
+
*
|
|
11
|
+
* const doc = PdfDocument.open('document.pdf');
|
|
12
|
+
* const searchManager = new SearchManager(doc);
|
|
13
|
+
*
|
|
14
|
+
* // Simple search
|
|
15
|
+
 * const results = searchManager.searchAll('keyword');
|
|
16
|
+
*
|
|
17
|
+
* // Search with options
|
|
18
|
+
* const options = SearchOptionsBuilder.strict().build();
|
|
19
|
+
 * const strictResults = searchManager.searchAll('keyword', options);
|
|
20
|
+
*
|
|
21
|
+
* // Count occurrences
|
|
22
|
+
 * const count = searchManager.countAllOccurrences('keyword');
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
/**
 * A single match returned by a search.
 *
 * Every field is optional because the underlying document object decides
 * what it reports; the index signature keeps any extra fields accessible.
 */
export interface SearchResult {
  /** Matched text, when the document reports it. */
  text?: string;
  /** Zero-based index of the page holding the match (filled in by `SearchManager.searchAll`). */
  pageIndex?: number;
  /** One-based page number, i.e. `pageIndex + 1` (filled in by `SearchManager.searchAll`). */
  pageNumber?: number;
  /** Position of the match as reported by the document. NOTE(review): unit/origin not visible here - confirm. */
  position?: number;
  /** Bounding box of the match. NOTE(review): key names are defined by the native layer - confirm. */
  boundingBox?: Record<string, number>;
  /** Any additional fields supplied by the underlying document. */
  [key: string]: any;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Aggregate figures for one search term across a whole document,
 * as produced by `SearchManager.getSearchStatistics`.
 */
export interface SearchStatistics {
  /** The term that was searched for. */
  searchText: string;
  /** Total number of matches over all pages. */
  totalOccurrences: number;
  /** Number of distinct pages with at least one match. */
  pagesContaining: number;
  /** Zero-based index of the first page with a match, or -1 when there is none. */
  firstMatchPage: number;
  /** Zero-based index of the last page with a match, or -1 when there is none. */
  lastMatchPage: number;
  /** Zero-based indices of all pages with a match, in ascending order. */
  pages: number[];
  /** Match count for each page listed in `pages`. */
  occurrencesPerPage: {
    /** Zero-based page index. */
    pageIndex: number;
    /** One-based page number (`pageIndex + 1`). */
    pageNumber: number;
    /** Number of matches on this page. */
    count: number;
  }[];
}
|
|
47
|
+
|
|
48
|
+
/**
 * Summary of the search features reported by `SearchManager.getCapabilities`.
 */
export interface SearchCapabilities {
  /** Whether case-sensitive matching is reported as available. */
  caseSensitiveSearch: boolean;
  /** Whether whole-word matching is reported as available. */
  wholeWordSearch: boolean;
  /** Whether regular-expression matching is reported as available. */
  regexSearch: boolean;
  /** Whether searching inside annotations is reported as available. */
  annotationSearch: boolean;
  /** Advertised result limit. NOTE(review): no enforcement is visible in SearchManager - confirm where it applies. */
  maxResults: number;
  /** Whether a probe search over the document completed without throwing. */
  isSearchable: boolean;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Text-search helper wrapped around a PDF document object.
 *
 * The manager only needs two members from the document it is given:
 * a numeric `pageCount` and a `search(text, pageIndex, options)` method.
 * All document-wide operations are built by calling that method page by page.
 *
 * @example
 * ```typescript
 * const manager = new SearchManager(doc);
 * const onFirstPage = manager.search('keyword', 0);
 * const everywhere = manager.searchAll('keyword');
 * const total = manager.countAllOccurrences('keyword');
 * ```
 */
export class SearchManager {
  private readonly _document: any;

  /**
   * Creates a new SearchManager for the given document.
   * @param document - The PDF document to search
   * @throws Error if document is null, undefined or otherwise falsy
   */
  constructor(document: any) {
    if (!document) {
      throw new Error('Document is required');
    }
    this._document = document;
  }

  /**
   * Extracts a printable message from anything that was thrown.
   * Non-Error throwables (strings, numbers, ...) are stringified instead of
   * yielding "undefined".
   */
  private static _errorMessage(error: unknown): string {
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /** Throws unless `searchText` is a non-empty string. */
  private static _requireSearchText(searchText: unknown): void {
    if (!searchText || typeof searchText !== 'string') {
      throw new Error('Search text must be a non-empty string');
    }
  }

  /**
   * Walks the pages in order and returns the first match, stopping as soon
   * as one is found so later pages are never searched.
   * The returned result is tagged with `pageIndex` / `pageNumber`.
   */
  private _findFirstMatch(searchText: string, options?: Record<string, any>): SearchResult | null {
    SearchManager._requireSearchText(searchText);

    try {
      const pageCount = this._document.pageCount;
      for (let pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        const first = this.search(searchText, pageIndex, options)[0];
        if (first !== undefined) {
          first.pageIndex = pageIndex;
          first.pageNumber = pageIndex + 1;
          return first;
        }
      }
      return null;
    } catch (error) {
      // Same prefix as searchAll so callers see the message format they always did.
      throw new Error(`Search all failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Searches for text on a single page.
   * @param searchText - Text to search for (non-empty string)
   * @param pageIndex - Zero-based page index; must be an integer in `[0, pageCount)`
   * @param options - Search options forwarded unchanged to the document
   *   (caseSensitive, wholeWords, useRegex, etc.)
   * @returns Array of search results; empty when the document returns a falsy value
   * @throws Error if a parameter is invalid or the underlying search throws
   *
   * @example
   * ```typescript
   * const results = manager.search('error', 0);
   * results.forEach(result => {
   *   console.log(`Found at position ${result.position}`);
   * });
   * ```
   */
  search(searchText: string, pageIndex: number, options?: Record<string, any>): SearchResult[] {
    SearchManager._requireSearchText(searchText);

    // Number.isInteger also rejects NaN, Infinity, fractions and non-numbers,
    // none of which are valid page indices.
    if (!Number.isInteger(pageIndex) || pageIndex < 0) {
      throw new Error('Page index must be a non-negative number');
    }

    if (pageIndex >= this._document.pageCount) {
      throw new Error(`Page index ${pageIndex} out of range`);
    }

    try {
      return this._document.search(searchText, pageIndex, options) || [];
    } catch (error) {
      throw new Error(`Search failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Searches for text across all pages, in page order.
   * Each returned result is tagged in place with `pageIndex` (zero-based)
   * and `pageNumber` (one-based).
   * @param searchText - Text to search for (non-empty string)
   * @param options - Search options forwarded to every per-page search
   * @returns Array of search results with page information
   * @throws Error if `searchText` is invalid or any per-page search fails
   *
   * @example
   * ```typescript
   * const results = manager.searchAll('important');
   * console.log(`Found ${results.length} occurrences`);
   * ```
   */
  searchAll(searchText: string, options?: Record<string, any>): SearchResult[] {
    SearchManager._requireSearchText(searchText);

    const allResults: SearchResult[] = [];

    try {
      const pageCount = this._document.pageCount;
      for (let pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        // Push one by one: spreading a very large array into push() can
        // exceed the engine's argument limit and overflow the stack.
        for (const result of this.search(searchText, pageIndex, options)) {
          result.pageIndex = pageIndex;
          result.pageNumber = pageIndex + 1;
          allResults.push(result);
        }
      }
      return allResults;
    } catch (error) {
      throw new Error(`Search all failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Counts occurrences of text on a single page.
   * @param searchText - Text to search for
   * @param pageIndex - Zero-based page index
   * @param options - Search options
   * @returns Number of occurrences found
   *
   * @example
   * ```typescript
   * const count = manager.countOccurrences('the', 0);
   * console.log(`"the" appears ${count} times on page 1`);
   * ```
   */
  countOccurrences(searchText: string, pageIndex: number, options?: Record<string, any>): number {
    return this.search(searchText, pageIndex, options).length;
  }

  /**
   * Counts occurrences of text across all pages.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Total occurrences
   *
   * @example
   * ```typescript
   * const totalCount = manager.countAllOccurrences('the');
   * console.log(`"the" appears ${totalCount} times in document`);
   * ```
   */
  countAllOccurrences(searchText: string, options?: Record<string, any>): number {
    return this.searchAll(searchText, options).length;
  }

  /**
   * Checks whether text exists on a single page.
   * @param searchText - Text to search for
   * @param pageIndex - Zero-based page index
   * @param options - Search options
   * @returns True if text found
   *
   * @example
   * ```typescript
   * if (manager.contains('error', 0)) {
   *   console.log('Page contains "error"');
   * }
   * ```
   */
  contains(searchText: string, pageIndex: number, options?: Record<string, any>): boolean {
    return this.search(searchText, pageIndex, options).length > 0;
  }

  /**
   * Checks whether text exists anywhere in the document.
   * Stops at the first page that has a match.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns True if text found anywhere
   *
   * @example
   * ```typescript
   * if (manager.containsAnywhere('copyright')) {
   *   console.log('Document contains copyright notice');
   * }
   * ```
   */
  containsAnywhere(searchText: string, options?: Record<string, any>): boolean {
    return this._findFirstMatch(searchText, options) !== null;
  }

  /**
   * Gets the pages containing the search text.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Ascending array of zero-based page indices containing the text
   *
   * @example
   * ```typescript
   * const pages = manager.getPagesContaining('error');
   * console.log(`"error" found on pages: ${pages.map(p => p + 1).join(', ')}`);
   * ```
   */
  getPagesContaining(searchText: string, options?: Record<string, any>): number[] {
    const pageIndices = new Set<number>();
    for (const result of this.searchAll(searchText, options)) {
      pageIndices.add(result.pageIndex ?? 0);
    }
    return [...pageIndices].sort((a, b) => a - b);
  }

  /**
   * Gets statistics for a document-wide search.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Search statistics; `firstMatchPage` / `lastMatchPage` are -1 when nothing matched
   *
   * @example
   * ```typescript
   * const stats = manager.getSearchStatistics('error');
   * console.log(`Found ${stats.totalOccurrences} occurrences`);
   * console.log(`On ${stats.pagesContaining} pages`);
   * console.log(`First match on page ${stats.firstMatchPage + 1}`);
   * ```
   */
  getSearchStatistics(searchText: string, options?: Record<string, any>): SearchStatistics {
    const results = this.searchAll(searchText, options);

    // Tally matches per page in a single pass.
    const countsByPage = new Map<number, number>();
    for (const result of results) {
      const pageIdx = result.pageIndex ?? 0;
      countsByPage.set(pageIdx, (countsByPage.get(pageIdx) ?? 0) + 1);
    }

    const pages = [...countsByPage.keys()].sort((a, b) => a - b);

    return {
      searchText,
      totalOccurrences: results.length,
      pagesContaining: pages.length,
      firstMatchPage: pages[0] ?? -1,
      lastMatchPage: pages[pages.length - 1] ?? -1,
      pages,
      occurrencesPerPage: pages.map(p => ({
        pageIndex: p,
        pageNumber: p + 1,
        count: countsByPage.get(p) ?? 0,
      })),
    };
  }

  /**
   * Searches all pages with a regular expression.
   *
   * Only the pattern source is forwarded to the document. When a RegExp
   * carries the `i` flag and the caller did not set `caseSensitive`, the
   * option `caseSensitive: false` is added so the flag is not silently lost.
   * NOTE(review): `caseSensitive` is the option name listed in the search
   * docs - confirm the native layer reads it. Other RegExp flags are not forwarded.
   *
   * @param pattern - Regular expression or pattern string
   * @param options - Search options (`useRegex` is always forced to true)
   * @returns Array of search results
   * @throws Error if the pattern is empty/invalid or the search fails
   *
   * @example
   * ```typescript
   * const results = manager.searchRegex(/error\d+/i);
   * // Finds "error1", "ERROR2", "Error3", etc.
   * ```
   */
  searchRegex(pattern: RegExp | string, options: Record<string, any> = {}): SearchResult[] {
    const regexStr = pattern instanceof RegExp ? pattern.source : pattern;

    if (!regexStr || typeof regexStr !== 'string') {
      throw new Error('Pattern must be a valid regular expression');
    }

    // Merge options and ensure useRegex is true.
    const searchOptions: Record<string, any> = {
      ...options,
      useRegex: true,
    };

    // Honour the RegExp's ignore-case flag unless the caller chose explicitly.
    if (
      pattern instanceof RegExp &&
      pattern.ignoreCase &&
      searchOptions.caseSensitive === undefined
    ) {
      searchOptions.caseSensitive = false;
    }

    try {
      return this.searchAll(regexStr, searchOptions);
    } catch (error) {
      throw new Error(`Regex search failed: ${SearchManager._errorMessage(error)}`);
    }
  }

  /**
   * Finds the first occurrence of text, scanning pages in order and
   * stopping at the first page that has a match.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns First search result or null if not found
   *
   * @example
   * ```typescript
   * const first = manager.findFirst('chapter');
   * if (first) {
   *   console.log(`First "chapter" found on page ${first.pageNumber}`);
   * }
   * ```
   */
  findFirst(searchText: string, options?: Record<string, any>): SearchResult | null {
    return this._findFirstMatch(searchText, options);
  }

  /**
   * Finds the last occurrence of text.
   * @param searchText - Text to search for
   * @param options - Search options
   * @returns Last search result or null if not found
   */
  findLast(searchText: string, options?: Record<string, any>): SearchResult | null {
    const results = this.searchAll(searchText, options);
    return results[results.length - 1] ?? null;
  }

  /**
   * Gets all occurrences for highlighting; the document is not modified.
   * Currently equivalent to `searchAll`.
   * @param searchText - Text to find
   * @param options - Search options
   * @returns Results to be used for highlighting
   *
   * @example
   * ```typescript
   * const highlights = manager.highlightMatches('important');
   * // Use results for UI highlighting
   * ```
   */
  highlightMatches(searchText: string, options?: Record<string, any>): SearchResult[] {
    return this.searchAll(searchText, options);
  }

  /**
   * Checks whether the document can be searched, by running a probe search
   * for the word "test" over every page.
   * @returns True if the probe completed without throwing (including for a
   *   document with zero pages), false otherwise
   */
  isSearchable(): boolean {
    try {
      this.searchAll('test');
      return true;
    } catch {
      // Deliberate best-effort: any failure simply means "not searchable".
      return false;
    }
  }

  /**
   * Gets a summary of search capabilities.
   * All feature flags are fixed values; only `isSearchable` is probed.
   * @returns Search capabilities information
   */
  getCapabilities(): SearchCapabilities {
    return {
      caseSensitiveSearch: true,
      wholeWordSearch: true,
      regexSearch: true,
      annotationSearch: true,
      maxResults: 1000,
      isSearchable: this.isSearchable(),
    };
  }
}
|