pdf-oxide 0.3.37 → 0.3.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/builders/document-builder.d.ts +199 -0
- package/lib/builders/document-builder.js +434 -0
- package/lib/builders/index.d.ts +3 -2
- package/lib/builders/index.js +3 -2
- package/lib/document-editor.d.ts +57 -0
- package/lib/document-editor.js +184 -0
- package/lib/errors.js +3 -4
- package/lib/form-field-manager.js +3 -1
- package/lib/index.d.ts +40 -6
- package/lib/index.js +253 -90
- package/lib/managers/accessibility-manager.js +19 -8
- package/lib/managers/annotation-manager.js +9 -9
- package/lib/managers/barcode-manager.js +18 -7
- package/lib/managers/batch-manager.js +2 -5
- package/lib/managers/cache-manager.js +1 -3
- package/lib/managers/compliance-manager.js +58 -19
- package/lib/managers/document-utility-manager.js +6 -6
- package/lib/managers/dom-pdf-creator.js +9 -9
- package/lib/managers/enterprise-manager.js +4 -1
- package/lib/managers/extended-managers.js +8 -1
- package/lib/managers/extraction-manager.js +7 -2
- package/lib/managers/final-utilities.d.ts +3 -3
- package/lib/managers/final-utilities.js +9 -4
- package/lib/managers/hybrid-ml-advanced.js +22 -6
- package/lib/managers/index.d.ts +22 -22
- package/lib/managers/index.js +23 -23
- package/lib/managers/layer-manager.js +20 -21
- package/lib/managers/ocr-manager.d.ts +2 -2
- package/lib/managers/ocr-manager.js +7 -7
- package/lib/managers/optimization-manager.js +24 -4
- package/lib/managers/page-manager.js +5 -6
- package/lib/managers/pattern-detection.d.ts +1 -1
- package/lib/managers/pattern-detection.js +4 -6
- package/lib/managers/search-manager.js +3 -3
- package/lib/managers/signature-manager.js +149 -40
- package/lib/managers/streams.js +8 -2
- package/lib/managers/xfa-manager.js +69 -19
- package/lib/native-loader.d.ts +7 -0
- package/lib/native-loader.js +62 -0
- package/lib/native.d.ts +16 -0
- package/lib/native.js +69 -0
- package/lib/pdf-creator-manager.js +4 -1
- package/lib/result-accessors-manager.js +3 -1
- package/lib/timestamp.d.ts +54 -0
- package/lib/timestamp.js +115 -0
- package/lib/tsa-client.d.ts +44 -0
- package/lib/tsa-client.js +67 -0
- package/lib/types/common.d.ts +1 -1
- package/lib/types/index.d.ts +1 -1
- package/lib/types/index.js +1 -1
- package/lib/types/manager-types.js +4 -2
- package/lib/workers/index.d.ts +1 -1
- package/lib/workers/pool.js +2 -4
- package/package.json +16 -10
- package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
- package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
- package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
- package/prebuilds/linux-x64/pdf_oxide.node +0 -0
- package/prebuilds/win32-x64/pdf_oxide.node +0 -0
|
@@ -117,7 +117,7 @@ export class LayerManager {
|
|
|
117
117
|
throw new Error('Layer name must be a non-empty string');
|
|
118
118
|
}
|
|
119
119
|
const layers = this.getLayers();
|
|
120
|
-
return layers.find(layer => layer.name === name) || null;
|
|
120
|
+
return layers.find((layer) => layer.name === name) || null;
|
|
121
121
|
}
|
|
122
122
|
/**
|
|
123
123
|
* Gets layer by ID
|
|
@@ -129,7 +129,7 @@ export class LayerManager {
|
|
|
129
129
|
throw new Error('Layer ID must be a non-empty string');
|
|
130
130
|
}
|
|
131
131
|
const layers = this.getLayers();
|
|
132
|
-
return layers.find(layer => layer.id === id) || null;
|
|
132
|
+
return layers.find((layer) => layer.id === id) || null;
|
|
133
133
|
}
|
|
134
134
|
/**
|
|
135
135
|
* Gets root-level layers (not nested under other layers)
|
|
@@ -137,7 +137,7 @@ export class LayerManager {
|
|
|
137
137
|
*/
|
|
138
138
|
getRootLayers() {
|
|
139
139
|
const layers = this.getLayers();
|
|
140
|
-
return layers.filter(layer => !layer.parentId);
|
|
140
|
+
return layers.filter((layer) => !layer.parentId);
|
|
141
141
|
}
|
|
142
142
|
/**
|
|
143
143
|
* Gets the full layer hierarchy as a tree structure
|
|
@@ -182,7 +182,7 @@ export class LayerManager {
|
|
|
182
182
|
throw new Error('Parent layer ID must be a non-empty string');
|
|
183
183
|
}
|
|
184
184
|
const layers = this.getLayers();
|
|
185
|
-
return layers.filter(layer => layer.parentId === parentId);
|
|
185
|
+
return layers.filter((layer) => layer.parentId === parentId);
|
|
186
186
|
}
|
|
187
187
|
/**
|
|
188
188
|
* Gets parent layer of a layer
|
|
@@ -230,9 +230,9 @@ export class LayerManager {
|
|
|
230
230
|
getLayerUsages() {
|
|
231
231
|
const layers = this.getLayers();
|
|
232
232
|
return {
|
|
233
|
-
view: layers.filter(l => l.printable === false).length,
|
|
234
|
-
print: layers.filter(l => l.printable === true).length,
|
|
235
|
-
export: layers.filter(l => l.export !== false).length,
|
|
233
|
+
view: layers.filter((l) => l.printable === false).length,
|
|
234
|
+
print: layers.filter((l) => l.printable === true).length,
|
|
235
|
+
export: layers.filter((l) => l.export !== false).length,
|
|
236
236
|
};
|
|
237
237
|
}
|
|
238
238
|
/**
|
|
@@ -268,10 +268,10 @@ export class LayerManager {
|
|
|
268
268
|
count: layers.length,
|
|
269
269
|
rootCount: hierarchy.root.length,
|
|
270
270
|
maxDepth,
|
|
271
|
-
visible: layers.filter(l => l.visible !== false).length,
|
|
272
|
-
hidden: layers.filter(l => l.visible === false).length,
|
|
273
|
-
printable: layers.filter(l => l.printable !== false).length,
|
|
274
|
-
exportable: layers.filter(l => l.export !== false).length,
|
|
271
|
+
visible: layers.filter((l) => l.visible !== false).length,
|
|
272
|
+
hidden: layers.filter((l) => l.visible === false).length,
|
|
273
|
+
printable: layers.filter((l) => l.printable !== false).length,
|
|
274
|
+
exportable: layers.filter((l) => l.export !== false).length,
|
|
275
275
|
hasConflicts: this._detectLayerConflicts().length > 0,
|
|
276
276
|
};
|
|
277
277
|
this._statisticsCache = stats;
|
|
@@ -285,7 +285,7 @@ export class LayerManager {
|
|
|
285
285
|
getLayerDependencies() {
|
|
286
286
|
const layers = this.getLayers();
|
|
287
287
|
const dependencies = {};
|
|
288
|
-
layers.forEach(layer => {
|
|
288
|
+
layers.forEach((layer) => {
|
|
289
289
|
dependencies[layer.id] = {
|
|
290
290
|
dependsOn: layer.dependsOn || [],
|
|
291
291
|
dependents: [],
|
|
@@ -317,8 +317,7 @@ export class LayerManager {
|
|
|
317
317
|
}
|
|
318
318
|
const regex = pattern instanceof RegExp ? pattern : new RegExp(pattern, 'i');
|
|
319
319
|
const layers = this.getLayers();
|
|
320
|
-
return layers.filter(layer => regex.test(layer.name) ||
|
|
321
|
-
(layer.description && regex.test(layer.description)));
|
|
320
|
+
return layers.filter((layer) => regex.test(layer.name) || (layer.description && regex.test(layer.description)));
|
|
322
321
|
}
|
|
323
322
|
/**
|
|
324
323
|
* Validates layer state for conflicts and issues
|
|
@@ -329,10 +328,10 @@ export class LayerManager {
|
|
|
329
328
|
const conflicts = this._detectLayerConflicts();
|
|
330
329
|
const cycles = this._detectLayerCycles();
|
|
331
330
|
if (conflicts.length > 0) {
|
|
332
|
-
issues.push(...conflicts.map(c => `Conflict: ${c}`));
|
|
331
|
+
issues.push(...conflicts.map((c) => `Conflict: ${c}`));
|
|
333
332
|
}
|
|
334
333
|
if (cycles.length > 0) {
|
|
335
|
-
issues.push(...cycles.map(c => `Cycle detected: ${c}`));
|
|
334
|
+
issues.push(...cycles.map((c) => `Cycle detected: ${c}`));
|
|
336
335
|
}
|
|
337
336
|
return {
|
|
338
337
|
isValid: issues.length === 0,
|
|
@@ -349,16 +348,16 @@ export class LayerManager {
|
|
|
349
348
|
const layers = this.getLayers();
|
|
350
349
|
// Check for layers with same name
|
|
351
350
|
const nameMap = new Map();
|
|
352
|
-
layers.forEach(layer => {
|
|
351
|
+
layers.forEach((layer) => {
|
|
353
352
|
if (nameMap.has(layer.name)) {
|
|
354
353
|
conflicts.push(`Duplicate layer name: ${layer.name}`);
|
|
355
354
|
}
|
|
356
355
|
nameMap.set(layer.name, layer.id);
|
|
357
356
|
});
|
|
358
357
|
// Check for orphaned layers
|
|
359
|
-
const parentIds = new Set(layers.map(l => l.parentId).filter(id => id));
|
|
360
|
-
const layerIds = new Set(layers.map(l => l.id));
|
|
361
|
-
parentIds.forEach(parentId => {
|
|
358
|
+
const parentIds = new Set(layers.map((l) => l.parentId).filter((id) => id));
|
|
359
|
+
const layerIds = new Set(layers.map((l) => l.id));
|
|
360
|
+
parentIds.forEach((parentId) => {
|
|
362
361
|
if (!layerIds.has(parentId)) {
|
|
363
362
|
conflicts.push(`Orphaned layer reference: ${parentId}`);
|
|
364
363
|
}
|
|
@@ -392,7 +391,7 @@ export class LayerManager {
|
|
|
392
391
|
}
|
|
393
392
|
stack.delete(layerId);
|
|
394
393
|
};
|
|
395
|
-
layers.forEach(layer => {
|
|
394
|
+
layers.forEach((layer) => {
|
|
396
395
|
if (!visited.has(layer.id)) {
|
|
397
396
|
detectCycle(layer.id);
|
|
398
397
|
}
|
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* Provides optical character recognition operations with complete type safety,
|
|
10
10
|
* proper error handling, and full FFI integration.
|
|
11
11
|
*/
|
|
12
|
-
import { BaseManager,
|
|
12
|
+
import { BaseManager, type ManagerOptions, type OcrBatchResult, OcrLanguage, type OcrResult, type PdfDocumentHandle, type TextRegion } from '../types/manager-types.js';
|
|
13
|
+
export type { OcrBatchResult, OcrResult, TextRegion };
|
|
13
14
|
export { OcrLanguage };
|
|
14
|
-
export type { OcrResult, OcrBatchResult, TextRegion };
|
|
15
15
|
/**
|
|
16
16
|
* OCR detection modes for accuracy/speed tradeoff
|
|
17
17
|
*/
|
|
@@ -9,9 +9,9 @@
|
|
|
9
9
|
* Provides optical character recognition operations with complete type safety,
|
|
10
10
|
* proper error handling, and full FFI integration.
|
|
11
11
|
*/
|
|
12
|
-
import { BaseManager, OcrLanguage, } from '../types/manager-types.js';
|
|
13
12
|
import { promises as fs } from 'fs';
|
|
14
13
|
import { dirname } from 'path';
|
|
14
|
+
import { BaseManager, OcrLanguage, } from '../types/manager-types.js';
|
|
15
15
|
// Re-export types for convenience
|
|
16
16
|
export { OcrLanguage };
|
|
17
17
|
/**
|
|
@@ -214,8 +214,7 @@ export class OcrManager extends BaseManager {
|
|
|
214
214
|
async getAvailableLanguages() {
|
|
215
215
|
try {
|
|
216
216
|
this.recordOperation();
|
|
217
|
-
const languages = (await this.document?.getAvailableLanguages()) ||
|
|
218
|
-
Object.values(OcrLanguage);
|
|
217
|
+
const languages = (await this.document?.getAvailableLanguages()) || Object.values(OcrLanguage);
|
|
219
218
|
return languages;
|
|
220
219
|
}
|
|
221
220
|
catch (error) {
|
|
@@ -260,7 +259,10 @@ export class OcrManager extends BaseManager {
|
|
|
260
259
|
content = JSON.stringify({ pageIndex, text, timestamp: Date.now() }, null, 2);
|
|
261
260
|
break;
|
|
262
261
|
case 'xml':
|
|
263
|
-
content = `<?xml version="1.0"?>\n<page index="${pageIndex}">\n${text
|
|
262
|
+
content = `<?xml version="1.0"?>\n<page index="${pageIndex}">\n${text
|
|
263
|
+
.split('\n')
|
|
264
|
+
.map((line) => ` <line>${line}</line>`)
|
|
265
|
+
.join('\n')}\n</page>`;
|
|
264
266
|
break;
|
|
265
267
|
default:
|
|
266
268
|
content = text;
|
|
@@ -356,9 +358,7 @@ export class OcrManager extends BaseManager {
|
|
|
356
358
|
totalSpans += Math.max(regions.length, text ? 1 : 0);
|
|
357
359
|
confidenceSum += confidence;
|
|
358
360
|
}
|
|
359
|
-
catch {
|
|
360
|
-
continue;
|
|
361
|
-
}
|
|
361
|
+
catch { }
|
|
362
362
|
}
|
|
363
363
|
const processedPages = endPage - startPage + 1 - skippedPages;
|
|
364
364
|
const avgConfidence = processedPages > 0 ? confidenceSum / processedPages : 0;
|
|
@@ -96,7 +96,11 @@ export class OptimizationManager extends EventEmitter {
|
|
|
96
96
|
throw mapFfiErrorCode(code, 'Failed to downsample images');
|
|
97
97
|
}
|
|
98
98
|
const result = this.parseOptimizationResult(resultPtr);
|
|
99
|
-
this.emit('images-downsampled', {
|
|
99
|
+
this.emit('images-downsampled', {
|
|
100
|
+
dpi: dpi ?? 150,
|
|
101
|
+
quality: quality ?? 80,
|
|
102
|
+
bytesSaved: result.bytesSaved,
|
|
103
|
+
});
|
|
100
104
|
this.freeOptimizationResult(resultPtr);
|
|
101
105
|
return result;
|
|
102
106
|
}
|
|
@@ -146,7 +150,11 @@ export class OptimizationManager extends EventEmitter {
|
|
|
146
150
|
throw mapFfiErrorCode(code, 'Failed to run full optimization');
|
|
147
151
|
}
|
|
148
152
|
const result = this.parseOptimizationResult(resultPtr);
|
|
149
|
-
this.emit('optimized-full', {
|
|
153
|
+
this.emit('optimized-full', {
|
|
154
|
+
dpi: dpi ?? 150,
|
|
155
|
+
quality: quality ?? 80,
|
|
156
|
+
bytesSaved: result.bytesSaved,
|
|
157
|
+
});
|
|
150
158
|
this.freeOptimizationResult(resultPtr);
|
|
151
159
|
return result;
|
|
152
160
|
}
|
|
@@ -155,14 +163,26 @@ export class OptimizationManager extends EventEmitter {
|
|
|
155
163
|
// ===========================================================================
|
|
156
164
|
parseOptimizationResult(resultPtr) {
|
|
157
165
|
if (!resultPtr) {
|
|
158
|
-
return {
|
|
166
|
+
return {
|
|
167
|
+
success: true,
|
|
168
|
+
bytesSaved: 0,
|
|
169
|
+
originalSize: 0,
|
|
170
|
+
optimizedSize: 0,
|
|
171
|
+
compressionRatio: 0,
|
|
172
|
+
};
|
|
159
173
|
}
|
|
160
174
|
if (typeof resultPtr === 'string') {
|
|
161
175
|
try {
|
|
162
176
|
return JSON.parse(resultPtr);
|
|
163
177
|
}
|
|
164
178
|
catch {
|
|
165
|
-
return {
|
|
179
|
+
return {
|
|
180
|
+
success: true,
|
|
181
|
+
bytesSaved: 0,
|
|
182
|
+
originalSize: 0,
|
|
183
|
+
optimizedSize: 0,
|
|
184
|
+
compressionRatio: 0,
|
|
185
|
+
};
|
|
166
186
|
}
|
|
167
187
|
}
|
|
168
188
|
// Handle native result handle
|
|
@@ -185,8 +185,8 @@ export class PageManager {
|
|
|
185
185
|
hasVariableSizes: false,
|
|
186
186
|
};
|
|
187
187
|
}
|
|
188
|
-
const widths = pages.map(p => p.width);
|
|
189
|
-
const heights = pages.map(p => p.height);
|
|
188
|
+
const widths = pages.map((p) => p.width);
|
|
189
|
+
const heights = pages.map((p) => p.height);
|
|
190
190
|
const minWidth = Math.min(...widths);
|
|
191
191
|
const maxWidth = Math.max(...widths);
|
|
192
192
|
const minHeight = Math.min(...heights);
|
|
@@ -214,8 +214,7 @@ export class PageManager {
|
|
|
214
214
|
*/
|
|
215
215
|
getPagesInSizeRange(minWidth, maxWidth, minHeight, maxHeight) {
|
|
216
216
|
const pages = this.getAllPageInfo();
|
|
217
|
-
return pages.filter(p => p.width >= minWidth && p.width <= maxWidth &&
|
|
218
|
-
p.height >= minHeight && p.height <= maxHeight);
|
|
217
|
+
return pages.filter((p) => p.width >= minWidth && p.width <= maxWidth && p.height >= minHeight && p.height <= maxHeight);
|
|
219
218
|
}
|
|
220
219
|
/**
|
|
221
220
|
* Gets landscape pages
|
|
@@ -223,7 +222,7 @@ export class PageManager {
|
|
|
223
222
|
*/
|
|
224
223
|
getLandscapePages() {
|
|
225
224
|
const pages = this.getAllPageInfo();
|
|
226
|
-
return pages.filter(p => p.width > p.height);
|
|
225
|
+
return pages.filter((p) => p.width > p.height);
|
|
227
226
|
}
|
|
228
227
|
/**
|
|
229
228
|
* Gets portrait pages
|
|
@@ -231,6 +230,6 @@ export class PageManager {
|
|
|
231
230
|
*/
|
|
232
231
|
getPortraitPages() {
|
|
233
232
|
const pages = this.getAllPageInfo();
|
|
234
|
-
return pages.filter(p => p.height > p.width);
|
|
233
|
+
return pages.filter((p) => p.height > p.width);
|
|
235
234
|
}
|
|
236
235
|
}
|
|
@@ -52,7 +52,7 @@ export class PatternDetectionManager {
|
|
|
52
52
|
}
|
|
53
53
|
// Simple heuristic: detect table-like patterns
|
|
54
54
|
const tables = [];
|
|
55
|
-
const lines = text.split(
|
|
55
|
+
const lines = text.split('\n');
|
|
56
56
|
// Look for lines with multiple columns (tabs or spaces)
|
|
57
57
|
let currentTableStart = -1;
|
|
58
58
|
let tableLines = 0;
|
|
@@ -101,7 +101,7 @@ export class PatternDetectionManager {
|
|
|
101
101
|
}
|
|
102
102
|
// Simple heuristic: detect multi-column layouts
|
|
103
103
|
const columns = [];
|
|
104
|
-
const lines = text.split(
|
|
104
|
+
const lines = text.split('\n');
|
|
105
105
|
// Check for indentation patterns suggesting columns
|
|
106
106
|
const indentationPattern = new Map();
|
|
107
107
|
for (const line of lines) {
|
|
@@ -184,7 +184,7 @@ export class PatternDetectionManager {
|
|
|
184
184
|
y: f.y || 0,
|
|
185
185
|
width: f.width || 100,
|
|
186
186
|
height: f.height || 20,
|
|
187
|
-
fieldType: f.type ||
|
|
187
|
+
fieldType: f.type || 'unknown',
|
|
188
188
|
fieldName: f.name,
|
|
189
189
|
confidence: 0.9,
|
|
190
190
|
}));
|
|
@@ -279,9 +279,7 @@ export class PatternDetectionManager {
|
|
|
279
279
|
*/
|
|
280
280
|
async findPagesWithPattern(patternType) {
|
|
281
281
|
const patterns = await this.analyzeDocumentPatterns();
|
|
282
|
-
return patterns
|
|
283
|
-
.filter((p) => p.patternType === patternType)
|
|
284
|
-
.map((p) => p.pageIndex);
|
|
282
|
+
return patterns.filter((p) => p.patternType === patternType).map((p) => p.pageIndex);
|
|
285
283
|
}
|
|
286
284
|
/**
|
|
287
285
|
* Get pattern statistics for the document.
|
|
@@ -88,7 +88,7 @@ export class SearchManager {
|
|
|
88
88
|
try {
|
|
89
89
|
for (let i = 0; i < this._document.pageCount; i++) {
|
|
90
90
|
const results = this.search(searchText, i, options);
|
|
91
|
-
results.forEach(result => {
|
|
91
|
+
results.forEach((result) => {
|
|
92
92
|
result.pageIndex = i;
|
|
93
93
|
result.pageNumber = i + 1;
|
|
94
94
|
});
|
|
@@ -182,7 +182,7 @@ export class SearchManager {
|
|
|
182
182
|
*/
|
|
183
183
|
getPagesContaining(searchText, options) {
|
|
184
184
|
const results = this.searchAll(searchText, options);
|
|
185
|
-
const pageSet = new Set(results.map(r => r.pageIndex || 0));
|
|
185
|
+
const pageSet = new Set(results.map((r) => r.pageIndex || 0));
|
|
186
186
|
return Array.from(pageSet).sort((a, b) => a - b);
|
|
187
187
|
}
|
|
188
188
|
/**
|
|
@@ -218,7 +218,7 @@ export class SearchManager {
|
|
|
218
218
|
firstMatchPage: pages.length > 0 ? pages[0] : -1,
|
|
219
219
|
lastMatchPage: pages.length > 0 ? pages[pages.length - 1] : -1,
|
|
220
220
|
pages,
|
|
221
|
-
occurrencesPerPage: pages.map(p => ({
|
|
221
|
+
occurrencesPerPage: pages.map((p) => ({
|
|
222
222
|
pageIndex: p,
|
|
223
223
|
pageNumber: p + 1,
|
|
224
224
|
count: pageMap.get(p) || 0,
|