pdf-oxide-fips 0.3.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE-APACHE +176 -0
  2. package/LICENSE-MIT +25 -0
  3. package/README.md +218 -0
  4. package/lib/builders/annotation-builder.d.ts +198 -0
  5. package/lib/builders/annotation-builder.js +317 -0
  6. package/lib/builders/conversion-options-builder.d.ts +106 -0
  7. package/lib/builders/conversion-options-builder.js +214 -0
  8. package/lib/builders/document-builder.d.ts +381 -0
  9. package/lib/builders/document-builder.js +770 -0
  10. package/lib/builders/index.d.ts +13 -0
  11. package/lib/builders/index.js +13 -0
  12. package/lib/builders/metadata-builder.d.ts +201 -0
  13. package/lib/builders/metadata-builder.js +285 -0
  14. package/lib/builders/pdf-builder.d.ts +216 -0
  15. package/lib/builders/pdf-builder.js +350 -0
  16. package/lib/builders/search-options-builder.d.ts +73 -0
  17. package/lib/builders/search-options-builder.js +129 -0
  18. package/lib/builders/streaming-table.d.ts +64 -0
  19. package/lib/builders/streaming-table.js +140 -0
  20. package/lib/document-editor-manager.d.ts +139 -0
  21. package/lib/document-editor-manager.js +256 -0
  22. package/lib/document-editor.d.ts +124 -0
  23. package/lib/document-editor.js +318 -0
  24. package/lib/errors.d.ts +382 -0
  25. package/lib/errors.js +1115 -0
  26. package/lib/form-field-manager.d.ts +299 -0
  27. package/lib/form-field-manager.js +568 -0
  28. package/lib/hybrid-ml-manager.d.ts +142 -0
  29. package/lib/hybrid-ml-manager.js +208 -0
  30. package/lib/index.d.ts +205 -0
  31. package/lib/index.js +693 -0
  32. package/lib/managers/accessibility-manager.d.ts +148 -0
  33. package/lib/managers/accessibility-manager.js +234 -0
  34. package/lib/managers/annotation-manager.d.ts +219 -0
  35. package/lib/managers/annotation-manager.js +359 -0
  36. package/lib/managers/barcode-manager.d.ts +82 -0
  37. package/lib/managers/barcode-manager.js +263 -0
  38. package/lib/managers/batch-manager.d.ts +185 -0
  39. package/lib/managers/batch-manager.js +385 -0
  40. package/lib/managers/cache-manager.d.ts +181 -0
  41. package/lib/managers/cache-manager.js +384 -0
  42. package/lib/managers/compliance-manager.d.ts +103 -0
  43. package/lib/managers/compliance-manager.js +453 -0
  44. package/lib/managers/content-manager.d.ts +120 -0
  45. package/lib/managers/content-manager.js +294 -0
  46. package/lib/managers/document-utility-manager.d.ts +369 -0
  47. package/lib/managers/document-utility-manager.js +730 -0
  48. package/lib/managers/dom-pdf-creator.d.ts +104 -0
  49. package/lib/managers/dom-pdf-creator.js +299 -0
  50. package/lib/managers/editing-manager.d.ts +248 -0
  51. package/lib/managers/editing-manager.js +387 -0
  52. package/lib/managers/enterprise-manager.d.ts +192 -0
  53. package/lib/managers/enterprise-manager.js +307 -0
  54. package/lib/managers/extended-managers.d.ts +122 -0
  55. package/lib/managers/extended-managers.js +664 -0
  56. package/lib/managers/extraction-manager.d.ts +246 -0
  57. package/lib/managers/extraction-manager.js +482 -0
  58. package/lib/managers/final-utilities.d.ts +127 -0
  59. package/lib/managers/final-utilities.js +657 -0
  60. package/lib/managers/hybrid-ml-advanced.d.ts +136 -0
  61. package/lib/managers/hybrid-ml-advanced.js +722 -0
  62. package/lib/managers/index.d.ts +64 -0
  63. package/lib/managers/index.js +69 -0
  64. package/lib/managers/layer-manager.d.ts +203 -0
  65. package/lib/managers/layer-manager.js +401 -0
  66. package/lib/managers/metadata-manager.d.ts +148 -0
  67. package/lib/managers/metadata-manager.js +280 -0
  68. package/lib/managers/ocr-manager.d.ts +194 -0
  69. package/lib/managers/ocr-manager.js +582 -0
  70. package/lib/managers/optimization-manager.d.ts +102 -0
  71. package/lib/managers/optimization-manager.js +213 -0
  72. package/lib/managers/outline-manager.d.ts +101 -0
  73. package/lib/managers/outline-manager.js +169 -0
  74. package/lib/managers/page-manager.d.ts +142 -0
  75. package/lib/managers/page-manager.js +235 -0
  76. package/lib/managers/pattern-detection.d.ts +169 -0
  77. package/lib/managers/pattern-detection.js +322 -0
  78. package/lib/managers/rendering-manager.d.ts +353 -0
  79. package/lib/managers/rendering-manager.js +679 -0
  80. package/lib/managers/search-manager.d.ts +235 -0
  81. package/lib/managers/search-manager.js +329 -0
  82. package/lib/managers/security-manager.d.ts +161 -0
  83. package/lib/managers/security-manager.js +292 -0
  84. package/lib/managers/signature-manager.d.ts +738 -0
  85. package/lib/managers/signature-manager.js +1509 -0
  86. package/lib/managers/streams.d.ts +262 -0
  87. package/lib/managers/streams.js +477 -0
  88. package/lib/managers/xfa-manager.d.ts +227 -0
  89. package/lib/managers/xfa-manager.js +539 -0
  90. package/lib/native-loader.d.ts +7 -0
  91. package/lib/native-loader.js +62 -0
  92. package/lib/native.d.ts +16 -0
  93. package/lib/native.js +69 -0
  94. package/lib/pdf-creator-manager.d.ts +200 -0
  95. package/lib/pdf-creator-manager.js +381 -0
  96. package/lib/properties.d.ts +79 -0
  97. package/lib/properties.js +454 -0
  98. package/lib/result-accessors-manager.d.ts +346 -0
  99. package/lib/result-accessors-manager.js +706 -0
  100. package/lib/thumbnail-manager.d.ts +121 -0
  101. package/lib/thumbnail-manager.js +205 -0
  102. package/lib/timestamp.d.ts +54 -0
  103. package/lib/timestamp.js +115 -0
  104. package/lib/tsa-client.d.ts +44 -0
  105. package/lib/tsa-client.js +67 -0
  106. package/lib/types/common.d.ts +189 -0
  107. package/lib/types/common.js +17 -0
  108. package/lib/types/document-types.d.ts +352 -0
  109. package/lib/types/document-types.js +82 -0
  110. package/lib/types/index.d.ts +5 -0
  111. package/lib/types/index.js +5 -0
  112. package/lib/types/manager-types.d.ts +179 -0
  113. package/lib/types/manager-types.js +100 -0
  114. package/lib/types/native-bindings.d.ts +439 -0
  115. package/lib/types/native-bindings.js +7 -0
  116. package/lib/workers/index.d.ts +6 -0
  117. package/lib/workers/index.js +5 -0
  118. package/lib/workers/pool.d.ts +64 -0
  119. package/lib/workers/pool.js +192 -0
  120. package/lib/workers/worker.d.ts +5 -0
  121. package/lib/workers/worker.js +99 -0
  122. package/package.json +79 -0
  123. package/prebuilds/darwin-arm64/pdf_oxide.node +0 -0
  124. package/prebuilds/darwin-x64/pdf_oxide.node +0 -0
  125. package/prebuilds/linux-arm64/pdf_oxide.node +0 -0
  126. package/prebuilds/linux-x64/pdf_oxide.node +0 -0
  127. package/prebuilds/win32-x64/pdf_oxide.node +0 -0
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Type definitions for PDF Oxide native bindings (C++ module via NAPI)
3
+ *
4
+ * These interfaces describe the structure of objects returned from the native module.
5
+ * They are used for type checking and IDE auto-completion when working with the binding layer.
6
+ */
7
+ export {};
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Worker Threads Module
3
+ * Exports worker pool and types for parallel PDF processing
4
+ */
5
+ export type { WorkerResult, WorkerTask } from './pool.js';
6
+ export { WorkerPool, workerPool } from './pool.js';
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Worker Threads Module
3
+ * Exports worker pool and types for parallel PDF processing
4
+ */
5
+ export { WorkerPool, workerPool } from './pool.js';
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Worker Thread Pool Manager
3
+ * Enables non-blocking parallel PDF processing
4
+ */
5
+ /**
6
+ * Represents a task to be processed by a worker
7
+ */
8
+ export interface WorkerTask<T = any> {
9
+ operation: 'extract' | 'search' | 'render' | 'analyze';
10
+ documentPath: string;
11
+ params: Record<string, any>;
12
+ }
13
+ /**
14
+ * Result returned from a worker
15
+ */
16
+ export interface WorkerResult<T = any> {
17
+ success: boolean;
18
+ data?: T;
19
+ error?: Error | string;
20
+ duration: number;
21
+ }
22
+ /**
23
+ * Thread pool for parallel PDF processing
24
+ */
25
+ export declare class WorkerPool {
26
+ private poolSize;
27
+ private workers;
28
+ private queue;
29
+ private activeCount;
30
+ private terminated;
31
+ private readonly defaultTimeout;
32
+ /**
33
+ * Initialize the worker pool
34
+ * @param poolSize - Number of worker threads to create
35
+ */
36
+ constructor(poolSize?: number);
37
+ private validatePoolSize;
38
+ private initializeWorkers;
39
+ /**
40
+ * Run a task in the worker pool
41
+ * @param task - The task to run
42
+ * @param timeout - Optional timeout in milliseconds
43
+ * @returns Promise that resolves with the result
44
+ */
45
+ runTask<T = any>(task: WorkerTask<T>, timeout?: number): Promise<WorkerResult<T>>;
46
+ private processQueue;
47
+ private handleWorkerError;
48
+ /**
49
+ * Terminate all workers
50
+ * @returns Promise that resolves when all workers are terminated
51
+ */
52
+ terminate(): Promise<void>;
53
+ private cleanup;
54
+ /**
55
+ * Get current pool statistics
56
+ */
57
+ getStats(): {
58
+ poolSize: number;
59
+ activeWorkers: number;
60
+ queuedTasks: number;
61
+ terminated: boolean;
62
+ };
63
+ }
64
+ export declare const workerPool: WorkerPool;
@@ -0,0 +1,192 @@
1
+ /**
2
+ * Worker Thread Pool Manager
3
+ * Enables non-blocking parallel PDF processing
4
+ */
5
+ import os from 'os';
6
+ import path from 'path';
7
+ import { fileURLToPath } from 'url';
8
+ import { Worker } from 'worker_threads';
9
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
/**
 * Thread pool for parallel PDF processing.
 *
 * Tasks submitted through `runTask` wait in a FIFO queue until a worker is
 * idle. Each worker executes at most one task at a time, so every reply can
 * be attributed to exactly one task. A worker that crashes, exits, or
 * exceeds a task's timeout is dropped from its slot; the slot is refilled
 * lazily the next time a task needs it.
 */
export class WorkerPool {
    /**
     * Initialize the worker pool
     * @param poolSize - Number of worker threads to create (integer, 1-32)
     * @throws {Error} If poolSize is out of range or a worker cannot be created
     */
    constructor(poolSize = 4) {
        this.poolSize = poolSize;
        // One slot per worker. A slot holds null after its worker died or was
        // retired, and is refilled by the next dispatch that needs it.
        this.workers = [];
        this.queue = [];
        this.activeCount = 0;
        this.terminated = false;
        this.defaultTimeout = 30000; // 30 seconds
        // worker -> the job that worker is currently executing
        this.inFlight = new Map();
        this.validatePoolSize();
        this.initializeWorkers();
    }
    /**
     * Reject pool sizes that are not whole numbers in [1, 32].
     * NaN and fractional sizes are rejected too: plain range comparisons
     * let NaN through, which would create a pool without any workers.
     */
    validatePoolSize() {
        if (!Number.isInteger(this.poolSize) || this.poolSize < 1 || this.poolSize > 32) {
            throw new Error(`Pool size must be between 1 and 32, got ${this.poolSize}`);
        }
    }
    /**
     * Eagerly start one worker per slot.
     * @throws {Error} If any worker fails to start; workers that did start
     *   are terminated first so no thread is leaked.
     */
    initializeWorkers() {
        try {
            for (let i = 0; i < this.poolSize; i++) {
                this.workers.push(this.startWorker(i));
            }
        }
        catch (error) {
            // The instance never escapes the constructor; flag it terminated so
            // the exit events of the workers stopped below are not reported.
            this.terminated = true;
            for (const worker of this.workers) {
                worker.terminate().catch((err) => console.warn('Error terminating worker:', err));
            }
            this.cleanup();
            throw new Error(`Failed to initialize worker pool: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }
    /**
     * Create the underlying worker thread running `worker.js`.
     * Kept separate from listener wiring so it is the single place that
     * touches the filesystem path.
     */
    spawnWorker() {
        return new Worker(path.join(__dirname, 'worker.js'));
    }
    /**
     * Create a worker for `index` and attach its lifetime listeners.
     * Listeners are attached once per worker (not once per task), so a reply
     * can never be observed by a handler belonging to a different task.
     * @param index - Slot number, used in log messages
     */
    startWorker(index) {
        const worker = this.spawnWorker();
        worker.on('message', (result) => {
            this.completeJob(worker, (job) => job.resolve(result), true);
        });
        worker.on('error', (error) => {
            console.error(`Worker ${index} error:`, error);
            this.handleWorkerError(error instanceof Error ? error : new Error(String(error)), worker);
        });
        worker.on('exit', (code) => {
            // Only warn when the worker was still in service; retired or
            // crashed workers have already been removed from their slot.
            if (code !== 0 && !this.terminated && this.workers[index] === worker) {
                console.warn(`Worker ${index} exited with code ${code}`);
            }
            this.completeJob(worker, (job) => job.reject(new Error(`Worker ${index} exited with code ${code} before completing its task`)), false);
        });
        return worker;
    }
    /**
     * Run a task in the worker pool
     * @param task - The task to run
     * @param timeout - Optional timeout in milliseconds (1000-300000)
     * @returns Promise that resolves with the worker's result, or rejects on
     *   timeout, worker failure, or pool termination
     */
    async runTask(task, timeout = this.defaultTimeout) {
        if (this.terminated) {
            throw new Error('Worker pool has been terminated');
        }
        if (!Number.isFinite(timeout) || timeout < 1000 || timeout > 300000) {
            throw new Error('Timeout must be between 1 and 300 seconds');
        }
        return new Promise((resolve, reject) => {
            const job = { task, resolve, reject, timeout: undefined, worker: null };
            job.timeout = setTimeout(() => this.expireJob(job, timeout), timeout);
            this.queue.push(job);
            this.processQueue();
        });
    }
    /**
     * Fail a job whose timeout elapsed.
     * A job still waiting in the queue is simply removed. A job already on a
     * worker cannot be cancelled, so that worker is terminated and its slot
     * freed for a replacement.
     */
    expireJob(job, timeout) {
        const error = new Error(`Worker task timeout after ${timeout}ms: ${job.task.operation} on ${job.task.documentPath}`);
        // Match on the queue entry itself, not the task object, so submitting
        // the same task object twice does not drop both entries.
        const queuedAt = this.queue.indexOf(job);
        if (queuedAt !== -1) {
            this.queue.splice(queuedAt, 1);
            job.reject(error);
            return;
        }
        const worker = job.worker;
        if (!worker || this.inFlight.get(worker) !== job) {
            return;
        }
        this.completeJob(worker, (expired) => expired.reject(error), false);
        worker.terminate().catch((err) => console.warn('Error terminating worker:', err));
    }
    /**
     * Settle the job currently assigned to `worker` (if any) and continue
     * draining the queue.
     * @param worker - Worker that produced the outcome
     * @param settle - Callback that resolves or rejects the job
     * @param reusable - false when the worker must not receive further tasks;
     *   its slot is then emptied so a fresh worker is started on demand
     */
    completeJob(worker, settle, reusable) {
        const job = this.inFlight.get(worker);
        if (job) {
            this.inFlight.delete(worker);
            this.activeCount--;
            clearTimeout(job.timeout);
            settle(job);
        }
        if (!reusable) {
            const slot = this.workers.indexOf(worker);
            if (slot !== -1) {
                this.workers[slot] = null;
            }
        }
        if (!this.terminated) {
            this.processQueue();
        }
    }
    /**
     * Index of a slot that can accept a task (idle worker or empty slot),
     * or -1 when every worker is busy.
     */
    findIdleSlot() {
        for (let slot = 0; slot < this.poolSize; slot++) {
            const worker = this.workers[slot];
            if (!worker || !this.inFlight.has(worker)) {
                return slot;
            }
        }
        return -1;
    }
    /**
     * Hand queued jobs to idle workers until the queue is empty or every
     * worker is busy.
     */
    processQueue() {
        if (this.terminated) {
            return;
        }
        while (this.queue.length > 0) {
            const slot = this.findIdleSlot();
            if (slot === -1) {
                return;
            }
            const job = this.queue.shift();
            try {
                let worker = this.workers[slot];
                if (!worker) {
                    worker = this.startWorker(slot);
                    this.workers[slot] = worker;
                }
                // postMessage throws synchronously (e.g. uncloneable task);
                // bookkeeping happens only after it succeeds.
                worker.postMessage(job.task);
                job.worker = worker;
                this.inFlight.set(worker, job);
                this.activeCount++;
            }
            catch (error) {
                clearTimeout(job.timeout);
                job.reject(error instanceof Error ? error : new Error(String(error)));
            }
        }
    }
    /**
     * Fail the task running on a worker that reported an uncaught error.
     * Only that worker's own task is rejected; queued tasks stay queued and
     * are dispatched to another (or a replacement) worker.
     * @param error - Error reported by the worker
     * @param worker - Worker that emitted the error; without it there is no
     *   task to attribute the error to and the call is a no-op
     */
    handleWorkerError(error, worker) {
        // Per the worker_threads docs the thread is terminated after an
        // uncaught exception, so the worker is not reused.
        this.completeJob(worker, (job) => job.reject(error), false);
    }
    /**
     * Terminate all workers
     * Queued and in-flight tasks are rejected with "Worker pool terminated".
     * @returns Promise that resolves when all workers are terminated
     */
    async terminate() {
        this.terminated = true;
        const pending = [...this.queue.splice(0), ...this.inFlight.values()];
        this.inFlight.clear();
        for (const job of pending) {
            clearTimeout(job.timeout);
            job.reject(new Error('Worker pool terminated'));
        }
        // Empty slots (null) belong to workers that were already retired.
        const live = this.workers.filter(Boolean);
        await Promise.all(live.map((worker) => worker.terminate().catch((error) => console.warn('Error terminating worker:', error))));
        this.cleanup();
    }
    /** Reset all bookkeeping to the empty state. */
    cleanup() {
        this.workers = [];
        this.queue = [];
        this.activeCount = 0;
        this.inFlight.clear();
    }
    /**
     * Get current pool statistics
     */
    getStats() {
        return {
            poolSize: this.poolSize,
            activeWorkers: this.activeCount,
            queuedTasks: this.queue.length,
            terminated: this.terminated,
        };
    }
}
174
/**
 * Global worker pool instance (singleton)
 * Auto-configured based on CPU count, capped at 8 workers.
 *
 * `os.availableParallelism()` is preferred when the running Node version
 * provides it; otherwise the logical CPU count is used. `os.cpus()` may
 * return an empty list, hence the lower bound of 1.
 */
const hardwareConcurrency = Math.max(1, typeof os.availableParallelism === 'function'
    ? os.availableParallelism()
    : os.cpus().length);
export const workerPool = new WorkerPool(Math.min(hardwareConcurrency, 8));
/**
 * Graceful shutdown
 *
 * Node runs 'exit' listeners synchronously and abandons any work still
 * queued on the event loop, so the listener must not be async: code after an
 * `await` would never run. Only the synchronous part of terminate() takes
 * effect here (the pool is flagged terminated, pending tasks are rejected and
 * worker termination is requested); the returned promise cannot be awaited.
 */
process.on('exit', () => {
    if (!workerPool || workerPool.terminated) {
        return;
    }
    try {
        workerPool.terminate().catch((error) => {
            console.error('Error during worker pool shutdown:', error);
        });
    }
    catch (error) {
        console.error('Error during worker pool shutdown:', error);
    }
});
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Worker Thread Script
3
+ * Handles off-main-thread PDF processing tasks
4
+ */
5
+ export {};
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Worker Thread Script
3
+ * Handles off-main-thread PDF processing tasks
4
+ */
5
+ import { parentPort } from 'worker_threads';
6
/** Operations understood by the worker, matching `task.operation`. */
const SUPPORTED_OPERATIONS = new Set(['extract', 'search', 'render', 'analyze']);
/**
 * Run one supported operation against an already opened document.
 * @param doc - Document returned by `PdfDocument.open`
 * @param operation - One of SUPPORTED_OPERATIONS
 * @param params - Operation-specific parameters (may be empty)
 * @returns The raw result of the operation
 */
function executeOperation(doc, operation, params) {
    switch (operation) {
        case 'extract': {
            const extraction = doc.extraction;
            if (params.type === 'markdown') {
                return extraction.extractMarkdown(params.pageIndex, params.options);
            }
            if (params.type === 'html') {
                return extraction.extractHtml(params.pageIndex, params.options);
            }
            // Any other (or missing) type falls back to plain text.
            return extraction.extractText(params.pageIndex, params.options);
        }
        case 'search':
            return doc.search.searchAll(params.query, params.options || {});
        case 'render':
            return doc.rendering.renderPage(params.pageIndex, params.options || {});
        case 'analyze':
            // Managers and their methods are probed optionally; absent ones
            // yield the neutral defaults below.
            return {
                pageCount: doc.pageCount,
                metadata: doc.metadata?.getMetadata?.() || null,
                outline: {
                    count: doc.outline?.getOutlineCount?.() || 0,
                    isFlat: doc.outline?.isFlat?.() || false,
                },
                layers: {
                    count: doc.layers?.getLayerCount?.() || 0,
                    visible: doc.layers?.getVisibleLayerCount?.() || 0,
                },
            };
        default:
            throw new Error(`Unknown operation: ${operation}`);
    }
}
/**
 * Process a worker task
 *
 * Never throws: every failure is converted into a `{ success: false }`
 * result so the caller always receives a reply.
 *
 * @param task - `{ operation, documentPath, params }`; `params` may be
 *   omitted and is then treated as an empty object
 * @returns `{ success: true, data, duration }` on success, otherwise
 *   `{ success: false, error, duration }` where `error` is a plain
 *   `{ name, message, stack }` object for Error instances or a string for
 *   anything else. `duration` is elapsed wall-clock milliseconds.
 */
async function handleTask(task) {
    const startTime = Date.now();
    try {
        // Validate before loading the library or opening the file so that
        // malformed requests fail fast with a clear message.
        if (task === null || typeof task !== 'object') {
            throw new TypeError('Worker task must be an object');
        }
        const { operation, documentPath } = task;
        const params = task.params ?? {};
        if (!SUPPORTED_OPERATIONS.has(operation)) {
            throw new Error(`Unknown operation: ${operation}`);
        }
        // Imported lazily, inside the try, so a load failure is reported as a
        // failed result instead of crashing the worker.
        const { PdfDocument } = await import('../index.js');
        if (!PdfDocument) {
            throw new Error('PdfDocument not available in worker');
        }
        const doc = PdfDocument.open(documentPath);
        const data = executeOperation(doc, operation, params);
        return {
            success: true,
            data,
            duration: Date.now() - startTime,
        };
    }
    catch (error) {
        // Error instances are flattened to plain fields for transfer to the
        // parent thread.
        const serialized = error instanceof Error
            ? { name: error.name, message: error.message, stack: error.stack }
            : String(error);
        return {
            success: false,
            error: serialized,
            duration: Date.now() - startTime,
        };
    }
}
87
/**
 * Main worker message handler
 *
 * Each incoming message is treated as one task; exactly one reply is posted
 * back per task. When this script is not running inside a worker thread
 * (`parentPort` is null) it logs an error and exits with code 1.
 */
if (parentPort) {
    parentPort.on('message', async (task) => {
        const result = await handleTask(task);
        try {
            parentPort.postMessage(result);
        }
        catch (error) {
            // postMessage throws when the result cannot be structured-cloned.
            // Left uncaught inside this async listener it would surface as an
            // unhandled rejection; report a failed result instead so the
            // caller still gets an answer.
            parentPort.postMessage({
                success: false,
                error: error instanceof Error
                    ? { name: error.name, message: error.message, stack: error.stack }
                    : String(error),
                duration: result.duration,
            });
        }
    });
}
else {
    console.error('Worker script must be run as a Worker thread');
    process.exit(1);
}
package/package.json ADDED
@@ -0,0 +1,79 @@
1
+ {
2
+ "name": "pdf-oxide-fips",
3
+ "version": "0.3.47",
4
+ "type": "module",
5
+ "description": "[FIPS 140-3 validated build] High-performance PDF parsing and text extraction library — prebuilt native bindings, no build toolchain required",
6
+ "main": "lib/index.js",
7
+ "types": "lib/index.d.ts",
8
+ "scripts": {
9
+ "compile:ts": "tsc",
10
+ "build:ts": "npm run compile:ts && node scripts/fix-esm-imports.js",
11
+ "build:native": "node-gyp rebuild",
12
+ "build": "npm run build:ts && npm run build:native",
13
+ "typecheck": "tsc --noEmit",
14
+ "clean:ts": "rimraf lib/",
15
+ "clean": "npm run clean:ts && node-gyp clean",
16
+ "prepack": "npm run build:ts",
17
+ "lint": "biome check .",
18
+ "lint:fix": "biome check --write .",
19
+ "format": "biome format --write .",
20
+ "check:publint": "publint",
21
+ "check:types": "attw --pack . --ignore-rules=cjs-resolves-to-esm",
22
+ "audit:prod": "npm audit --omit=dev --audit-level=high",
23
+ "test": "node --test tests/smoke.test.mjs tests/feature-guard.test.mjs tests/api-coverage.test.mjs tests/html-css.test.mjs tests/document-builder.test.mjs tests/document-editor.test.mjs tests/render-options.test.mjs tests/tables.test.mjs tests/worker-threads-safety.test.mjs"
24
+ },
25
+ "files": [
26
+ "lib/",
27
+ "prebuilds/",
28
+ "README.md",
29
+ "LICENSE-APACHE",
30
+ "LICENSE-MIT"
31
+ ],
32
+ "keywords": [
33
+ "pdf",
34
+ "text-extraction",
35
+ "pdf-parsing",
36
+ "rust-ffi",
37
+ "native-binding",
38
+ "prebuilt"
39
+ ],
40
+ "author": "PDF Oxide Contributors",
41
+ "license": "MIT OR Apache-2.0",
42
+ "homepage": "https://github.com/yfedoseev/pdf_oxide",
43
+ "repository": {
44
+ "type": "git",
45
+ "url": "git+https://github.com/yfedoseev/pdf_oxide.git",
46
+ "directory": "js"
47
+ },
48
+ "engines": {
49
+ "node": ">=18.0.0"
50
+ },
51
+ "exports": {
52
+ ".": {
53
+ "types": "./lib/index.d.ts",
54
+ "import": "./lib/index.js"
55
+ },
56
+ "./builders": {
57
+ "types": "./lib/builders/index.d.ts",
58
+ "import": "./lib/builders/index.js"
59
+ },
60
+ "./managers": {
61
+ "types": "./lib/managers/index.d.ts",
62
+ "import": "./lib/managers/index.js"
63
+ },
64
+ "./errors": {
65
+ "types": "./lib/errors.d.ts",
66
+ "import": "./lib/errors.js"
67
+ }
68
+ },
69
+ "dependencies": {
70
+ "async-mutex": "^0.5.0"
71
+ },
72
+ "devDependencies": {
73
+ "@types/node": "^25.6.0",
74
+ "node-addon-api": "^8.7.0",
75
+ "rimraf": "^6.1.3",
76
+ "typescript": "^6.0.3"
77
+ },
78
+ "gypfile": false
79
+ }