pdf-oxide 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -0
- package/binding.gyp +35 -0
- package/package.json +78 -0
- package/src/builders/annotation-builder.ts +367 -0
- package/src/builders/conversion-options-builder.ts +257 -0
- package/src/builders/index.ts +12 -0
- package/src/builders/metadata-builder.ts +317 -0
- package/src/builders/pdf-builder.ts +386 -0
- package/src/builders/search-options-builder.ts +151 -0
- package/src/document-editor-manager.ts +318 -0
- package/src/errors.ts +1629 -0
- package/src/form-field-manager.ts +666 -0
- package/src/hybrid-ml-manager.ts +283 -0
- package/src/index.ts +453 -0
- package/src/managers/accessibility-manager.ts +338 -0
- package/src/managers/annotation-manager.ts +439 -0
- package/src/managers/barcode-manager.ts +235 -0
- package/src/managers/batch-manager.ts +533 -0
- package/src/managers/cache-manager.ts +486 -0
- package/src/managers/compliance-manager.ts +375 -0
- package/src/managers/content-manager.ts +339 -0
- package/src/managers/document-utility-manager.ts +922 -0
- package/src/managers/dom-pdf-creator.ts +365 -0
- package/src/managers/editing-manager.ts +514 -0
- package/src/managers/enterprise-manager.ts +478 -0
- package/src/managers/extended-managers.ts +437 -0
- package/src/managers/extraction-manager.ts +583 -0
- package/src/managers/final-utilities.ts +429 -0
- package/src/managers/hybrid-ml-advanced.ts +479 -0
- package/src/managers/index.ts +239 -0
- package/src/managers/layer-manager.ts +500 -0
- package/src/managers/metadata-manager.ts +303 -0
- package/src/managers/ocr-manager.ts +756 -0
- package/src/managers/optimization-manager.ts +262 -0
- package/src/managers/outline-manager.ts +196 -0
- package/src/managers/page-manager.ts +289 -0
- package/src/managers/pattern-detection.ts +440 -0
- package/src/managers/rendering-manager.ts +863 -0
- package/src/managers/search-manager.ts +385 -0
- package/src/managers/security-manager.ts +345 -0
- package/src/managers/signature-manager.ts +1664 -0
- package/src/managers/streams.ts +618 -0
- package/src/managers/xfa-manager.ts +500 -0
- package/src/pdf-creator-manager.ts +494 -0
- package/src/properties.ts +522 -0
- package/src/result-accessors-manager.ts +867 -0
- package/src/tests/advanced-features.test.ts +414 -0
- package/src/tests/advanced.test.ts +266 -0
- package/src/tests/extended-managers.test.ts +316 -0
- package/src/tests/final-utilities.test.ts +455 -0
- package/src/tests/foundation.test.ts +315 -0
- package/src/tests/high-demand.test.ts +257 -0
- package/src/tests/specialized.test.ts +97 -0
- package/src/thumbnail-manager.ts +272 -0
- package/src/types/common.ts +142 -0
- package/src/types/document-types.ts +457 -0
- package/src/types/index.ts +6 -0
- package/src/types/manager-types.ts +284 -0
- package/src/types/native-bindings.ts +517 -0
- package/src/workers/index.ts +7 -0
- package/src/workers/pool.ts +274 -0
- package/src/workers/worker.ts +131 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AccessibilityManager - PDF Accessibility Operations
|
|
3
|
+
*
|
|
4
|
+
* Provides accessibility analysis and remediation capabilities including:
|
|
5
|
+
* - Tagged PDF detection
|
|
6
|
+
* - Structure tree extraction
|
|
7
|
+
* - Auto-tagging
|
|
8
|
+
* - Alt text management
|
|
9
|
+
* - Language and title metadata
|
|
10
|
+
*
|
|
11
|
+
* @since 1.0.0
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { EventEmitter } from 'events';
|
|
15
|
+
import { mapFfiErrorCode, AccessibilityException } from '../errors';
|
|
16
|
+
|
|
17
|
+
// =============================================================================
|
|
18
|
+
// Type Definitions
|
|
19
|
+
// =============================================================================
|
|
20
|
+
|
|
21
|
+
/**
 * One node of a tagged PDF's logical structure.
 *
 * Nodes nest through `children`; the optional fields are simply absent
 * when the element does not carry them.
 */
export interface StructureElement {
  /** Tag name of the element, for example 'P', 'H1', 'Figure' or 'Table'. */
  readonly type: string;
  /** Index of the page on which the element appears. */
  readonly pageIndex: number;
  /** Marked content identifier (MCID) of the element. */
  readonly mcid: number;
  /** Alternative text, when one has been set. */
  readonly altText?: string;
  /** Actual text content of the element, when present. */
  readonly actualText?: string;
  /** Language of the element, when present. */
  readonly language?: string;
  /** Nested child elements. */
  readonly children: ReadonlyArray<StructureElement>;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Complete structure tree of a PDF document, as returned by
 * `AccessibilityManager.getStructureTree()`.
 */
export interface StructureTree {
  /** True when the document is tagged. */
  readonly isTagged: boolean;
  /** Number of elements in the tree, counted across every nesting level. */
  readonly totalElements: number;
  /** Elements located at the root of the tree. */
  readonly elements: ReadonlyArray<StructureElement>;
  /** Document language recorded on the structure tree root, if any. */
  readonly language?: string;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Outcome reported by `AccessibilityManager.autoTag()`.
 */
export interface AutoTagResult {
  /** True when the auto-tagging run succeeded. */
  readonly success: boolean;
  /** How many elements received a tag. */
  readonly elementsTagged: number;
  /** How many headings were detected. */
  readonly headingsDetected: number;
  /** How many images were found. */
  readonly imagesFound: number;
  /** Warnings produced while tagging. */
  readonly warnings: ReadonlyArray<string>;
}
|
|
70
|
+
|
|
71
|
+
// =============================================================================
|
|
72
|
+
// AccessibilityManager
|
|
73
|
+
// =============================================================================
|
|
74
|
+
|
|
75
|
+
/**
 * Manager for PDF accessibility operations.
 *
 * Provides methods for inspecting and improving the accessibility of
 * PDF documents, including tagged PDF detection, structure tree analysis,
 * auto-tagging, and alt text management.
 *
 * Every operation is forwarded to the native addon loaded in the
 * constructor. When the addon (or the specific native function) is not
 * available, the method throws an AccessibilityException. After a
 * successful operation an event is emitted on this EventEmitter:
 * 'tagged-checked', 'structure-tree-retrieved', 'auto-tagged',
 * 'alt-text-set', 'language-set' and 'title-set'. Listeners run
 * synchronously, so an error thrown by a listener propagates to the caller.
 *
 * @example
 * ```typescript
 * const accessibility = new AccessibilityManager(document);
 *
 * // Check if document is tagged
 * const tagged = await accessibility.isTagged();
 *
 * // Auto-tag an untagged document
 * if (!tagged) {
 *   const result = await accessibility.autoTag('en');
 *   console.log(`Tagged ${result.elementsTagged} elements`);
 * }
 *
 * // Set alt text on images
 * await accessibility.setAltText(0, 1, 'Company logo');
 * ```
 */
export class AccessibilityManager extends EventEmitter {
  private readonly document: any;
  private readonly native: any;

  /**
   * @param document - Document wrapper (its `_handle` is used when present)
   *   or a raw handle accepted by the native functions
   * @throws Error if `document` is null, undefined or otherwise falsy
   */
  constructor(document: any) {
    super();
    if (!document) {
      throw new Error('Document cannot be null or undefined');
    }
    this.document = document;
    try {
      // Loading is best-effort: a missing addon is reported lazily by each
      // method instead of failing construction.
      this.native = require('../../index.node');
    } catch {
      this.native = null;
    }
  }

  // ===========================================================================
  // Internal helpers
  // ===========================================================================

  /** Returns the value handed to native calls: `document._handle` when set, else the document itself. */
  private documentHandle(): unknown {
    return this.document._handle ?? this.document;
  }

  /** Throws an AccessibilityException unless the named native function is available. */
  private requireNative(symbol: string): void {
    if (!this.native?.[symbol]) {
      throw new AccessibilityException(`Native accessibility not available: ${symbol} not found`);
    }
  }

  /** Reads the 32-bit little-endian status written by a native call and throws the mapped error when it is non-zero. */
  private static throwOnFailure(errorCode: Buffer, message: string): void {
    const code = errorCode.readInt32LE(0);
    if (code !== 0) {
      throw mapFfiErrorCode(code, message);
    }
  }

  /** Builds the placeholder tree returned when the native payload is unusable. */
  private static emptyTree(): StructureTree {
    return { isTagged: false, elements: [], totalElements: 0 };
  }

  // ===========================================================================
  // Accessibility Analysis
  // ===========================================================================

  /**
   * Checks whether the PDF document is tagged.
   *
   * A tagged PDF contains a structure tree that defines the logical
   * reading order and semantic structure of the content.
   *
   * Emits 'tagged-checked' with `{ isTagged }`, where `isTagged` is the same
   * boolean that is returned.
   *
   * @returns True if the document is tagged
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async isTagged(): Promise<boolean> {
    this.requireNative('pdf_accessibility_is_tagged');

    const errorCode = Buffer.alloc(4);
    const result = this.native.pdf_accessibility_is_tagged(this.documentHandle(), errorCode);
    AccessibilityManager.throwOnFailure(errorCode, 'Failed to check if document is tagged');

    // Coerce once so the event payload and the return value always agree.
    const isTagged = !!result;
    this.emit('tagged-checked', { isTagged });
    return isTagged;
  }

  /**
   * Gets the full structure tree of the document.
   *
   * Returns the hierarchical structure tree that defines the logical
   * organization and reading order of the document content. The native
   * result may be a JSON string (parsed here) or an already-materialised
   * object. If the payload cannot be parsed or is not an object, an empty,
   * untagged tree is returned and no event is emitted.
   *
   * Emits 'structure-tree-retrieved' with `{ totalElements }` on success.
   *
   * @returns The document structure tree
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async getStructureTree(): Promise<StructureTree> {
    this.requireNative('pdf_accessibility_get_structure_tree');

    const errorCode = Buffer.alloc(4);
    const resultPtr = this.native.pdf_accessibility_get_structure_tree(
      this.documentHandle(),
      errorCode,
    );
    AccessibilityManager.throwOnFailure(errorCode, 'Failed to get structure tree');

    try {
      let parsed: unknown;
      try {
        parsed = typeof resultPtr === 'string' ? JSON.parse(resultPtr) : resultPtr;
      } catch {
        // Best-effort: an unparseable payload degrades to an empty tree.
        return AccessibilityManager.emptyTree();
      }

      if (parsed === null || typeof parsed !== 'object') {
        return AccessibilityManager.emptyTree();
      }

      // Shape is trusted from the native layer; only "is an object" is verified.
      const tree = parsed as StructureTree;

      // Emitted outside the parse guard so a throwing listener surfaces to
      // the caller instead of being mistaken for a parse failure.
      this.emit('structure-tree-retrieved', { totalElements: tree.totalElements });
      return tree;
    } finally {
      // Release the native result on every path, including when a listener
      // throws. String results are plain JS values and need no release.
      // NOTE(review): a non-string result is both freed here and returned as
      // the tree; confirm the binding keeps the object usable after free.
      if (
        resultPtr != null &&
        typeof resultPtr !== 'string' &&
        this.native.pdf_structure_tree_free
      ) {
        this.native.pdf_structure_tree_free(resultPtr);
      }
    }
  }

  /**
   * Automatically tags an untagged PDF document.
   *
   * Uses heuristic analysis to detect document structure and apply
   * appropriate tags for headings, paragraphs, images, tables, and lists.
   *
   * The native call has already succeeded (error code 0) by the time the
   * result payload is decoded. A missing or non-object payload therefore
   * yields a successful result with zero counts; an unparseable payload does
   * the same and additionally records a warning.
   *
   * Emits 'auto-tagged' with `{ language, elementsTagged }`.
   *
   * @param language - Optional BCP-47 language tag (e.g., 'en', 'fr', 'de')
   * @returns Result of the auto-tagging operation
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async autoTag(language?: string): Promise<AutoTagResult> {
    this.requireNative('pdf_accessibility_auto_tag');

    const errorCode = Buffer.alloc(4);
    const resultPtr = this.native.pdf_accessibility_auto_tag(
      this.documentHandle(),
      language ?? null,
      errorCode,
    );
    AccessibilityManager.throwOnFailure(errorCode, 'Failed to auto-tag document');

    const zeroCounts = (warnings: readonly string[]): AutoTagResult => ({
      success: true,
      elementsTagged: 0,
      imagesFound: 0,
      headingsDetected: 0,
      warnings,
    });

    let result: AutoTagResult;
    try {
      const parsed: unknown = typeof resultPtr === 'string' ? JSON.parse(resultPtr) : resultPtr;
      // Guard against null / primitive payloads (e.g. the JSON text "null"),
      // which would otherwise crash on the property access below.
      result = parsed !== null && typeof parsed === 'object'
        ? (parsed as AutoTagResult)
        : zeroCounts([]);
    } catch {
      result = zeroCounts(['Auto-tag result could not be parsed; statistics are unavailable']);
    }

    this.emit('auto-tagged', { language, elementsTagged: result.elementsTagged });
    return result;
  }

  // ===========================================================================
  // Alt Text Management
  // ===========================================================================

  /**
   * Sets alt text for a structure element identified by page and MCID.
   *
   * Emits 'alt-text-set' with `{ page, mcid, text }`.
   *
   * @param page - Zero-based page index
   * @param mcid - Marked content identifier of the element
   * @param text - Alt text to set
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async setAltText(page: number, mcid: number, text: string): Promise<void> {
    this.requireNative('pdf_accessibility_set_alt_text');

    const errorCode = Buffer.alloc(4);
    this.native.pdf_accessibility_set_alt_text(
      this.documentHandle(),
      page,
      mcid,
      text,
      errorCode,
    );
    AccessibilityManager.throwOnFailure(
      errorCode,
      `Failed to set alt text on page ${page}, mcid ${mcid}`,
    );

    this.emit('alt-text-set', { page, mcid, text });
  }

  // ===========================================================================
  // Document-Level Accessibility Metadata
  // ===========================================================================

  /**
   * Sets the document language.
   *
   * Emits 'language-set' with `{ lang }`.
   *
   * @param lang - BCP-47 language tag (e.g., 'en', 'en-US', 'fr')
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async setLanguage(lang: string): Promise<void> {
    this.requireNative('pdf_accessibility_set_language');

    const errorCode = Buffer.alloc(4);
    this.native.pdf_accessibility_set_language(this.documentHandle(), lang, errorCode);
    AccessibilityManager.throwOnFailure(
      errorCode,
      `Failed to set document language to '${lang}'`,
    );

    this.emit('language-set', { lang });
  }

  /**
   * Sets the document title for accessibility.
   *
   * Emits 'title-set' with `{ title }`.
   *
   * @param title - Document title
   * @throws AccessibilityException if the native function is unavailable
   * @throws The error produced by mapFfiErrorCode if the native call reports a non-zero code
   */
  async setTitle(title: string): Promise<void> {
    this.requireNative('pdf_accessibility_set_title');

    const errorCode = Buffer.alloc(4);
    this.native.pdf_accessibility_set_title(this.documentHandle(), title, errorCode);
    AccessibilityManager.throwOnFailure(errorCode, 'Failed to set document title');

    this.emit('title-set', { title });
  }

  // ===========================================================================
  // Cleanup
  // ===========================================================================

  /**
   * Releases resources held by this manager by removing every registered
   * event listener.
   */
  destroy(): void {
    this.removeAllListeners();
  }
}
|
|
337
|
+
|
|
338
|
+
// Expose the manager as the module's default export in addition to its named export.
export default AccessibilityManager;
|