pdf-oxide 0.3.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -0
- package/binding.gyp +35 -0
- package/package.json +78 -0
- package/src/builders/annotation-builder.ts +367 -0
- package/src/builders/conversion-options-builder.ts +257 -0
- package/src/builders/index.ts +12 -0
- package/src/builders/metadata-builder.ts +317 -0
- package/src/builders/pdf-builder.ts +386 -0
- package/src/builders/search-options-builder.ts +151 -0
- package/src/document-editor-manager.ts +318 -0
- package/src/errors.ts +1629 -0
- package/src/form-field-manager.ts +666 -0
- package/src/hybrid-ml-manager.ts +283 -0
- package/src/index.ts +453 -0
- package/src/managers/accessibility-manager.ts +338 -0
- package/src/managers/annotation-manager.ts +439 -0
- package/src/managers/barcode-manager.ts +235 -0
- package/src/managers/batch-manager.ts +533 -0
- package/src/managers/cache-manager.ts +486 -0
- package/src/managers/compliance-manager.ts +375 -0
- package/src/managers/content-manager.ts +339 -0
- package/src/managers/document-utility-manager.ts +922 -0
- package/src/managers/dom-pdf-creator.ts +365 -0
- package/src/managers/editing-manager.ts +514 -0
- package/src/managers/enterprise-manager.ts +478 -0
- package/src/managers/extended-managers.ts +437 -0
- package/src/managers/extraction-manager.ts +583 -0
- package/src/managers/final-utilities.ts +429 -0
- package/src/managers/hybrid-ml-advanced.ts +479 -0
- package/src/managers/index.ts +239 -0
- package/src/managers/layer-manager.ts +500 -0
- package/src/managers/metadata-manager.ts +303 -0
- package/src/managers/ocr-manager.ts +756 -0
- package/src/managers/optimization-manager.ts +262 -0
- package/src/managers/outline-manager.ts +196 -0
- package/src/managers/page-manager.ts +289 -0
- package/src/managers/pattern-detection.ts +440 -0
- package/src/managers/rendering-manager.ts +863 -0
- package/src/managers/search-manager.ts +385 -0
- package/src/managers/security-manager.ts +345 -0
- package/src/managers/signature-manager.ts +1664 -0
- package/src/managers/streams.ts +618 -0
- package/src/managers/xfa-manager.ts +500 -0
- package/src/pdf-creator-manager.ts +494 -0
- package/src/properties.ts +522 -0
- package/src/result-accessors-manager.ts +867 -0
- package/src/tests/advanced-features.test.ts +414 -0
- package/src/tests/advanced.test.ts +266 -0
- package/src/tests/extended-managers.test.ts +316 -0
- package/src/tests/final-utilities.test.ts +455 -0
- package/src/tests/foundation.test.ts +315 -0
- package/src/tests/high-demand.test.ts +257 -0
- package/src/tests/specialized.test.ts +97 -0
- package/src/thumbnail-manager.ts +272 -0
- package/src/types/common.ts +142 -0
- package/src/types/document-types.ts +457 -0
- package/src/types/index.ts +6 -0
- package/src/types/manager-types.ts +284 -0
- package/src/types/native-bindings.ts +517 -0
- package/src/workers/index.ts +7 -0
- package/src/workers/pool.ts +274 -0
- package/src/workers/worker.ts +131 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ComplianceManager - Canonical Compliance Manager (merged from 2 implementations)
|
|
3
|
+
*
|
|
4
|
+
* Consolidates:
|
|
5
|
+
* - src/compliance-manager.ts ComplianceManager (validation + issue analysis + native FFI)
|
|
6
|
+
* - src/managers/ocr-compliance-cache.ts ComplianceManager (validation + conversion + fixing)
|
|
7
|
+
*
|
|
8
|
+
* Provides complete PDF/A, PDF/X, PDF/UA compliance operations.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { EventEmitter } from 'events';
|
|
12
|
+
import { promises as fs } from 'fs';
|
|
13
|
+
import { dirname } from 'path';
|
|
14
|
+
|
|
15
|
+
// =============================================================================
|
|
16
|
+
// Type Definitions (from root-level compliance-manager.ts)
|
|
17
|
+
// =============================================================================
|
|
18
|
+
|
|
19
|
+
/**
 * PDF/A conformance level identifiers.
 *
 * Each member's value is the human-readable level name; `validatePdfA`
 * passes it through unchanged and also uses it in its cache key.
 */
export enum PdfALevel {
  A1a = 'PDF/A-1a',
  A1b = 'PDF/A-1b',
  A2a = 'PDF/A-2a',
  A2b = 'PDF/A-2b',
  A3a = 'PDF/A-3a',
  A3b = 'PDF/A-3b',
}
|
|
27
|
+
|
|
28
|
+
/**
 * PDF/X conformance level identifiers.
 *
 * Both the generic names (`X1a`, `X3`) and the year-qualified variants are
 * listed as distinct members with distinct string values.
 */
export enum PdfXLevel {
  X1a = 'PDF/X-1a',
  X1a2001 = 'PDF/X-1a:2001',
  X1a2003 = 'PDF/X-1a:2003',
  X3 = 'PDF/X-3',
  X3_2002 = 'PDF/X-3:2002',
  X3_2003 = 'PDF/X-3:2003',
  X4 = 'PDF/X-4',
}
|
|
37
|
+
|
|
38
|
+
/** PDF/UA conformance level identifiers; values are the level display names. */
export enum PdfUALevel {
  UA1 = 'PDF/UA-1',
  UA2 = 'PDF/UA-2',
}
|
|
42
|
+
|
|
43
|
+
/**
 * Categories a compliance issue can be filed under.
 *
 * The snake_case string values are what issue objects carry in their `type`
 * field, so they must stay in sync with whatever produces those objects.
 */
export enum ComplianceIssueType {
  FontNotEmbedded = 'font_not_embedded',
  InvalidColorSpace = 'invalid_color_space',
  MissingAltText = 'missing_alt_text',
  MissingLanguage = 'missing_language',
  MissingTitle = 'missing_title',
  InvalidAnnotation = 'invalid_annotation',
  MissingStructure = 'missing_structure',
  InvalidLink = 'invalid_link',
  Other = 'other',
}
|
|
54
|
+
|
|
55
|
+
/** Severity attached to a structured compliance issue. */
export enum IssueSeverity {
  Error = 'error',
  Warning = 'warning',
  Info = 'info',
}
|
|
60
|
+
|
|
61
|
+
/** A single structured compliance finding. */
export interface ComplianceIssue {
  /** Category of the finding. */
  type: ComplianceIssueType;
  /** How serious the finding is. */
  severity: IssueSeverity;
  /** Human-readable description. */
  message: string;
  /** Page the finding relates to, when one is reported. */
  pageIndex?: number;
}
|
|
67
|
+
|
|
68
|
+
/** Outcome of one validation run against a single conformance level. */
export interface ComplianceValidationResult {
  /** Whether the document was judged compliant. */
  isCompliant: boolean;
  /** The level that was requested, echoed back as given. */
  level: string;
  /** Structured findings; empty when none were reported. */
  issues: ComplianceIssue[];
  /** Validation duration as reported by the validator (0 when not reported). */
  validationTime: number;
}
|
|
74
|
+
|
|
75
|
+
// Types from managers version
|
|
76
|
+
/** Loosely typed, string-based compliance result shape. */
export interface ComplianceResult {
  /** Which kind of compliance check this result belongs to. */
  type: string;
  /** Whether the check passed. */
  valid: boolean;
  /** Plain-text issue descriptions. */
  issues: string[];
  /** Severity label as a free-form string. */
  severity: string;
}
|
|
82
|
+
|
|
83
|
+
// =============================================================================
|
|
84
|
+
// Canonical ComplianceManager
|
|
85
|
+
// =============================================================================
|
|
86
|
+
|
|
87
|
+
/**
 * Compliance operations (validation, issue analysis, conversion and fixing)
 * for PDF/A, PDF/X and PDF/UA.
 *
 * Work is delegated to the optional native addon when it can be loaded, and
 * otherwise to the wrapped document object. Every delegate method is optional:
 * when neither side provides one, the documented default is returned.
 *
 * Read-only results are memoised in a bounded FIFO cache. Any method that may
 * modify the document (conversion, tagging, fixing) drops the whole cache so
 * later checks are recomputed against the modified document.
 *
 * Error reporting: several methods report failures via `emit('error', err)`.
 * As with any Node.js `EventEmitter`, emitting `'error'` without a registered
 * listener throws, so those async methods reject in that situation.
 *
 * Events: `pdfAValidated`, `pdfXValidated`, `pdfUAValidated`,
 * `validationError`, `issuesRetrieved`, `conversion-complete`, `tags-added`,
 * `fonts-fixed`, `colors-fixed`, `features-removed`, `issues-analyzed`,
 * `report-created`, `cacheCleared`, `error`.
 */
export class ComplianceManager extends EventEmitter {
  private document: any;
  private resultCache = new Map<string, any>();
  private maxCacheSize = 100;
  private native: any;

  /**
   * @param document - Document object the manager delegates to. May lack any
   *   of the delegate methods; each call is guarded with optional chaining.
   */
  constructor(document: any) {
    super();
    this.document = document;
    try {
      // The native addon is optional; fall back to the document when it is absent.
      this.native = require('../../index.node');
    } catch {
      this.native = null;
    }
  }

  // ===========================================================================
  // Validation
  // ===========================================================================

  /**
   * Validates the document against a PDF/A level.
   * @param level - Level identifier; defaults to `PdfALevel.A1b`.
   * @returns The validation result (cached per level on success).
   */
  async validatePdfA(level: PdfALevel | string = PdfALevel.A1b): Promise<ComplianceValidationResult> {
    return this.runValidation(
      {
        standard: 'pdfa',
        nativeFn: 'validate_pdf_a',
        documentFn: 'validatePdfA',
        event: 'pdfAValidated',
        documentArgs: [typeof level === 'string' ? level : '1b'],
      },
      level,
    );
  }

  /**
   * Validates the document against a PDF/X level.
   * @param level - Level identifier; defaults to `PdfXLevel.X1a`.
   * @returns The validation result (cached per level on success).
   */
  async validatePdfX(level: PdfXLevel | string = PdfXLevel.X1a): Promise<ComplianceValidationResult> {
    return this.runValidation(
      {
        standard: 'pdfx',
        nativeFn: 'validate_pdf_x',
        documentFn: 'validatePdfX',
        event: 'pdfXValidated',
        documentArgs: [typeof level === 'string' ? level : '1a'],
      },
      level,
    );
  }

  /**
   * Validates the document against a PDF/UA level.
   * @param level - Level identifier; defaults to `PdfUALevel.UA1`. The
   *   document fallback is invoked without arguments.
   * @returns The validation result (cached per level on success).
   */
  async validatePdfUA(level: PdfUALevel | string = PdfUALevel.UA1): Promise<ComplianceValidationResult> {
    return this.runValidation(
      {
        standard: 'pdfua',
        nativeFn: 'validate_pdf_ua',
        documentFn: 'validatePdfUA',
        event: 'pdfUAValidated',
        documentArgs: [],
      },
      level,
    );
  }

  // ===========================================================================
  // Issue Analysis
  // ===========================================================================

  /**
   * Returns every structured issue reported by the native addon.
   * @returns The issue list; empty when the addon is unavailable, throws, or
   *   returns something that is not a JSON array.
   */
  async getAllIssues(): Promise<ComplianceIssue[]> {
    const cacheKey = 'compliance:all_issues';
    if (this.resultCache.has(cacheKey)) return this.resultCache.get(cacheKey);

    let issues: ComplianceIssue[] = [];
    if (this.native?.compliance_get_all_issues) {
      try {
        issues = ComplianceManager.parseIssues(this.native.compliance_get_all_issues());
      } catch {
        issues = [];
      }
    }

    this.setCached(cacheKey, issues);
    this.emit('issuesRetrieved', { count: issues.length });
    return issues;
  }

  /** Returns the issues whose `type` equals the given category. */
  async getIssuesOfType(type: ComplianceIssueType): Promise<ComplianceIssue[]> {
    const allIssues = await this.getAllIssues();
    return allIssues.filter(issue => issue.type === type);
  }

  /** Total number of structured issues. */
  async getIssueCount(): Promise<number> {
    return (await this.getAllIssues()).length;
  }

  /** Number of structured issues with severity `error`. */
  async getErrorCount(): Promise<number> {
    return (await this.getAllIssues()).filter(i => i.severity === IssueSeverity.Error).length;
  }

  /** Number of structured issues with severity `warning`. */
  async getWarningCount(): Promise<number> {
    return (await this.getAllIssues()).filter(i => i.severity === IssueSeverity.Warning).length;
  }

  // ===========================================================================
  // Conversion & Fixing
  // ===========================================================================

  /**
   * Asks the document to convert itself to PDF/A.
   * @param level - Level passed through to the document; defaults to `'1b'`.
   * @returns `true` when the document reports a truthy result.
   */
  async convertToPdfA(level: string = '1b'): Promise<boolean> {
    try {
      const result = await this.document?.convertToPdfA?.(level);
      // Cached validations use differently spelled level keys, so drop everything.
      this.invalidateResults();
      this.emit('conversion-complete', { type: 'PDF/A', level, success: result });
      return !!result;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return false;
    }
  }

  /**
   * Asks the document to convert itself to PDF/UA.
   * @returns `true` when the document reports a truthy result.
   */
  async convertToPdfUA(): Promise<boolean> {
    try {
      const result = await this.document?.convertToPdfUA?.();
      this.invalidateResults();
      this.emit('conversion-complete', { type: 'PDF/UA', success: result });
      return !!result;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return false;
    }
  }

  /**
   * Fetches a textual compliance report from the document.
   * @param complianceType - Report selector passed through; defaults to `'all'`.
   * @returns The report, or `''` when unavailable or on failure.
   */
  async getComplianceReport(complianceType: string = 'all'): Promise<string> {
    try {
      return (await this.document?.getComplianceReport?.(complianceType)) ?? '';
    } catch (error) {
      this.emit('error', error);
      return '';
    }
  }

  /**
   * Reports whether fonts are embedded, asking the document first and the
   * native addon second; defaults to `true` when neither can answer.
   */
  async checkFontEmbedding(): Promise<boolean> {
    const cacheKey = 'compliance:fonts_embedded';
    if (this.resultCache.has(cacheKey)) return this.resultCache.get(cacheKey);

    // Await so the cache holds the settled value rather than a pending promise.
    const result =
      (await this.document?.checkFontEmbedding?.()) ??
      this.native?.compliance_has_embedded_fonts?.() ??
      true;
    this.setCached(cacheKey, result);
    return result;
  }

  /** @deprecated Use checkFontEmbedding() instead */
  async hasFontsEmbedded(): Promise<boolean> {
    return this.checkFontEmbedding();
  }

  /**
   * Reports whether the colour space is valid, asking the document first and
   * the native addon second; defaults to `true` when neither can answer.
   */
  async checkColorSpace(): Promise<boolean> {
    const cacheKey = 'compliance:valid_color_space';
    if (this.resultCache.has(cacheKey)) return this.resultCache.get(cacheKey);

    const result =
      (await this.document?.checkColorSpace?.()) ??
      this.native?.compliance_has_valid_color_space?.() ??
      true;
    this.setCached(cacheKey, result);
    return result;
  }

  /** @deprecated Use checkColorSpace() instead */
  async hasValidColorSpace(): Promise<boolean> {
    return this.checkColorSpace();
  }

  /** Reports whether the document has tagged content; `false` when unknown. */
  async checkTaggedContent(): Promise<boolean> {
    try {
      return (await this.document?.checkTaggedContent?.()) ?? false;
    } catch (error) {
      this.emit('error', error);
      return false;
    }
  }

  /** Asks the document to add missing tags; returns its result as a boolean. */
  async addMissingTags(): Promise<boolean> {
    try {
      const result = await this.document?.addMissingTags?.();
      this.invalidateResults();
      this.emit('tags-added');
      return !!result;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return false;
    }
  }

  /** Asks the document to fix font issues; returns the reported count (0 if none). */
  async fixFontIssues(): Promise<number> {
    try {
      const count = (await this.document?.fixFontIssues?.()) ?? 0;
      this.invalidateResults();
      this.emit('fonts-fixed', { count });
      return count;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return 0;
    }
  }

  /** Asks the document to fix colour issues; returns the reported count (0 if none). */
  async fixColorIssues(): Promise<number> {
    try {
      const count = (await this.document?.fixColorIssues?.()) ?? 0;
      this.invalidateResults();
      this.emit('colors-fixed', { count });
      return count;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return 0;
    }
  }

  /** Asks the document to remove unsupported features; returns the reported count. */
  async removeUnsupportedFeatures(): Promise<number> {
    try {
      const count = (await this.document?.removeUnsupportedFeatures?.()) ?? 0;
      this.invalidateResults();
      this.emit('features-removed', { count });
      return count;
    } catch (error) {
      this.invalidateResults();
      this.emit('error', error);
      return 0;
    }
  }

  /**
   * Runs the default-level validations plus the font and tagging checks and
   * returns one plain-text line per failed check.
   */
  async getComplianceIssues(): Promise<string[]> {
    try {
      const issues: string[] = [];
      const pdfA = await this.validatePdfA(PdfALevel.A1b);
      if (!pdfA.isCompliant) issues.push('PDF/A non-compliant');
      const pdfX = await this.validatePdfX(PdfXLevel.X1a);
      if (!pdfX.isCompliant) issues.push('PDF/X non-compliant');
      const pdfUA = await this.validatePdfUA(PdfUALevel.UA1);
      if (!pdfUA.isCompliant) issues.push('PDF/UA non-accessible');
      const fontEmbedded = await this.checkFontEmbedding();
      if (!fontEmbedded) issues.push('Fonts not properly embedded');
      const taggedContent = await this.checkTaggedContent();
      if (!taggedContent) issues.push('Missing proper tagging');
      this.emit('issues-analyzed', { count: issues.length });
      return issues;
    } catch (error) {
      this.emit('error', error);
      return [];
    }
  }

  /**
   * Maps a plain-text issue line to a severity label by substring match.
   * Checks run in order, so a line mentioning both fonts and PDF/A is `critical`.
   */
  getIssueSeverity(issue: string): string {
    if (issue.includes('Font') || issue.includes('PDF/UA')) return 'critical';
    if (issue.includes('PDF/A') || issue.includes('PDF/X')) return 'high';
    if (issue.includes('tagging')) return 'medium';
    return 'low';
  }

  /**
   * Writes the full compliance report to `filePath`, creating parent
   * directories as needed.
   * @returns `true` on success, `false` when writing failed.
   */
  async createComplianceReportFile(filePath: string): Promise<boolean> {
    try {
      const report = await this.getComplianceReport('all');
      await fs.mkdir(dirname(filePath), { recursive: true });
      await fs.writeFile(filePath, report, 'utf8');
      this.emit('report-created', { filePath });
      return true;
    } catch (error) {
      this.emit('error', error);
      return false;
    }
  }

  /** Collects the default-level validations and all checks into one object. */
  async getComplianceSummary(): Promise<object> {
    try {
      return {
        pdfA: await this.validatePdfA(PdfALevel.A1b),
        pdfX: await this.validatePdfX(PdfXLevel.X1a),
        pdfUA: await this.validatePdfUA(PdfUALevel.UA1),
        fontEmbedded: await this.checkFontEmbedding(),
        colorSpace: await this.checkColorSpace(),
        taggedContent: await this.checkTaggedContent(),
        issues: await this.getComplianceIssues(),
      };
    } catch (error) {
      this.emit('error', error);
      return {};
    }
  }

  // ===========================================================================
  // Cache
  // ===========================================================================

  /** Empties the result cache and emits `cacheCleared`. */
  clearCache(): void {
    this.resultCache.clear();
    this.emit('cacheCleared');
  }

  /** Returns the current cache size, its limit, and the cached keys. */
  getCacheStats(): Record<string, any> {
    return {
      cacheSize: this.resultCache.size,
      maxCacheSize: this.maxCacheSize,
      entries: Array.from(this.resultCache.keys()),
    };
  }

  /** Empties the cache and removes every event listener. */
  destroy(): void {
    this.resultCache.clear();
    this.removeAllListeners();
  }

  /** Parses a JSON issue payload, yielding `[]` for invalid JSON or non-arrays. */
  private static parseIssues(json: string): ComplianceIssue[] {
    try {
      const parsed: unknown = JSON.parse(json);
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  }

  /**
   * Shared implementation of the three `validate*` methods: cache lookup,
   * native call, document fallback, caching and event emission.
   *
   * A validator that throws still yields `isCompliant: true` (the historical
   * best-effort contract), but the result is not cached and a
   * `validationError` event is emitted so the failure is observable and the
   * next call retries.
   */
  private async runValidation(
    spec: {
      standard: string;
      nativeFn: string;
      documentFn: string;
      event: string;
      documentArgs: unknown[];
    },
    level: string,
  ): Promise<ComplianceValidationResult> {
    const cacheKey = `compliance:${spec.standard}:${level}`;
    if (this.resultCache.has(cacheKey)) return this.resultCache.get(cacheKey);

    let isCompliant = true;
    let issues: ComplianceIssue[] = [];
    let validationTime = 0;
    let failed = false;

    if (this.native?.[spec.nativeFn]) {
      try {
        const nativeResult = this.native[spec.nativeFn](level);
        isCompliant = nativeResult.is_compliant ?? true;
        validationTime = nativeResult.validation_time ?? 0;
        if (nativeResult.issues_json) {
          issues = ComplianceManager.parseIssues(nativeResult.issues_json);
        }
      } catch (error) {
        failed = true;
        isCompliant = true;
        issues = [];
        validationTime = 0;
        this.emit('validationError', { standard: spec.standard, level, error });
      }
    } else if (this.document?.[spec.documentFn]) {
      try {
        isCompliant = !!(await this.document[spec.documentFn](...spec.documentArgs));
      } catch (error) {
        failed = true;
        isCompliant = true;
        this.emit('validationError', { standard: spec.standard, level, error });
      }
    }

    const result: ComplianceValidationResult = { isCompliant, level, issues, validationTime };
    if (!failed) this.setCached(cacheKey, result);
    this.emit(spec.event, { level, isCompliant, issueCount: issues.length });
    return result;
  }

  /** Drops all cached results after the document may have been modified. */
  private invalidateResults(): void {
    this.resultCache.clear();
  }

  /** Stores a value and evicts the oldest entry once the size limit is exceeded. */
  private setCached(key: string, value: any): void {
    this.resultCache.set(key, value);
    if (this.resultCache.size > this.maxCacheSize) {
      const firstKey = this.resultCache.keys().next().value;
      if (firstKey !== undefined) this.resultCache.delete(firstKey);
    }
  }
}
|
|
374
|
+
|
|
375
|
+
export default ComplianceManager;
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Manager for page-level content analysis
|
|
3
|
+
*
|
|
4
|
+
* Provides methods to analyze content type, complexity, and characteristics of PDF pages.
|
|
5
|
+
* This manager operates on a specific page, unlike document-level managers.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { ContentManager, PdfDocument } from 'pdf-oxide';
|
|
10
|
+
*
|
|
11
|
+
* const doc = PdfDocument.open('document.pdf');
|
|
12
|
+
* const contentManager = new ContentManager(doc, 0);
|
|
13
|
+
*
|
|
14
|
+
* if (!contentManager.isBlank()) {
|
|
15
|
+
* console.log(contentManager.getContentSummary());
|
|
16
|
+
* }
|
|
17
|
+
* ```
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/** Aggregate of the per-page checks for one page. */
export interface ContentAnalysis {
  /** Zero-based index of the analysed page. */
  pageIndex: number;
  /** Whether the page has any content. */
  hasContent: boolean;
  /** Whether the page appears blank. */
  isBlank: boolean;
  /** Approximate content stream size in bytes. */
  contentSize: number;
  /** Complexity score on a 0-100 scale. */
  complexityScore: number;
  /** Human-readable page dimensions. */
  dimensions: string;
  /** Content type labels detected on the page. */
  contentTypes: string[];
  /** Whether the page likely contains form fields. */
  likelyHasForms: boolean;
  /** Whether the page likely contains tables. */
  likelyHasTables: boolean;
  /** Whether the page likely contains images. */
  likelyHasImages: boolean;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Page-scoped content analyser.
 *
 * Each instance is bound to one page of one document and memoises its
 * answers. Most probes are still placeholders that return fixed values until
 * they are wired to the native layer; only the dimensions summary reads from
 * the document (via `document.getPage(pageIndex)`).
 */
export class ContentManager {
  private _document: any;
  private _pageIndex: number;
  private _cache: Map<string, any>;

  /**
   * Creates a new ContentManager for a specific page.
   * @param document - The PDF document
   * @param pageIndex - Page index to analyze (0-based integer)
   * @throws Error if document is null/undefined, or if pageIndex is not a
   *   non-negative integer (NaN, Infinity and fractional values are rejected)
   */
  constructor(document: any, pageIndex: number) {
    if (!document) {
      throw new Error('Document is required');
    }
    // Number.isInteger also rejects NaN/Infinity, which `pageIndex < 0` lets through.
    if (typeof pageIndex !== 'number' || !Number.isInteger(pageIndex) || pageIndex < 0) {
      throw new Error('Page index must be a non-negative number');
    }
    this._document = document;
    this._pageIndex = pageIndex;
    this._cache = new Map();
  }

  /**
   * Gets the page index this manager operates on
   * @returns Page index
   */
  get pageIndex(): number {
    return this._pageIndex;
  }

  /**
   * Clears the content cache
   */
  clearCache(): void {
    this._cache.clear();
  }

  /**
   * Checks if the page has any content
   * @returns True if the page has content (placeholder: always `true`)
   */
  hasContent(): boolean {
    // Placeholder - would check via FFI
    return this.memoize(`content:has:${this._pageIndex}`, () => true, true);
  }

  /**
   * Gets the approximate size of the content stream in bytes
   * @returns Content size in bytes (placeholder: always `0`)
   */
  getContentSize(): number {
    // Placeholder - would call FFI
    return this.memoize(`content:size:${this._pageIndex}`, () => 0, 0);
  }

  /**
   * Checks if the page appears to be blank (no visible content)
   * @returns True if the page is blank (placeholder: always `false`)
   */
  isBlank(): boolean {
    // Placeholder - would call FFI
    return this.memoize(`content:blank:${this._pageIndex}`, () => false, false);
  }

  /**
   * Gets a complexity score for the page (0-100)
   * Higher scores indicate more complex content.
   * @returns Complexity score from 0 to 100 (placeholder: always `0`)
   */
  getComplexityScore(): number {
    // Placeholder - would call FFI
    return this.memoize(`content:complexity:${this._pageIndex}`, () => 0, 0);
  }

  /**
   * Gets a human-readable summary of page dimensions in points, inches and mm.
   * A missing or zero width/height falls back to 612 x 792 pt.
   * @returns Formatted dimensions string, or `'0 x 0 pt'` (uncached) when the
   *   page lookup or formatting throws
   */
  getDimensionsSummary(): string {
    return this.memoize(
      `content:dimensions:${this._pageIndex}`,
      () => {
        const page = this._document.getPage(this._pageIndex);
        const width = page?.width || 612;
        const height = page?.height || 792;

        // 72 pt per inch; 0.352778 mm per pt.
        const inches = `${(width / 72).toFixed(2)} x ${(height / 72).toFixed(2)} in`;
        const millimetres = `${(width * 0.352778).toFixed(0)} x ${(height * 0.352778).toFixed(0)} mm`;

        return `${width.toFixed(0)} x ${height.toFixed(0)} pt (${inches}, ${millimetres})`;
      },
      '0 x 0 pt',
    );
  }

  /**
   * Checks if the page likely contains form fields
   * @returns True if the page likely has forms (placeholder: always `false`)
   */
  likelyHasForms(): boolean {
    // Placeholder - would call FFI
    return this.memoize(`content:likely_forms:${this._pageIndex}`, () => false, false);
  }

  /**
   * Checks if the page likely contains tables
   * @returns True if the page likely has tables (placeholder: always `false`)
   */
  likelyHasTables(): boolean {
    // Placeholder - would call FFI
    return this.memoize(`content:likely_tables:${this._pageIndex}`, () => false, false);
  }

  /**
   * Checks if the page likely contains images
   * @returns True if the page likely has images (placeholder: always `false`)
   */
  likelyHasImages(): boolean {
    // Placeholder - would call FFI
    return this.memoize(`content:likely_images:${this._pageIndex}`, () => false, false);
  }

  /**
   * Gets a list of content types detected on the page
   * @returns A fresh array of content type strings; mutating it does not
   *   affect later calls
   *
   * @example
   * ```typescript
   * const types = manager.getContentTypes();
   * console.log(`Content types: ${types.join(', ')}`);
   * ```
   */
  getContentTypes(): string[] {
    const cacheKey = `content:types:${this._pageIndex}`;
    if (this._cache.has(cacheKey)) {
      // Hand out a copy so callers cannot corrupt the cached list.
      return [...this._cache.get(cacheKey)];
    }

    const types: string[] = [];

    if (!this.hasContent()) {
      types.push('empty');
    } else {
      types.push('text'); // Most pages have text
      if (this.likelyHasImages()) types.push('images');
      if (this.likelyHasTables()) types.push('tables');
      if (this.likelyHasForms()) types.push('forms');
    }

    this._cache.set(cacheKey, types);
    return [...types];
  }

  /**
   * Gets a human-readable summary of the page content
   * @returns `'Blank page'` for blank pages, otherwise the dimensions,
   *   content types and complexity joined with `'; '`
   *
   * @example
   * ```typescript
   * const summary = manager.getContentSummary();
   * console.log(summary);
   * // Output: "Dimensions: 612 x 792 pt (...); Content: text, images; Complexity: 45/100"
   * ```
   */
  getContentSummary(): string {
    const cacheKey = `content:summary:${this._pageIndex}`;
    if (this._cache.has(cacheKey)) {
      return this._cache.get(cacheKey);
    }

    if (this.isBlank()) {
      return 'Blank page';
    }

    const summary = [
      `Dimensions: ${this.getDimensionsSummary()}`,
      `Content: ${this.getContentTypes().join(', ')}`,
      `Complexity: ${this.getComplexityScore()}/100`,
    ].join('; ');

    this._cache.set(cacheKey, summary);
    return summary;
  }

  /**
   * Analyzes page content thoroughly
   * @returns Detailed content analysis combining every individual probe
   */
  analyze(): ContentAnalysis {
    return {
      pageIndex: this._pageIndex,
      hasContent: this.hasContent(),
      isBlank: this.isBlank(),
      contentSize: this.getContentSize(),
      complexityScore: this.getComplexityScore(),
      dimensions: this.getDimensionsSummary(),
      contentTypes: this.getContentTypes(),
      likelyHasForms: this.likelyHasForms(),
      likelyHasTables: this.likelyHasTables(),
      likelyHasImages: this.likelyHasImages(),
    };
  }

  /**
   * Returns the cached value for `key`, computing and caching it on a miss.
   * When `compute` throws, `fallback` is returned and nothing is cached, so
   * the next call retries.
   */
  private memoize<T>(key: string, compute: () => T, fallback: T): T {
    if (this._cache.has(key)) {
      return this._cache.get(key);
    }
    try {
      const value = compute();
      this._cache.set(key, value);
      return value;
    } catch {
      return fallback;
    }
  }
}
|