pdf-oxide 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +218 -0
  2. package/binding.gyp +35 -0
  3. package/package.json +78 -0
  4. package/src/builders/annotation-builder.ts +367 -0
  5. package/src/builders/conversion-options-builder.ts +257 -0
  6. package/src/builders/index.ts +12 -0
  7. package/src/builders/metadata-builder.ts +317 -0
  8. package/src/builders/pdf-builder.ts +386 -0
  9. package/src/builders/search-options-builder.ts +151 -0
  10. package/src/document-editor-manager.ts +318 -0
  11. package/src/errors.ts +1629 -0
  12. package/src/form-field-manager.ts +666 -0
  13. package/src/hybrid-ml-manager.ts +283 -0
  14. package/src/index.ts +453 -0
  15. package/src/managers/accessibility-manager.ts +338 -0
  16. package/src/managers/annotation-manager.ts +439 -0
  17. package/src/managers/barcode-manager.ts +235 -0
  18. package/src/managers/batch-manager.ts +533 -0
  19. package/src/managers/cache-manager.ts +486 -0
  20. package/src/managers/compliance-manager.ts +375 -0
  21. package/src/managers/content-manager.ts +339 -0
  22. package/src/managers/document-utility-manager.ts +922 -0
  23. package/src/managers/dom-pdf-creator.ts +365 -0
  24. package/src/managers/editing-manager.ts +514 -0
  25. package/src/managers/enterprise-manager.ts +478 -0
  26. package/src/managers/extended-managers.ts +437 -0
  27. package/src/managers/extraction-manager.ts +583 -0
  28. package/src/managers/final-utilities.ts +429 -0
  29. package/src/managers/hybrid-ml-advanced.ts +479 -0
  30. package/src/managers/index.ts +239 -0
  31. package/src/managers/layer-manager.ts +500 -0
  32. package/src/managers/metadata-manager.ts +303 -0
  33. package/src/managers/ocr-manager.ts +756 -0
  34. package/src/managers/optimization-manager.ts +262 -0
  35. package/src/managers/outline-manager.ts +196 -0
  36. package/src/managers/page-manager.ts +289 -0
  37. package/src/managers/pattern-detection.ts +440 -0
  38. package/src/managers/rendering-manager.ts +863 -0
  39. package/src/managers/search-manager.ts +385 -0
  40. package/src/managers/security-manager.ts +345 -0
  41. package/src/managers/signature-manager.ts +1664 -0
  42. package/src/managers/streams.ts +618 -0
  43. package/src/managers/xfa-manager.ts +500 -0
  44. package/src/pdf-creator-manager.ts +494 -0
  45. package/src/properties.ts +522 -0
  46. package/src/result-accessors-manager.ts +867 -0
  47. package/src/tests/advanced-features.test.ts +414 -0
  48. package/src/tests/advanced.test.ts +266 -0
  49. package/src/tests/extended-managers.test.ts +316 -0
  50. package/src/tests/final-utilities.test.ts +455 -0
  51. package/src/tests/foundation.test.ts +315 -0
  52. package/src/tests/high-demand.test.ts +257 -0
  53. package/src/tests/specialized.test.ts +97 -0
  54. package/src/thumbnail-manager.ts +272 -0
  55. package/src/types/common.ts +142 -0
  56. package/src/types/document-types.ts +457 -0
  57. package/src/types/index.ts +6 -0
  58. package/src/types/manager-types.ts +284 -0
  59. package/src/types/native-bindings.ts +517 -0
  60. package/src/workers/index.ts +7 -0
  61. package/src/workers/pool.ts +274 -0
  62. package/src/workers/worker.ts +131 -0
@@ -0,0 +1,257 @@
1
+ /**
2
+ * Builder for conversion options when converting PDF to other formats
3
+ *
4
+ * Configures how PDFs are converted to Markdown, HTML, or other text formats
5
+ * with options for formatting, image handling, and content extraction.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { ConversionOptionsBuilder } from 'pdf_oxide';
10
+ *
11
+ * const options = ConversionOptionsBuilder.create()
12
+ * .preserveFormatting(true)
13
+ * .includeImages(true)
14
+ * .detectHeadings(true)
15
+ * .detectTables(true)
16
+ * .build();
17
+ *
18
+ * const doc = PdfDocument.open('file.pdf');
19
+ * const markdown = doc.toMarkdown(0, options);
20
+ * ```
21
+ */
22
+
23
/**
 * Page span attached to a set of conversion options.
 */
interface PageRangeOptions {
  /** First page index of the span; the builder only accepts non-negative integers. */
  start: number;
  /** Last page index of the span; the builder guarantees `end >= start`. */
  end: number;
}

/**
 * Plain options object produced by {@link ConversionOptionsBuilder.build}.
 */
export interface ConversionOptions {
  /** Formatting preservation toggle. Builder default: `true`. */
  preserveFormatting: boolean;
  /** Heading detection toggle. Builder default: `true`. */
  detectHeadings: boolean;
  /** Table detection toggle. Builder default: `true`. */
  detectTables: boolean;
  /** List detection toggle. Builder default: `true`. */
  detectLists: boolean;
  /** Image inclusion toggle. Builder default: `true`. */
  includeImages: boolean;
  /** Lower-cased image format name. Builder default: `'png'`. */
  imageFormat: string;
  /** Image quality in the range 0-100. Builder default: `85`. */
  imageQuality: number;
  /** Positive, finite upper bound for image dimensions. Builder default: `2048`. */
  maxImageDimension: number;
  /** Non-empty output encoding name. Builder default: `'utf-8'`. */
  outputEncoding: string;
  /** Whitespace normalization toggle. Builder default: `true`. */
  normalizeWhitespace: boolean;
  /** Annotation extraction toggle. Builder default: `false`. */
  extractAnnotations: boolean;
  /** Structure tree usage toggle. Builder default: `true`. */
  useStructureTree: boolean;
  /** Optional page span; `undefined` when no range was configured. */
  pageRange?: PageRangeOptions;
}

/**
 * Fluent builder that validates each setting as it is supplied and
 * produces a {@link ConversionOptions} object via {@link build}.
 *
 * Every setter returns `this` so calls can be chained, and throws an
 * `Error` with a descriptive message when given an invalid value.
 */
export class ConversionOptionsBuilder {
  /** Image formats accepted by {@link imageFormat} (compared case-insensitively). */
  private static readonly VALID_IMAGE_FORMATS: readonly string[] = ['png', 'jpg', 'jpeg', 'webp'];

  private _preserveFormatting: boolean = true;
  private _detectHeadings: boolean = true;
  private _detectTables: boolean = true;
  private _detectLists: boolean = true;
  private _includeImages: boolean = true;
  private _imageFormat: string = 'png';
  private _imageQuality: number = 85;
  private _maxImageDimension: number = 2048;
  private _outputEncoding: string = 'utf-8';
  private _normalizeWhitespace: boolean = true;
  private _extractAnnotations: boolean = false;
  private _useStructureTree: boolean = true;
  private _pageRange?: PageRangeOptions;

  /**
   * Instances are obtained through {@link ConversionOptionsBuilder.create}.
   */
  private constructor() {}

  /**
   * Throws unless `value` is a boolean.
   * @param value - Candidate value supplied by the caller
   * @param name - Setter name used as the prefix of the error message
   * @throws Error when `value` is not of type `boolean`
   */
  private static requireBoolean(value: unknown, name: string): void {
    if (typeof value !== 'boolean') {
      throw new Error(`${name} must be a boolean`);
    }
  }

  /**
   * Creates a new ConversionOptionsBuilder instance
   * @returns New builder instance holding the default settings
   */
  static create(): ConversionOptionsBuilder {
    return new ConversionOptionsBuilder();
  }

  /**
   * Creates options with default settings optimized for readability
   * @returns Conversion options with default preset
   */
  static default(): ConversionOptions {
    return ConversionOptionsBuilder.create().build();
  }

  /**
   * Creates options optimized for text-only extraction
   * @returns Conversion options with text-only preset
   */
  static textOnly(): ConversionOptions {
    return ConversionOptionsBuilder.create()
      .preserveFormatting(false)
      .detectHeadings(true)
      .detectTables(false)
      .detectLists(false)
      .includeImages(false)
      .build();
  }

  /**
   * Creates options optimized for maximum quality and detail preservation
   * @returns Conversion options with high-quality preset
   */
  static highQuality(): ConversionOptions {
    return ConversionOptionsBuilder.create()
      .preserveFormatting(true)
      .detectHeadings(true)
      .detectTables(true)
      .detectLists(true)
      .includeImages(true)
      .imageQuality(95)
      .normalizeWhitespace(false)
      .build();
  }

  /**
   * Creates options for fast, basic conversion
   * @returns Conversion options with fast preset
   */
  static fast(): ConversionOptions {
    return ConversionOptionsBuilder.create()
      .preserveFormatting(false)
      .detectHeadings(false)
      .detectTables(false)
      .detectLists(false)
      .includeImages(false)
      .normalizeWhitespace(true)
      .build();
  }

  /**
   * Sets the formatting preservation toggle
   * @param preserve - New toggle value
   * @returns This builder for chaining
   * @throws Error when `preserve` is not a boolean
   */
  preserveFormatting(preserve: boolean): this {
    ConversionOptionsBuilder.requireBoolean(preserve, 'preserveFormatting');
    this._preserveFormatting = preserve;
    return this;
  }

  /**
   * Sets the heading detection toggle
   * @param detect - New toggle value
   * @returns This builder for chaining
   * @throws Error when `detect` is not a boolean
   */
  detectHeadings(detect: boolean): this {
    ConversionOptionsBuilder.requireBoolean(detect, 'detectHeadings');
    this._detectHeadings = detect;
    return this;
  }

  /**
   * Sets the table detection toggle
   * @param detect - New toggle value
   * @returns This builder for chaining
   * @throws Error when `detect` is not a boolean
   */
  detectTables(detect: boolean): this {
    ConversionOptionsBuilder.requireBoolean(detect, 'detectTables');
    this._detectTables = detect;
    return this;
  }

  /**
   * Sets the list detection toggle
   * @param detect - New toggle value
   * @returns This builder for chaining
   * @throws Error when `detect` is not a boolean
   */
  detectLists(detect: boolean): this {
    ConversionOptionsBuilder.requireBoolean(detect, 'detectLists');
    this._detectLists = detect;
    return this;
  }

  /**
   * Sets the image inclusion toggle
   * @param include - New toggle value
   * @returns This builder for chaining
   * @throws Error when `include` is not a boolean
   */
  includeImages(include: boolean): this {
    ConversionOptionsBuilder.requireBoolean(include, 'includeImages');
    this._includeImages = include;
    return this;
  }

  /**
   * Sets the image format; the value is stored lower-cased
   * @param format - One of `png`, `jpg`, `jpeg`, `webp` (any letter case)
   * @returns This builder for chaining
   * @throws Error when `format` is not a string or not a supported format
   */
  imageFormat(format: string): this {
    const allowed = ConversionOptionsBuilder.VALID_IMAGE_FORMATS;
    // Guard the type first so a non-string yields the same descriptive
    // Error as an unknown format instead of a TypeError from toLowerCase().
    const normalized = typeof format === 'string' ? format.toLowerCase() : undefined;
    if (normalized === undefined || !allowed.includes(normalized)) {
      throw new Error(`Invalid image format. Must be one of: ${allowed.join(', ')}`);
    }
    this._imageFormat = normalized;
    return this;
  }

  /**
   * Sets the image quality
   * @param quality - Number in the closed range 0-100
   * @returns This builder for chaining
   * @throws Error when `quality` is not a number, is NaN, or is out of range
   */
  imageQuality(quality: number): this {
    // NaN fails every relational comparison, so it must be rejected explicitly.
    if (typeof quality !== 'number' || Number.isNaN(quality) || quality < 0 || quality > 100) {
      throw new Error('imageQuality must be a number between 0 and 100');
    }
    this._imageQuality = quality;
    return this;
  }

  /**
   * Sets the maximum image dimension
   * @param maxDimension - Positive, finite number
   * @returns This builder for chaining
   * @throws Error when `maxDimension` is not a finite number greater than zero
   */
  maxImageDimension(maxDimension: number): this {
    // Number.isFinite is false for non-numbers, NaN and +/-Infinity.
    if (!Number.isFinite(maxDimension) || maxDimension <= 0) {
      throw new Error('maxImageDimension must be a positive number');
    }
    this._maxImageDimension = maxDimension;
    return this;
  }

  /**
   * Sets the output encoding name
   * @param encoding - Non-empty encoding name; stored as given
   * @returns This builder for chaining
   * @throws Error when `encoding` is not a non-empty string
   */
  outputEncoding(encoding: string): this {
    if (typeof encoding !== 'string' || encoding.length === 0) {
      throw new Error('outputEncoding must be a non-empty string');
    }
    this._outputEncoding = encoding;
    return this;
  }

  /**
   * Sets the whitespace normalization toggle
   * @param normalize - New toggle value
   * @returns This builder for chaining
   * @throws Error when `normalize` is not a boolean
   */
  normalizeWhitespace(normalize: boolean): this {
    ConversionOptionsBuilder.requireBoolean(normalize, 'normalizeWhitespace');
    this._normalizeWhitespace = normalize;
    return this;
  }

  /**
   * Sets the annotation extraction toggle
   * @param extract - New toggle value
   * @returns This builder for chaining
   * @throws Error when `extract` is not a boolean
   */
  extractAnnotations(extract: boolean): this {
    ConversionOptionsBuilder.requireBoolean(extract, 'extractAnnotations');
    this._extractAnnotations = extract;
    return this;
  }

  /**
   * Sets the structure tree usage toggle
   * @param use - New toggle value
   * @returns This builder for chaining
   * @throws Error when `use` is not a boolean
   */
  useStructureTree(use: boolean): this {
    ConversionOptionsBuilder.requireBoolean(use, 'useStructureTree');
    this._useStructureTree = use;
    return this;
  }

  /**
   * Restricts the options to a span of pages
   * @param start - Non-negative integer page index
   * @param end - Integer page index, not smaller than `start`
   * @returns This builder for chaining
   * @throws Error when either bound is not an integer, `start` is negative,
   *   or `end` is smaller than `start`
   */
  pageRange(start: number, end: number): this {
    // Number.isInteger rejects non-numbers, NaN, Infinity and fractions,
    // none of which can identify a page.
    if (!Number.isInteger(start) || !Number.isInteger(end) || start < 0 || end < start) {
      throw new Error('pageRange must have valid start and end indices');
    }
    this._pageRange = { start, end };
    return this;
  }

  /**
   * Builds the options object from the current builder state
   * @returns A new options object; it shares no mutable state with the
   *   builder, so later changes to either side do not affect the other
   */
  build(): ConversionOptions {
    const range = this._pageRange;
    return {
      preserveFormatting: this._preserveFormatting,
      detectHeadings: this._detectHeadings,
      detectTables: this._detectTables,
      detectLists: this._detectLists,
      includeImages: this._includeImages,
      imageFormat: this._imageFormat,
      imageQuality: this._imageQuality,
      maxImageDimension: this._maxImageDimension,
      outputEncoding: this._outputEncoding,
      normalizeWhitespace: this._normalizeWhitespace,
      extractAnnotations: this._extractAnnotations,
      useStructureTree: this._useStructureTree,
      // Copy so callers mutating the result cannot alter the builder.
      pageRange: range === undefined ? undefined : { start: range.start, end: range.end },
    };
  }
}
249
+
250
/**
 * Legacy free-function entry point that hands back a fresh
 * ConversionOptionsBuilder by delegating to the class's static factory.
 * @deprecated Use ConversionOptionsBuilder.create() instead
 * @returns New builder instance
 */
export function createConversionOptionsBuilder(): ConversionOptionsBuilder {
  const builder = ConversionOptionsBuilder.create();
  return builder;
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * PDF Oxide Builders - Fluent APIs for configuring PDF operations
3
+ *
4
+ * This module exports builder classes that implement the fluent builder pattern
5
+ * for configuring PDF documents, annotations, search options, metadata, and conversion.
6
+ */
7
+
8
+ export * from './pdf-builder';
9
+ export * from './conversion-options-builder';
10
+ export * from './metadata-builder';
11
+ export * from './annotation-builder';
12
+ export * from './search-options-builder';
@@ -0,0 +1,317 @@
1
+ /**
2
+ * Builder for document metadata configuration
3
+ *
4
+ * Configures document information like title, author, subject, keywords,
5
+ * creation date, and custom properties.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { MetadataBuilder } from 'pdf_oxide';
10
+ *
11
+ * const metadata = MetadataBuilder.create()
12
+ * .title('My Document')
13
+ * .author('John Doe')
14
+ * .subject('Important Information')
15
+ * .keywords(['document', 'important', 'example'])
16
+ * .creator('MyApp v1.0')
17
+ * .build();
18
+ *
19
+ * pdf.setMetadata(metadata);
20
+ * ```
21
+ */
22
+
23
/**
 * Plain metadata object produced by {@link MetadataBuilder.build}.
 */
export interface Metadata {
  /** Document title; `undefined` when unset or set to an empty string. */
  title?: string;
  /** Author name; `undefined` when unset or set to an empty string. */
  author?: string;
  /** Document subject; `undefined` when unset or set to an empty string. */
  subject?: string;
  /** Keyword list; empty when none were supplied. */
  keywords: string[];
  /** Creator application name; `undefined` when unset or set to an empty string. */
  creator?: string;
  /** Producer name; the builder falls back to `'PDF Oxide'`. */
  producer: string;
  /** Creation timestamp; defaults to the moment the builder was created. */
  creationDate: Date;
  /** Modification timestamp; defaults to the moment the builder was created. */
  modificationDate: Date;
  /** Custom string properties keyed by name. */
  customProperties: Record<string, string>;
}

/**
 * Fluent builder that validates document metadata as it is supplied and
 * produces a {@link Metadata} object via {@link build}.
 *
 * Every setter returns `this` so calls can be chained, and throws an
 * `Error` with a descriptive message when given an invalid value.
 */
export class MetadataBuilder {
  /** Producer name used by default and whenever an empty producer is supplied. */
  private static readonly DEFAULT_PRODUCER: string = 'PDF Oxide';

  private _title?: string;
  private _author?: string;
  private _subject?: string;
  private _keywords: string[] = [];
  private _creator?: string;
  private _producer: string = MetadataBuilder.DEFAULT_PRODUCER;
  private _creationDate: Date;
  private _modificationDate: Date;
  private _customProperties: Record<string, string> = {};

  /**
   * Instances are obtained through {@link MetadataBuilder.create}.
   */
  private constructor() {
    // Take one clock reading so both default timestamps are identical.
    const now = Date.now();
    this._creationDate = new Date(now);
    this._modificationDate = new Date(now);
  }

  /**
   * Validates a date argument and returns an independent copy of it.
   * @param date - Candidate value supplied by the caller
   * @param name - Setter name used as the prefix of the error message
   * @returns A new Date holding the same instant as `date`
   * @throws Error when `date` is not a Date instance or is an Invalid Date
   */
  private static copyValidDate(date: Date, name: string): Date {
    if (!(date instanceof Date)) {
      throw new Error(`${name} must be a Date object`);
    }
    const time = date.getTime();
    // An Invalid Date still passes instanceof; getTime() is NaN for it.
    if (Number.isNaN(time)) {
      throw new Error(`${name} must be a valid Date`);
    }
    return new Date(time);
  }

  /**
   * Creates a new MetadataBuilder instance
   * @returns New builder instance
   */
  static create(): MetadataBuilder {
    return new MetadataBuilder();
  }

  /**
   * Sets the document title
   * @param title - The document title; an empty string clears it
   * @returns This builder for chaining
   * @throws Error when `title` is not a string
   *
   * @example
   * ```typescript
   * builder.title('Project Report 2024');
   * ```
   */
  title(title: string): this {
    if (typeof title !== 'string') {
      throw new Error('Title must be a string');
    }
    this._title = title.length > 0 ? title : undefined;
    return this;
  }

  /**
   * Sets the document author
   * @param author - The author name; an empty string clears it
   * @returns This builder for chaining
   * @throws Error when `author` is not a string
   *
   * @example
   * ```typescript
   * builder.author('Jane Doe');
   * ```
   */
  author(author: string): this {
    if (typeof author !== 'string') {
      throw new Error('Author must be a string');
    }
    this._author = author.length > 0 ? author : undefined;
    return this;
  }

  /**
   * Sets the document subject
   * @param subject - The document subject; an empty string clears it
   * @returns This builder for chaining
   * @throws Error when `subject` is not a string
   *
   * @example
   * ```typescript
   * builder.subject('Annual Report');
   * ```
   */
  subject(subject: string): this {
    if (typeof subject !== 'string') {
      throw new Error('Subject must be a string');
    }
    this._subject = subject.length > 0 ? subject : undefined;
    return this;
  }

  /**
   * Replaces the document keywords with a copy of the given array
   * @param keywords - Array of keywords
   * @returns This builder for chaining
   * @throws Error when `keywords` is not an array of strings
   *
   * @example
   * ```typescript
   * builder.keywords(['report', 'annual', 'financial']);
   * ```
   */
  keywords(keywords: string[]): this {
    if (!Array.isArray(keywords)) {
      throw new Error('Keywords must be an array');
    }
    if (!keywords.every((k) => typeof k === 'string')) {
      throw new Error('All keywords must be strings');
    }
    this._keywords = [...keywords];
    return this;
  }

  /**
   * Adds a single keyword; a keyword already present is not added again
   * @param keyword - A non-empty keyword to add
   * @returns This builder for chaining
   * @throws Error when `keyword` is not a non-empty string
   *
   * @example
   * ```typescript
   * builder.addKeyword('Important').addKeyword('Urgent');
   * ```
   */
  addKeyword(keyword: string): this {
    if (typeof keyword !== 'string' || keyword.length === 0) {
      throw new Error('Keyword must be a non-empty string');
    }
    if (!this._keywords.includes(keyword)) {
      this._keywords.push(keyword);
    }
    return this;
  }

  /**
   * Sets the creator application name
   * @param creator - Name of the application that created the document;
   *   an empty string clears it
   * @returns This builder for chaining
   * @throws Error when `creator` is not a string
   *
   * @example
   * ```typescript
   * builder.creator('MyApp v2.1.0');
   * ```
   */
  creator(creator: string): this {
    if (typeof creator !== 'string') {
      throw new Error('Creator must be a string');
    }
    this._creator = creator.length > 0 ? creator : undefined;
    return this;
  }

  /**
   * Sets the PDF producer (usually the library/tool that saved it)
   * @param producer - Name of the PDF producer; an empty string restores
   *   the default `'PDF Oxide'`
   * @returns This builder for chaining
   * @throws Error when `producer` is not a string
   *
   * @example
   * ```typescript
   * builder.producer('PDF Oxide v0.3.2');
   * ```
   */
  producer(producer: string): this {
    if (typeof producer !== 'string') {
      throw new Error('Producer must be a string');
    }
    this._producer = producer.length > 0 ? producer : MetadataBuilder.DEFAULT_PRODUCER;
    return this;
  }

  /**
   * Sets the document creation date; the builder stores its own copy
   * @param date - The creation date
   * @returns This builder for chaining
   * @throws Error when `date` is not a Date instance or is an Invalid Date
   *
   * @example
   * ```typescript
   * builder.creationDate(new Date('2024-01-15'));
   * ```
   */
  creationDate(date: Date): this {
    this._creationDate = MetadataBuilder.copyValidDate(date, 'creationDate');
    return this;
  }

  /**
   * Sets the document modification date; the builder stores its own copy
   * @param date - The modification date
   * @returns This builder for chaining
   * @throws Error when `date` is not a Date instance or is an Invalid Date
   *
   * @example
   * ```typescript
   * builder.modificationDate(new Date());
   * ```
   */
  modificationDate(date: Date): this {
    this._modificationDate = MetadataBuilder.copyValidDate(date, 'modificationDate');
    return this;
  }

  /**
   * Sets a custom metadata property, overwriting any existing value
   * @param key - Non-empty property key
   * @param value - Property value
   * @returns This builder for chaining
   * @throws Error when `key` is not a non-empty string or `value` is not a string
   *
   * @example
   * ```typescript
   * builder.customProperty('Department', 'Engineering');
   * builder.customProperty('Classification', 'Confidential');
   * ```
   */
  customProperty(key: string, value: string): this {
    if (typeof key !== 'string' || key.length === 0) {
      throw new Error('Property key must be a non-empty string');
    }
    if (typeof value !== 'string') {
      throw new Error('Property value must be a string');
    }
    this._customProperties[key] = value;
    return this;
  }

  /**
   * Sets multiple custom metadata properties, merging them into those
   * already set. All entries are validated before any is stored, so a
   * rejected call leaves the builder unchanged.
   * @param properties - Object with key-value pairs
   * @returns This builder for chaining
   * @throws Error when `properties` is not a plain object (arrays and
   *   `null` are rejected), a key is empty, or a value is not a string
   *
   * @example
   * ```typescript
   * builder.customProperties({
   *   Department: 'Engineering',
   *   Classification: 'Confidential',
   *   ProjectCode: 'PROJ-2024-001'
   * });
   * ```
   */
  customProperties(properties: Record<string, string>): this {
    if (typeof properties !== 'object' || properties === null || Array.isArray(properties)) {
      throw new Error('customProperties must be an object');
    }
    const entries = Object.entries(properties);
    // Validate first, store second: a bad entry must not leave earlier
    // entries half-applied.
    for (const [key, value] of entries) {
      if (key.length === 0) {
        throw new Error('Property key must be a non-empty string');
      }
      if (typeof value !== 'string') {
        throw new Error(`Custom property "${key}" value must be a string`);
      }
    }
    for (const [key, value] of entries) {
      this._customProperties[key] = value;
    }
    return this;
  }

  /**
   * Builds and returns the metadata object
   * @returns A snapshot of the current state. Arrays, dates and the
   *   property map are copied, so the result and the builder can be
   *   changed independently. The result is not frozen.
   *
   * @example
   * ```typescript
   * const metadata = builder.build();
   * ```
   */
  build(): Metadata {
    return {
      title: this._title,
      author: this._author,
      subject: this._subject,
      keywords: [...this._keywords],
      creator: this._creator,
      producer: this._producer,
      creationDate: new Date(this._creationDate),
      modificationDate: new Date(this._modificationDate),
      customProperties: { ...this._customProperties },
    };
  }

  /**
   * Resets the modification date to the current time
   * @returns This builder for chaining
   */
  withCurrentDate(): this {
    this._modificationDate = new Date();
    return this;
  }
}
309
+
310
/**
 * Legacy free-function entry point that hands back a fresh
 * MetadataBuilder by delegating to the class's static factory.
 * @deprecated Use MetadataBuilder.create() instead
 * @returns New builder instance
 */
export function createMetadataBuilder(): MetadataBuilder {
  const builder = MetadataBuilder.create();
  return builder;
}