pdf-lite 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/EXAMPLES.md CHANGED
@@ -915,6 +915,18 @@ function createTextField(
915
915
  // Default appearance string (font and size)
916
916
  fieldDict.set('DA', new PdfString('/Helv 12 Tf 0 g'))
917
917
 
918
+ // Border style (solid, 1pt width)
919
+ const borderDict = new PdfDictionary()
920
+ borderDict.set('W', new PdfNumber(1))
921
+ borderDict.set('S', new PdfName('S'))
922
+ fieldDict.set('BS', borderDict)
923
+
924
+ // Appearance characteristics
925
+ const mkDict = new PdfDictionary()
926
+ mkDict.set('BC', new PdfArray([new PdfNumber(0)])) // Border color (black)
927
+ mkDict.set('BG', new PdfArray([new PdfNumber(1)])) // Background color (white)
928
+ fieldDict.set('MK', mkDict)
929
+
918
930
  return new PdfIndirectObject({ content: fieldDict })
919
931
  }
920
932
 
@@ -951,6 +963,18 @@ function createCheckboxField(
951
963
  fieldDict.set('V', new PdfName(checked ? 'Yes' : 'Off'))
952
964
  fieldDict.set('AS', new PdfName(checked ? 'Yes' : 'Off'))
953
965
 
966
+ // Border style
967
+ const borderDict = new PdfDictionary()
968
+ borderDict.set('W', new PdfNumber(1))
969
+ borderDict.set('S', new PdfName('S'))
970
+ fieldDict.set('BS', borderDict)
971
+
972
+ // Appearance characteristics
973
+ const mkDict = new PdfDictionary()
974
+ mkDict.set('BC', new PdfArray([new PdfNumber(0)])) // Border color (black)
975
+ mkDict.set('BG', new PdfArray([new PdfNumber(1)])) // Background color (white)
976
+ fieldDict.set('MK', mkDict)
977
+
954
978
  return new PdfIndirectObject({ content: fieldDict })
955
979
  }
956
980
 
@@ -978,7 +1002,7 @@ const contentStream = new PdfIndirectObject({
978
1002
  0 -30 Td (Email:) Tj
979
1003
  0 -30 Td (Phone:) Tj
980
1004
  0 -30 Td (Subscribe to newsletter:) Tj
981
- ET`,
1005
+ ET `,
982
1006
  }),
983
1007
  })
984
1008
  document.add(contentStream)
@@ -1041,7 +1065,8 @@ acroForm.set(
1041
1065
  subscribeField.reference,
1042
1066
  ]),
1043
1067
  )
1044
- // NeedAppearances flag tells PDF readers to generate appearance streams
1068
+ // NeedAppearances tells PDF readers to generate appearance streams for the fields.
1069
+ // NOTE: it is still set to true below, so a viewer such as Acrobat may modify the PDF on open
1045
1070
  acroForm.set('NeedAppearances', new PdfBoolean(true))
1046
1071
 
1047
1072
  // Default resources for the form (font)
@@ -1051,9 +1076,12 @@ const helveticaFont = new PdfDictionary()
1051
1076
  helveticaFont.set('Type', new PdfName('Font'))
1052
1077
  helveticaFont.set('Subtype', new PdfName('Type1'))
1053
1078
  helveticaFont.set('BaseFont', new PdfName('Helvetica'))
1054
- formFontDict.set('Helv', helveticaFont)
1079
+ const helveticaFontObj = new PdfIndirectObject({ content: helveticaFont })
1080
+ document.add(helveticaFontObj)
1081
+ formFontDict.set('Helv', helveticaFontObj.reference)
1055
1082
  formResources.set('Font', formFontDict)
1056
1083
  acroForm.set('DR', formResources)
1084
+ acroForm.set('DA', new PdfString('/Helv 12 Tf 0 g'))
1057
1085
 
1058
1086
  const acroFormObj = new PdfIndirectObject({ content: acroForm })
1059
1087
  document.add(acroFormObj)
@@ -1081,90 +1109,12 @@ console.log('Created form-empty.pdf with empty form fields')
1081
1109
  const emptyFormBytes = await fs.readFile(`${tmpFolder}/form-empty.pdf`)
1082
1110
  const filledDocument = await PdfDocument.fromBytes([emptyFormBytes])
1083
1111
 
1084
- // Get the catalog reference from trailer
1085
- const catalogRef = filledDocument.trailerDict.get('Root')
1086
- if (!catalogRef || !(catalogRef instanceof PdfObjectReference)) {
1087
- throw new Error('No catalog found in PDF')
1088
- }
1089
-
1090
- // Read the catalog object
1091
- const catalogObj = await filledDocument.readObject({
1092
- objectNumber: catalogRef.objectNumber,
1093
- })
1094
- if (!catalogObj || !(catalogObj.content instanceof PdfDictionary)) {
1095
- throw new Error('Catalog object not found')
1096
- }
1097
-
1098
- // Get the AcroForm reference
1099
- const acroFormRef = catalogObj.content.get('AcroForm')
1100
- if (!acroFormRef || !(acroFormRef instanceof PdfObjectReference)) {
1101
- throw new Error('No AcroForm found in PDF')
1102
- }
1103
-
1104
- // Read the AcroForm object
1105
- const filledAcroFormObj = await filledDocument.readObject({
1106
- objectNumber: acroFormRef.objectNumber,
1112
+ await filledDocument.acroForm.setFieldValues({
1113
+ name: 'John Doe',
1114
+ email: 'john.doe@example.com',
1115
+ phone: '+1 (555) 123-4567',
1116
+ subscribe: 'Off', // For a checkbox, use the appearance-state name: 'Yes' (checked) or 'Off' (unchecked)
1107
1117
  })
1108
- if (
1109
- !filledAcroFormObj ||
1110
- !(filledAcroFormObj.content instanceof PdfDictionary)
1111
- ) {
1112
- throw new Error('AcroForm object not found')
1113
- }
1114
-
1115
- // Get the fields array
1116
- const fieldsArray = filledAcroFormObj.content.get('Fields')
1117
- if (!fieldsArray || !(fieldsArray instanceof PdfArray)) {
1118
- throw new Error('No fields found in AcroForm')
1119
- }
1120
-
1121
- // Helper function to find a field by name
1122
- async function findField(
1123
- fieldName: string,
1124
- ): Promise<PdfIndirectObject<PdfDictionary> | null> {
1125
- for (const fieldRef of fieldsArray.items) {
1126
- if (!(fieldRef instanceof PdfObjectReference)) continue
1127
- const fieldObj = await filledDocument.readObject({
1128
- objectNumber: fieldRef.objectNumber,
1129
- })
1130
- if (!fieldObj || !(fieldObj.content instanceof PdfDictionary)) continue
1131
-
1132
- const name = fieldObj.content.get('T')
1133
- if (name instanceof PdfString) {
1134
- // Convert bytes to string for comparison
1135
- const nameStr = name.value
1136
- if (nameStr === fieldName) {
1137
- return fieldObj as PdfIndirectObject<PdfDictionary>
1138
- }
1139
- }
1140
- }
1141
- return null
1142
- }
1143
-
1144
- // Update the name field value
1145
- const nameFieldObj = await findField('name')
1146
- if (nameFieldObj) {
1147
- nameFieldObj.content.set('V', new PdfString('John Doe'))
1148
- }
1149
-
1150
- // Update the email field value
1151
- const emailFieldObj = await findField('email')
1152
- if (emailFieldObj) {
1153
- emailFieldObj.content.set('V', new PdfString('john.doe@example.com'))
1154
- }
1155
-
1156
- // Update the phone field value
1157
- const phoneFieldObj = await findField('phone')
1158
- if (phoneFieldObj) {
1159
- phoneFieldObj.content.set('V', new PdfString('+1 (555) 123-4567'))
1160
- }
1161
-
1162
- // Check the subscribe checkbox
1163
- const subscribeFieldObj = await findField('subscribe')
1164
- if (subscribeFieldObj) {
1165
- subscribeFieldObj.content.set('V', new PdfName('Yes'))
1166
- subscribeFieldObj.content.set('AS', new PdfName('Yes'))
1167
- }
1168
1118
 
1169
1119
  // Save the filled form
1170
1120
  await fs.writeFile(`${tmpFolder}/form-filled.pdf`, filledDocument.toBytes())
@@ -1174,7 +1124,7 @@ console.log('\nForm field values:')
1174
1124
  console.log('- Name: John Doe')
1175
1125
  console.log('- Email: john.doe@example.com')
1176
1126
  console.log('- Phone: +1 (555) 123-4567')
1177
- console.log('- Subscribe: Yes')
1127
+ console.log('- Subscribe: Off')
1178
1128
  ```
1179
1129
 
1180
1130
  ## Tokeniser usage example
package/README.md CHANGED
@@ -18,6 +18,93 @@ PRs and issues are welcome!
18
18
  - **Browser and Node.js support**: Works seamlessly in both environments, allowing for versatile usage.
19
19
  - **Low-level API**: Provides a low-level API for advanced users who want to manipulate PDF files directly, as well as a higher-level API for easier usage.
20
20
 
21
+ ## Installation
22
+
23
+ ```bash
24
+ npm install pdf-lite
25
+ yarn add pdf-lite
26
+ pnpm add pdf-lite
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ The library provides both low-level and high-level APIs for working with PDF documents. See [PDF Support](#pdf-support) for a list of supported features.
32
+
33
+ ### Reading a PDF
34
+
35
+ ```typescript
36
+ import { PdfReader } from 'pdf-lite/pdf/pdf-reader'
37
+ import { readFile } from 'fs/promises'
38
+
39
+ const pdfBytes = await readFile('document.pdf')
40
+ const doc = await PdfReader.fromBytes([pdfBytes])
41
+ ```
42
+
43
+ ### Creating a PDF from Scratch
44
+
45
+ ```typescript
46
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
47
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
48
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
49
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
50
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
51
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
52
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
53
+
54
+ // Create the document
55
+ const document = new PdfDocument()
56
+
57
+ // Create content stream
58
+ const contentStream = new PdfIndirectObject({
59
+ content: new PdfStream({
60
+ header: new PdfDictionary(),
61
+ original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
62
+ }),
63
+ })
64
+
65
+ // Create and commit objects
66
+ document.commit(contentStream)
67
+ // ... create pages, catalog, etc.
68
+
69
+ // Output the PDF
70
+ console.log(document.toString())
71
+ ```
72
+
73
+ ### Working with Encryption
74
+
75
+ ```typescript
76
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
77
+ import { PdfV2SecurityHandler } from 'pdf-lite/security/handlers/v2'
78
+
79
+ const document = new PdfDocument()
80
+ // ... build your PDF structure
81
+
82
+ // Set up encryption
83
+ document.securityHandler = new PdfV2SecurityHandler({
84
+ password: 'user-password',
85
+ documentId: 'unique-doc-id',
86
+ encryptMetadata: true,
87
+ })
88
+
89
+ // Encrypt the document
90
+ await document.encrypt()
91
+
92
+ console.log(document.toString())
93
+ ```
94
+
95
+ ### Signing PDFs
96
+
97
+ ```typescript
98
+ import {
99
+ PdfAdbePkcs7DetachedSignatureObject,
100
+ PdfEtsiCadesDetachedSignatureObject,
101
+ } from 'pdf-lite'
102
+
103
+ // See examples directory for complete signing implementations
104
+ ```
105
+
106
+ For more detailed examples, see the [EXAMPLES.md](EXAMPLES.md) file and the [examples/](examples/) directory.
107
+
21
108
  ## PDF Support
22
109
 
23
110
  ### Low-level PDF constructs
@@ -100,6 +187,22 @@ Long-Term Validation (LTV) support ensures that digital signatures remain valid
100
187
  - [x] Timestamping
101
188
  - [x] Verification of existing signatures
102
189
 
190
+ ### AcroForm filling
191
+
192
+ Supports filling out AcroForm forms within PDF documents, allowing for dynamic content generation and user interaction.
193
+
194
+ - [x] Text fields
195
+ - [x] Checkboxes
196
+ - [x] Radio buttons
197
+ - [x] Dropdowns
198
+
199
+ ### XFA Forms
200
+
201
+ You can read/write XFA XML data from PDFs, but rendering and filling XFA forms is not supported.
202
+
203
+ - [x] Read XFA XML
204
+ - [x] Write XFA XML
205
+
103
206
  ## Future Plans
104
207
 
105
208
  - **Writing Linearized PDF**: Writing linearized PDFs for faster web viewing, improving user experience when accessing documents online.
@@ -133,156 +236,13 @@ pnpm test:unit
133
236
  pnpm test:acceptance
134
237
  ```
135
238
 
136
- ### Package Structure
137
-
138
- The main package (`packages/pdf-lite`) contains:
139
-
140
- - **src/core/** - Low-level PDF constructs (objects, parser, tokenizer)
141
- - **src/pdf/** - High-level PDF document handling
142
- - **src/signing/** - Digital signature support
143
- - **src/security/** - Encryption and security handlers
144
- - **src/filters/** - Compression/decompression filters
145
-
146
239
  ## Contributing
147
240
 
148
241
  Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
149
242
 
150
- ## Installation
151
-
152
- ```bash
153
- npm install pdf-lite
154
- ```
155
-
156
- or
157
-
158
- ```bash
159
- yarn add pdf-lite
160
- ```
161
-
162
- or
163
-
164
- ```bash
165
- pnpm add pdf-lite
166
- ```
167
-
168
- ## Usage
169
-
170
- The library provides both low-level and high-level APIs for working with PDF documents.
171
-
172
- ### Reading a PDF
173
-
174
- ```typescript
175
- import { PdfReader } from 'pdf-lite/pdf/pdf-reader'
176
- import { readFile } from 'fs/promises'
177
-
178
- const pdfBytes = await readFile('document.pdf')
179
- const doc = await PdfReader.fromBytes([pdfBytes])
180
- ```
181
-
182
- ### Creating a PDF from Scratch
183
-
184
- ```typescript
185
- import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
186
- import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
187
- import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
188
- import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
189
- import { PdfName } from 'pdf-lite/core/objects/pdf-name'
190
- import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
191
- import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
192
-
193
- // Create the document
194
- const document = new PdfDocument()
195
-
196
- // Create content stream
197
- const contentStream = new PdfIndirectObject({
198
- content: new PdfStream({
199
- header: new PdfDictionary(),
200
- original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
201
- }),
202
- })
203
-
204
- // Create and commit objects
205
- document.commit(contentStream)
206
- // ... create pages, catalog, etc.
207
-
208
- // Output the PDF
209
- console.log(document.toString())
210
- ```
211
-
212
- ### Working with Encryption
213
-
214
- ```typescript
215
- import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
216
- import { PdfV2SecurityHandler } from 'pdf-lite/security/handlers/v2'
217
-
218
- const document = new PdfDocument()
219
- // ... build your PDF structure
220
-
221
- // Set up encryption
222
- document.securityHandler = new PdfV2SecurityHandler({
223
- password: 'user-password',
224
- documentId: 'unique-doc-id',
225
- encryptMetadata: true,
226
- })
227
-
228
- // Encrypt the document
229
- await document.encrypt()
230
-
231
- console.log(document.toString())
232
- ```
233
-
234
- ### Signing PDFs
235
-
236
- ```typescript
237
- import {
238
- PdfAdbePkcs7DetachedSignatureObject,
239
- PdfEtsiCadesDetachedSignatureObject,
240
- } from 'pdf-lite'
241
-
242
- // See examples directory for complete signing implementations
243
- ```
244
-
245
- For more detailed examples, see the [EXAMPLES.md](EXAMPLES.md) file and the [examples/](examples/) directory.
246
-
247
- ## Project Structure
248
-
249
- This project is organized as a monorepo:
250
-
251
- - **packages/pdf-lite** - Main library package
252
- - **examples/** - Example scripts demonstrating library usage
253
- - **scripts/** - Build and development scripts
254
-
255
243
  ## API Reference
256
244
 
257
- The library uses TypeScript subpath exports for modular imports:
258
-
259
- ```typescript
260
- // PDF Document
261
- import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
262
- import { PdfReader } from 'pdf-lite/pdf/pdf-reader'
263
-
264
- // Core PDF objects
265
- import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
266
- import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
267
- import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
268
- import { PdfString } from 'pdf-lite/core/objects/pdf-string'
269
- import { PdfName } from 'pdf-lite/core/objects/pdf-name'
270
- import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
271
- import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
272
- import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
273
-
274
- // Security
275
- import { PdfV2SecurityHandler } from 'pdf-lite/security/handlers/v2'
276
-
277
- // Signing
278
- import {
279
- PdfAdbePkcs7DetachedSignatureObject,
280
- PdfAdbePkcs7Sha1SignatureObject,
281
- PdfAdbePkcsX509RsaSha1SignatureObject,
282
- PdfEtsiCadesDetachedSignatureObject,
283
- PdfEtsiRfc3161SignatureObject,
284
- } from 'pdf-lite'
285
- ```
245
+ See the [documentation folder](./docs/README.md) or the documentation site for a complete API reference.
286
246
 
287
247
  ## License
288
248
 
@@ -0,0 +1,49 @@
1
+ import { PdfDocument } from '../pdf/pdf-document.js';
2
+ /**
3
+ * Manages AcroForm fields in PDF documents.
4
+ * Provides methods to read and write form field values.
5
+ */
6
+ export declare class PdfAcroFormManager<T extends Record<string, string> = Record<string, string>> {
7
+ private document;
8
+ private _acroForm?;
9
+ constructor(document: PdfDocument);
10
+ /**
11
+ * Checks if the document contains an AcroForm dictionary.
12
+ * @returns True if an AcroForm dictionary is present, false otherwise
13
+ */
14
+ hasAcroForm(): Promise<boolean>;
15
+ /**
16
+ * Gets all form field values as a key-value map.
17
+ * @returns Object with field names as keys and values as strings, or null if the document has no AcroForm or no Fields array
18
+ */
19
+ getFieldValues(): Promise<T | null>;
20
+ /**
21
+ * Sets a form field value by field name.
22
+ * @param fieldName The name of the field to set
23
+ * @param value The value to set
24
+ * @throws Error if the field is not found
25
+ */
26
+ setFieldValues(newFields: Partial<T>): Promise<void>;
27
+ /**
28
+ * Gets the AcroForm indirect object from the document catalog.
29
+ * @returns The AcroForm indirect object or null if not found
30
+ */
31
+ private getAcroFormObject;
32
+ /**
33
+ * Gets the AcroForm dictionary from the document catalog.
34
+ * @returns The AcroForm dictionary or null if not found
35
+ */
36
+ private getAcroForm;
37
+ /**
38
+ * Recursively collects field values from the field tree.
39
+ */
40
+ private collectFieldValues;
41
+ /**
42
+ * Finds a field by its fully qualified name.
43
+ */
44
+ private findFieldByName;
45
+ /**
46
+ * Gets the fully qualified field name.
47
+ */
48
+ private getFieldName;
49
+ }
@@ -0,0 +1,215 @@
1
+ import { PdfDictionary } from '../core/objects/pdf-dictionary.js';
2
+ import { PdfArray } from '../core/objects/pdf-array.js';
3
+ import { PdfString } from '../core/objects/pdf-string.js';
4
+ import { PdfObjectReference } from '../core/objects/pdf-object-reference.js';
5
+ import { PdfIndirectObject } from '../core/objects/pdf-indirect-object.js';
6
+ import { PdfName } from '../core/objects/pdf-name.js';
7
+ import { PdfBoolean } from '../core/objects/pdf-boolean.js';
8
+ /**
9
+ * Manages AcroForm fields in PDF documents.
10
+ * Provides methods to read and write form field values.
11
+ */
12
+ export class PdfAcroFormManager {
13
+ document;
14
+ _acroForm;
15
+ constructor(document) {
16
+ this.document = document;
17
+ }
18
+ /**
19
+ * Checks if the document contains an AcroForm dictionary.
20
+ * @returns True if an AcroForm dictionary is present, false otherwise (including when the lookup throws)
21
+ */
22
+ async hasAcroForm() {
23
+ try {
24
+ const acroForm = await this.getAcroForm();
25
+ return acroForm !== null;
26
+ }
27
+ catch {
28
+ return false;
29
+ }
30
+ }
31
+ /**
32
+ * Gets all form field values as a key-value map.
33
+ * @returns Object with field names as keys and values as strings, or null if the document has no AcroForm or no Fields array
34
+ */
35
+ async getFieldValues() {
36
+ const acroForm = await this.getAcroForm();
37
+ if (!acroForm)
38
+ return null;
39
+ const fields = acroForm.get('Fields')?.as(PdfArray);
40
+ if (!fields)
41
+ return null;
42
+ const values = {};
43
+ await this.collectFieldValues(fields, values);
44
+ return values;
45
+ }
46
+ /**
47
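+ * Sets one or more form field values, keyed by fully qualified field name.
48
+ * @param newFields Map of field names to the values to set
49
+ * (button fields take a name value such as 'Yes' or 'Off'; other fields take a string)
50
+ * @throws Error if the document has no AcroForm, the AcroForm has no fields, or a field is not found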
51
+ */
52
+ async setFieldValues(newFields) {
53
+ const acroFormObject = await this.getAcroFormObject();
54
+ if (!acroFormObject) {
55
+ throw new Error('Document does not contain AcroForm');
56
+ }
57
+ const acroForm = acroFormObject.content.as(PdfDictionary);
58
+ const fields = acroForm.get('Fields')?.as(PdfArray);
59
+ if (!fields) {
60
+ throw new Error('AcroForm has no fields');
61
+ }
62
+ const isIncremental = this.document.isIncremental();
63
+ this.document.setIncremental(true);
64
+ // Update the AcroForm dictionary with NeedAppearances flag
65
+ const updatedAcroForm = new PdfIndirectObject({
66
+ ...acroFormObject,
67
+ content: acroFormObject.content.clone(),
68
+ });
69
+ const updatedAcroFormDict = updatedAcroForm.content.as(PdfDictionary);
70
+ // Let the PDF viewer know that appearances need to be regenerated
71
+ updatedAcroFormDict.set('NeedAppearances', new PdfBoolean(true));
72
+ await this.document.commit(updatedAcroForm);
73
+ for (const [fieldName, value] of Object.entries(newFields)) {
74
+ const fieldObject = await this.findFieldByName(fields, fieldName);
75
+ if (!fieldObject) {
76
+ throw new Error(`Field '${fieldName}' not found`);
77
+ }
78
+ const updatedField = new PdfIndirectObject({
79
+ ...fieldObject,
80
+ content: fieldObject.content.clone(),
81
+ });
82
+ const fieldDict = updatedField.content.as(PdfDictionary);
83
+ // Set appearance state (AS) for button fields (checkboxes, radio buttons)
84
+ // Button fields use PdfName for both V and AS, text fields use PdfString
85
+ const fieldType = fieldDict.get('FT')?.as(PdfName)?.value;
86
+ if (fieldType === 'Btn') {
87
+ fieldDict.set('V', new PdfName(value));
88
+ fieldDict.set('AS', new PdfName(value));
89
+ }
90
+ else {
91
+ fieldDict.set('V', new PdfString(value));
92
+ }
93
+ await this.document.commit(updatedField);
94
+ }
95
+ this.document.setIncremental(isIncremental);
96
+ }
97
+ /**
98
+ * Gets the AcroForm indirect object from the document catalog.
99
+ * @returns The AcroForm indirect object or null if not found
100
+ */
101
+ async getAcroFormObject() {
102
+ const catalog = this.document.rootDictionary;
103
+ if (!catalog)
104
+ return null;
105
+ const acroFormRef = catalog.get('AcroForm');
106
+ if (!acroFormRef)
107
+ return null;
108
+ if (acroFormRef instanceof PdfObjectReference) {
109
+ const acroFormObject = await this.document.readObject({
110
+ objectNumber: acroFormRef.objectNumber,
111
+ generationNumber: acroFormRef.generationNumber,
112
+ });
113
+ if (!acroFormObject)
114
+ return null;
115
+ return acroFormObject;
116
+ }
117
+ return null;
118
+ }
119
+ /**
120
+ * Gets the AcroForm dictionary from the document catalog.
121
+ * @returns The AcroForm dictionary or null if not found
122
+ */
123
+ async getAcroForm() {
124
+ if (this._acroForm) {
125
+ return this._acroForm;
126
+ }
127
+ const acroFormObject = await this.getAcroFormObject();
128
+ if (acroFormObject) {
129
+ this._acroForm = acroFormObject.content.as(PdfDictionary);
130
+ return this._acroForm;
131
+ }
132
+ const catalog = this.document.rootDictionary;
133
+ if (!catalog)
134
+ return null;
135
+ const acroFormRef = catalog.get('AcroForm');
136
+ if (acroFormRef instanceof PdfDictionary) {
137
+ this._acroForm = acroFormRef;
138
+ return this._acroForm;
139
+ }
140
+ return null;
141
+ }
142
+ /**
143
+ * Recursively collects field values from the field tree.
144
+ */
145
+ async collectFieldValues(fields, values, parentName = '') {
146
+ for (const fieldRef of fields.items) {
147
+ if (!(fieldRef instanceof PdfObjectReference))
148
+ continue;
149
+ // Check if we have a modified version cached
150
+ const fieldObject = await this.document.readObject({
151
+ objectNumber: fieldRef.objectNumber,
152
+ generationNumber: fieldRef.generationNumber,
153
+ });
154
+ if (!fieldObject)
155
+ continue;
156
+ const fieldDict = fieldObject.content.as(PdfDictionary);
157
+ const fieldName = this.getFieldName(fieldDict, parentName);
158
+ // Get field value (return empty string if no value set)
159
+ const value = fieldDict.get('V');
160
+ if (value instanceof PdfString) {
161
+ values[fieldName] = value.value;
162
+ }
163
+ else if (value instanceof PdfName) {
164
+ values[fieldName] = value.value;
165
+ }
166
+ else if (fieldName) {
167
+ // Include empty fields
168
+ values[fieldName] = '';
169
+ }
170
+ // Process child fields (Kids)
171
+ const kids = fieldDict.get('Kids')?.as(PdfArray);
172
+ if (kids) {
173
+ await this.collectFieldValues(kids, values, fieldName);
174
+ }
175
+ }
176
+ }
177
+ /**
178
+ * Finds a field by its fully qualified name.
179
+ */
180
+ async findFieldByName(fields, targetName, parentName = '') {
181
+ for (const fieldRef of fields.items) {
182
+ if (!(fieldRef instanceof PdfObjectReference))
183
+ continue;
184
+ // Check if we have a modified version cached
185
+ const fieldObject = await this.document.readObject({
186
+ objectNumber: fieldRef.objectNumber,
187
+ generationNumber: fieldRef.generationNumber,
188
+ });
189
+ if (!fieldObject)
190
+ continue;
191
+ const fieldDict = fieldObject.content.as(PdfDictionary);
192
+ const fieldName = this.getFieldName(fieldDict, parentName);
193
+ if (fieldName === targetName) {
194
+ return fieldObject;
195
+ }
196
+ // Search in child fields (Kids)
197
+ const kids = fieldDict.get('Kids')?.as(PdfArray);
198
+ if (kids) {
199
+ const found = await this.findFieldByName(kids, targetName, fieldName);
200
+ if (found)
201
+ return found;
202
+ }
203
+ }
204
+ return null;
205
+ }
206
+ /**
207
+ * Gets the fully qualified field name.
208
+ */
209
+ getFieldName(fieldDict, parentName) {
210
+ const partialName = fieldDict.get('T')?.as(PdfString)?.value ?? '';
211
+ if (!parentName)
212
+ return partialName;
213
+ return `${parentName}.${partialName}`;
214
+ }
215
+ }
@@ -0,0 +1 @@
1
+ export { PdfAcroFormManager } from './acroform-manager.js';
@@ -0,0 +1 @@
1
+ export { PdfAcroFormManager } from './acroform-manager.js';
@@ -54,6 +54,7 @@ export declare class PdfStream<T extends PdfDictionary = PdfDictionary> extends
54
54
  };
55
55
  static applyFilters(data: ByteArray, filters: PdfStreamFilterType[]): ByteArray;
56
56
  clone(): this;
57
+ static fromString(data: string): PdfStream;
57
58
  }
58
59
  export declare class PdfObjStream extends PdfStream {
59
60
  constructor(options: {
@@ -220,6 +220,12 @@ export class PdfStream extends PdfObject {
220
220
  original: new Uint8Array(this.original),
221
221
  });
222
222
  }
223
+ static fromString(data) {
224
+ return new PdfStream({
225
+ original: stringToBytes(data),
226
+ header: new PdfDictionary(),
227
+ });
228
+ }
223
229
  }
224
230
  export class PdfObjStream extends PdfStream {
225
231
  constructor(options) {
@@ -253,11 +259,20 @@ export class PdfObjStream extends PdfStream {
253
259
  const reader = bytesToPdfObjects([decodedData]);
254
260
  const numbers = [];
255
261
  let i = 0;
262
+ const n = this.header.get('N');
263
+ const totalObjects = n ? n.value : 0;
256
264
  while (true) {
257
265
  const { value: obj, done } = reader.next();
258
266
  if (done)
259
267
  break;
260
- if (obj instanceof PdfDictionary) {
268
+ if (obj instanceof PdfNumber) {
269
+ // Collect object numbers and byte offsets
270
+ numbers.push(obj);
271
+ }
272
+ else {
273
+ // This is an actual PDF object (can be Dictionary, Array, String, Name, etc.)
274
+ // The first N*2 numbers are: obj_num1 offset1 obj_num2 offset2 ...
275
+ // After that come the actual objects
261
276
  const objectNumber = numbers[i * 2].value;
262
277
  const generationNumber = 0;
263
278
  yield new PdfIndirectObject({
@@ -266,12 +281,10 @@ export class PdfObjStream extends PdfStream {
266
281
  content: obj,
267
282
  });
268
283
  i++;
269
- }
270
- else if (obj instanceof PdfNumber) {
271
- numbers.push(obj);
272
- }
273
- else {
274
- throw new Error('Invalid object in PDF Object Stream');
284
+ // Stop after we've read N objects
285
+ if (totalObjects > 0 && i >= totalObjects) {
286
+ break;
287
+ }
275
288
  }
276
289
  }
277
290
  }
@@ -12,6 +12,8 @@ import { PdfTrailerEntries } from '../core/objects/pdf-trailer.js';
12
12
  import { PdfDocumentSecurityStoreObject } from '../signing/document-security-store.js';
13
13
  import { ByteArray } from '../types.js';
14
14
  import { PdfDocumentVerificationResult, PdfSigner } from '../signing/signer.js';
15
+ import { PdfXfaManager } from '../xfa/xfa-manager.js';
16
+ import { PdfAcroFormManager } from '../acroform/acroform-manager.js';
15
17
  /**
16
18
  * Represents a PDF document with support for reading, writing, and modifying PDF files.
17
19
  * Handles document structure, revisions, encryption, and digital signatures.
@@ -38,8 +40,14 @@ export declare class PdfDocument extends PdfObject {
38
40
  signer: PdfSigner;
39
41
  /** Security handler for encryption/decryption operations */
40
42
  securityHandler?: PdfSecurityHandler;
43
+ private _xfa?;
44
+ private _acroForm?;
41
45
  private hasEncryptionDictionary?;
42
46
  private toBeCommitted;
47
+ /** XFA manager for handling XFA forms */
48
+ get xfa(): PdfXfaManager;
49
+ /** AcroForm manager for handling form fields */
50
+ get acroForm(): PdfAcroFormManager;
43
51
  /**
44
52
  * Creates a new PDF document instance.
45
53
  *
@@ -260,6 +268,12 @@ export declare class PdfDocument extends PdfObject {
260
268
  * @returns The PDF document as a Uint8Array
261
269
  */
262
270
  toBytes(): ByteArray;
271
+ /**
272
+ * Serializes the document to a Base64-encoded string.
273
+ *
274
+ * @returns The PDF document as a Base64 string
275
+ */
276
+ toBase64(): string;
263
277
  /**
264
278
  * Creates a deep copy of the document.
265
279
  *
@@ -17,6 +17,8 @@ import { PdfStartXRef } from '../core/objects/pdf-start-xref.js';
17
17
  import { FoundCompressedObjectError } from './errors.js';
18
18
  import { PdfReader } from './pdf-reader.js';
19
19
  import { PdfSigner } from '../signing/signer.js';
20
+ import { PdfXfaManager } from '../xfa/xfa-manager.js';
21
+ import { PdfAcroFormManager } from '../acroform/acroform-manager.js';
20
22
  /**
21
23
  * Represents a PDF document with support for reading, writing, and modifying PDF files.
22
24
  * Handles document structure, revisions, encryption, and digital signatures.
@@ -43,8 +45,24 @@ export class PdfDocument extends PdfObject {
43
45
  signer;
44
46
  /** Security handler for encryption/decryption operations */
45
47
  securityHandler;
48
+ _xfa;
49
+ _acroForm;
46
50
  hasEncryptionDictionary = false;
47
51
  toBeCommitted = [];
52
+ /** XFA manager for handling XFA forms */
53
+ get xfa() {
54
+ if (!this._xfa) {
55
+ this._xfa = new PdfXfaManager(this);
56
+ }
57
+ return this._xfa;
58
+ }
59
+ /** AcroForm manager for handling form fields */
60
+ get acroForm() {
61
+ if (!this._acroForm) {
62
+ this._acroForm = new PdfAcroFormManager(this);
63
+ }
64
+ return this._acroForm;
65
+ }
48
66
  /**
49
67
  * Creates a new PDF document instance.
50
68
  *
@@ -212,6 +230,9 @@ export class PdfDocument extends PdfObject {
212
230
  return undefined;
213
231
  }
214
232
  const rootObject = this.findUncompressedObject(rootRef);
233
+ if (!rootObject) {
234
+ throw new Error('Root object not found');
235
+ }
215
236
  if (!(rootObject?.content instanceof PdfDictionary)) {
216
237
  throw new Error(`Root object ${rootRef.objectNumber} ${rootRef.generationNumber} is not a dictionary, it is a ${rootObject?.content.objectType}`);
217
238
  }
@@ -392,7 +413,7 @@ export class PdfDocument extends PdfObject {
392
413
  }
393
414
  const objectStreamIndirect = this.findUncompressedObject({
394
415
  objectNumber: xrefEntry.objectStreamNumber.value,
395
- });
416
+ })?.clone();
396
417
  if (!objectStreamIndirect) {
397
418
  throw new Error(`Cannot find object stream ${xrefEntry.objectStreamNumber.value} for object ${options.objectNumber}`);
398
419
  }
@@ -418,7 +439,7 @@ export class PdfDocument extends PdfObject {
418
439
  findUncompressedObject(options) {
419
440
  const xrefEntry = this.xrefLookup.getObject(options.objectNumber);
420
441
  if (xrefEntry instanceof PdfXRefStreamCompressedEntry) {
421
- throw new FoundCompressedObjectError(`TODO: Cannot find object ${options.objectNumber} inside object stream via PdfDocument.findObject`);
442
+ throw new FoundCompressedObjectError(`Cannot find object ${options.objectNumber} inside object stream via PdfDocument.findObject`);
422
443
  }
423
444
  if (!xrefEntry ||
424
445
  (options.generationNumber !== undefined &&
@@ -504,9 +525,13 @@ export class PdfDocument extends PdfObject {
504
525
  * @param value - True to enable incremental mode, false to disable. Defaults to true.
505
526
  */
506
527
  setIncremental(value = true) {
528
+ if (value === this.isIncremental()) {
529
+ return;
530
+ }
507
531
  for (const revision of this.revisions) {
508
532
  revision.locked = value;
509
533
  }
534
+ this.startNewRevision();
510
535
  }
511
536
  /**
512
537
  * Checks if the document is in incremental mode.
@@ -664,6 +689,20 @@ export class PdfDocument extends PdfObject {
664
689
  serializer.feed(...this.toTokens());
665
690
  return serializer.toBytes();
666
691
  }
692
+ /**
693
+ * Serializes the document to a Base64-encoded string.
694
+ *
695
+ * @returns The PDF document as a Base64 string
696
+ */
697
+ toBase64() {
698
+ const bytes = this.toBytes();
699
+ let binary = '';
700
+ const len = bytes.byteLength;
701
+ for (let i = 0; i < len; i++) {
702
+ binary += String.fromCharCode(bytes[i]);
703
+ }
704
+ return btoa(binary);
705
+ }
667
706
  /**
668
707
  * Creates a deep copy of the document.
669
708
  *
@@ -204,7 +204,8 @@ export class PdfXrefLookup {
204
204
  this.trailerDict.set('Size', new PdfNumber(prevSize));
205
205
  }
206
206
  const size = Math.max(trailerSize, prevSize);
207
- this.trailerDict.set('Size', new PdfNumber(size));
207
+ if (size !== trailerSize)
208
+ this.trailerDict.set('Size', new PdfNumber(size));
208
209
  return size;
209
210
  }
210
211
  /**
@@ -0,0 +1 @@
1
+ export { PdfXfaManager } from './xfa-manager.js';
@@ -0,0 +1 @@
1
+ export { PdfXfaManager } from './xfa-manager.js';
@@ -0,0 +1,44 @@
1
+ import { PdfDocument } from '../pdf/pdf-document.js';
2
+ /**
3
+ * Manages XFA (XML Forms Architecture) forms in PDF documents.
4
+ * Provides methods to read and write XFA form data as XML.
5
+ */
6
+ export declare class PdfXfaManager {
7
+ private document;
8
+ private _datasetsStream?;
9
+ constructor(document: PdfDocument);
10
+ /**
11
+ * Checks if the document contains XFA forms.
12
+ * @returns True if the document has XFA forms, false otherwise
13
+ */
14
+ hasXfaForms(): Promise<boolean>;
15
+ /**
16
+ * Reads the XFA form data as XML string.
17
+ * @returns The XFA datasets XML content, or null if no XFA form exists
18
+ */
19
+ readXml(): Promise<string | null>;
20
+ /**
21
+ * Writes XML content to the XFA datasets stream.
22
+ * @param xml The XML content to write
23
+ * @throws Error if the document doesn't have XFA forms
24
+ */
25
+ writeXml(xml: string): Promise<void>;
26
+ /**
27
+ * Gets the AcroForm dictionary from the document catalog.
28
+ * @returns The AcroForm dictionary or null if not found
29
+ */
30
+ private getAcroForm;
31
+ /**
32
+ * Gets the XFA datasets stream.
33
+ * Returns the cached stream when one is set; otherwise looks it up through the AcroForm XFA array.
34
+ * @returns The datasets stream or null if not found
35
+ */
36
+ private getDatasetsStream;
37
+ /**
38
+ * Finds the XFA dataset object reference from the XFA array.
39
+ * XFA array structure: [(name1) ref1 (name2) ref2 ...]
40
+ * @param xfaArray The XFA array from the AcroForm
41
+ * @returns The object reference to the datasets stream, or null if not found
42
+ */
43
+ private findXfaDatasetReference;
44
+ }
@@ -0,0 +1,138 @@
1
+ import { PdfStream } from '../core/objects/pdf-stream.js';
2
+ import { PdfDictionary } from '../core/objects/pdf-dictionary.js';
3
+ import { PdfArray } from '../core/objects/pdf-array.js';
4
+ import { PdfString } from '../core/objects/pdf-string.js';
5
+ import { PdfObjectReference } from '../core/objects/pdf-object-reference.js';
6
+ import { PdfIndirectObject } from '../core/index.js';
7
+ /**
8
+ * Manages XFA (XML Forms Architecture) forms in PDF documents.
9
+ * Provides methods to read and write XFA form data as XML.
10
+ */
11
+ export class PdfXfaManager {
12
+ document;
13
+ _datasetsStream;
14
+ constructor(document) {
15
+ this.document = document;
16
+ }
17
+ /**
18
+ * Checks if the document contains XFA forms.
19
+ * @returns True if the document has XFA forms, false otherwise
20
+ */
21
+ async hasXfaForms() {
22
+ try {
23
+ const stream = await this.getDatasetsStream();
24
+ return stream !== null;
25
+ }
26
+ catch {
27
+ return false;
28
+ }
29
+ }
30
+ /**
31
+ * Reads the XFA form data as XML string.
32
+ * @returns The XFA datasets XML content, or null if no XFA form exists
33
+ */
34
+ async readXml() {
35
+ const stream = await this.getDatasetsStream();
36
+ if (!stream)
37
+ return null;
38
+ // Get decompressed data without modifying the original stream
39
+ const decompressed = stream.content.decode();
40
+ // Return the raw XML content as string
41
+ const decoder = new TextDecoder();
42
+ return decoder.decode(decompressed);
43
+ }
44
+ /**
45
+ * Writes XML content to the XFA datasets stream.
46
+ * @param xml The XML content to write
47
+ * @throws Error if the document doesn't have XFA forms
48
+ */
49
+ async writeXml(xml) {
50
+ const stream = await this.getDatasetsStream();
51
+ if (!stream) {
52
+ throw new Error('Document does not contain XFA forms');
53
+ }
54
+ // Build a replacement indirect object that carries the new XML content
55
+ const datasetsIndirect = new PdfIndirectObject({
56
+ ...stream,
57
+ content: PdfStream.fromString(xml),
58
+ });
59
+ const isIncremental = this.document.isIncremental();
60
+ this.document.setIncremental(true);
61
+ await this.document.commit(datasetsIndirect);
62
+ this.document.setIncremental(isIncremental);
63
+ // Reset cached stream to force re-fetch if needed
64
+ this._datasetsStream = undefined;
65
+ }
66
+ /**
67
+ * Gets the AcroForm dictionary from the document catalog.
68
+ * @returns The AcroForm dictionary or null if not found
69
+ */
70
+ async getAcroForm() {
71
+ const catalog = this.document.rootDictionary;
72
+ if (!catalog)
73
+ return null;
74
+ const acroFormRef = catalog.get('AcroForm');
75
+ if (!acroFormRef)
76
+ return null;
77
+ if (acroFormRef instanceof PdfObjectReference) {
78
+ const acroFormObject = await this.document.readObject({
79
+ objectNumber: acroFormRef.objectNumber,
80
+ generationNumber: acroFormRef.generationNumber,
81
+ });
82
+ if (!acroFormObject)
83
+ return null;
84
+ return acroFormObject.content.as(PdfDictionary);
85
+ }
86
+ else if (acroFormRef instanceof PdfDictionary) {
87
+ return acroFormRef;
88
+ }
89
+ return null;
90
+ }
91
+ /**
92
+ * Gets the XFA datasets stream.
93
+ * Returns the cached stream when one is set; otherwise looks it up through the AcroForm XFA array.
94
+ * @returns The datasets stream or null if not found
95
+ */
96
+ async getDatasetsStream() {
97
+ if (this._datasetsStream) {
98
+ return this._datasetsStream;
99
+ }
100
+ const acroForm = await this.getAcroForm();
101
+ if (!acroForm)
102
+ return null;
103
+ const xfaArray = acroForm.get('XFA');
104
+ if (!(xfaArray instanceof PdfArray))
105
+ return null;
106
+ const datasetsRef = this.findXfaDatasetReference(xfaArray);
107
+ if (!datasetsRef)
108
+ return null;
109
+ const datasetObject = await this.document.readObject({
110
+ objectNumber: datasetsRef.objectNumber,
111
+ generationNumber: datasetsRef.generationNumber,
112
+ allowUnindexed: true,
113
+ });
114
+ if (!datasetObject)
115
+ return null;
116
+ return datasetObject;
117
+ }
118
+ /**
119
+ * Finds the XFA dataset object reference from the XFA array.
120
+ * XFA array structure: [(name1) ref1 (name2) ref2 ...]
121
+ * @param xfaArray The XFA array from the AcroForm
122
+ * @returns The object reference to the datasets stream, or null if not found
123
+ */
124
+ findXfaDatasetReference(xfaArray) {
125
+ const items = xfaArray.items;
126
+ // XFA array alternates between name strings and object references
127
+ for (let i = 0; i < items.length - 1; i += 2) {
128
+ const name = items[i];
129
+ const ref = items[i + 1];
130
+ if (name instanceof PdfString && name.value === 'datasets') {
131
+ if (ref instanceof PdfObjectReference) {
132
+ return ref;
133
+ }
134
+ }
135
+ }
136
+ return null;
137
+ }
138
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-lite",
3
- "version": "1.0.5",
3
+ "version": "1.0.6",
4
4
  "main": "dist/index.js",
5
5
  "type": "module",
6
6
  "exports": {