pdf-lite 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/EXAMPLES.md +1512 -0
- package/package.json +6 -4
package/EXAMPLES.md
ADDED
|
@@ -0,0 +1,1512 @@
|
|
|
1
|
+
# PDF-Lite Examples
|
|
2
|
+
|
|
3
|
+
This document contains example scripts demonstrating how to use the PDF-Lite library.
|
|
4
|
+
|
|
5
|
+
## PDF creation from scratch example
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import { writeFileSync } from 'fs'
|
|
9
|
+
import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
|
|
10
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
11
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
12
|
+
import { PdfName } from 'pdf-lite/core/objects/pdf-name'
|
|
13
|
+
import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
|
|
14
|
+
import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
|
|
15
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
16
|
+
import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
|
|
17
|
+
|
|
18
|
+
function createPage(
|
|
19
|
+
contentStreamRef: PdfObjectReference,
|
|
20
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
21
|
+
const pageDict = new PdfDictionary()
|
|
22
|
+
pageDict.set('Type', new PdfName('Page'))
|
|
23
|
+
pageDict.set(
|
|
24
|
+
'MediaBox',
|
|
25
|
+
new PdfArray([
|
|
26
|
+
new PdfNumber(0),
|
|
27
|
+
new PdfNumber(0),
|
|
28
|
+
new PdfNumber(612),
|
|
29
|
+
new PdfNumber(792),
|
|
30
|
+
]),
|
|
31
|
+
)
|
|
32
|
+
pageDict.set('Contents', contentStreamRef)
|
|
33
|
+
return new PdfIndirectObject({ content: pageDict })
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function createPages(
|
|
37
|
+
pages: PdfIndirectObject<PdfDictionary>[],
|
|
38
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
39
|
+
const pagesDict = new PdfDictionary()
|
|
40
|
+
pagesDict.set('Type', new PdfName('Pages'))
|
|
41
|
+
pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
|
|
42
|
+
pagesDict.set('Count', new PdfNumber(pages.length))
|
|
43
|
+
return new PdfIndirectObject({ content: pagesDict })
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function createCatalog(
|
|
47
|
+
pagesRef: PdfObjectReference,
|
|
48
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
49
|
+
const catalogDict = new PdfDictionary()
|
|
50
|
+
catalogDict.set('Type', new PdfName('Catalog'))
|
|
51
|
+
catalogDict.set('Pages', pagesRef)
|
|
52
|
+
return new PdfIndirectObject({ content: catalogDict })
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
function createFont(): PdfIndirectObject<PdfDictionary> {
|
|
56
|
+
const fontDict = new PdfDictionary()
|
|
57
|
+
fontDict.set('Type', new PdfName('Font'))
|
|
58
|
+
fontDict.set('Subtype', new PdfName('Type1'))
|
|
59
|
+
fontDict.set('BaseFont', new PdfName('Helvetica'))
|
|
60
|
+
return new PdfIndirectObject({ content: fontDict })
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function createResources(
|
|
64
|
+
fontRef: PdfObjectReference,
|
|
65
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
66
|
+
const resourcesDict = new PdfDictionary()
|
|
67
|
+
const fontDict = new PdfDictionary()
|
|
68
|
+
fontDict.set('F1', fontRef)
|
|
69
|
+
resourcesDict.set('Font', fontDict)
|
|
70
|
+
return new PdfIndirectObject({ content: resourcesDict })
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Create the document
|
|
74
|
+
const document = new PdfDocument()
|
|
75
|
+
|
|
76
|
+
// Create font
|
|
77
|
+
const font = createFont()
|
|
78
|
+
document.add(font)
|
|
79
|
+
|
|
80
|
+
// Create resources with the font
|
|
81
|
+
const resources = createResources(font.reference)
|
|
82
|
+
document.add(resources)
|
|
83
|
+
|
|
84
|
+
// Create content stream
|
|
85
|
+
const contentStream = new PdfIndirectObject({
|
|
86
|
+
content: new PdfStream({
|
|
87
|
+
header: new PdfDictionary(),
|
|
88
|
+
original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
|
|
89
|
+
}),
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
// Create a page
|
|
93
|
+
const page = createPage(contentStream.reference)
|
|
94
|
+
// Add resources to the page
|
|
95
|
+
page.content.set('Resources', resources.reference)
|
|
96
|
+
document.add(page)
|
|
97
|
+
|
|
98
|
+
// Create pages collection
|
|
99
|
+
const pages = createPages([page])
|
|
100
|
+
// Set parent reference for the page
|
|
101
|
+
page.content.set('Parent', pages.reference)
|
|
102
|
+
document.add(pages)
|
|
103
|
+
|
|
104
|
+
// Create catalog
|
|
105
|
+
const catalog = createCatalog(pages.reference)
|
|
106
|
+
document.add(catalog)
|
|
107
|
+
|
|
108
|
+
// Set the catalog as the root
|
|
109
|
+
document.trailerDict.set('Root', catalog.reference)
|
|
110
|
+
|
|
111
|
+
document.add(contentStream)
|
|
112
|
+
await document.commit()
|
|
113
|
+
|
|
114
|
+
const file = `${import.meta.dirname}/tmp/created.pdf`
|
|
115
|
+
console.log(`Writing PDF to: ${file}`)
|
|
116
|
+
|
|
117
|
+
writeFileSync(file, document.toBytes()) // synchronous call, so no await; the tmp/ directory must already exist
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## PDF creation with encryption example
|
|
121
|
+
|
|
122
|
+
```typescript
|
|
123
|
+
import { writeFileSync } from 'fs'
|
|
124
|
+
import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
|
|
125
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
126
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
127
|
+
import { PdfName } from 'pdf-lite/core/objects/pdf-name'
|
|
128
|
+
import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
|
|
129
|
+
import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
|
|
130
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
131
|
+
import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
|
|
132
|
+
import { PdfV2SecurityHandler } from 'pdf-lite/security/handlers/v2'
|
|
133
|
+
|
|
134
|
+
function createPage(
|
|
135
|
+
contentStreamRef: PdfObjectReference,
|
|
136
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
137
|
+
const pageDict = new PdfDictionary()
|
|
138
|
+
pageDict.set('Type', new PdfName('Page'))
|
|
139
|
+
pageDict.set(
|
|
140
|
+
'MediaBox',
|
|
141
|
+
new PdfArray([
|
|
142
|
+
new PdfNumber(0),
|
|
143
|
+
new PdfNumber(0),
|
|
144
|
+
new PdfNumber(612),
|
|
145
|
+
new PdfNumber(792),
|
|
146
|
+
]),
|
|
147
|
+
)
|
|
148
|
+
pageDict.set('Contents', contentStreamRef)
|
|
149
|
+
return new PdfIndirectObject({ content: pageDict })
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
function createPages(
|
|
153
|
+
pages: PdfIndirectObject<PdfDictionary>[],
|
|
154
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
155
|
+
const pagesDict = new PdfDictionary()
|
|
156
|
+
pagesDict.set('Type', new PdfName('Pages'))
|
|
157
|
+
pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
|
|
158
|
+
pagesDict.set('Count', new PdfNumber(pages.length))
|
|
159
|
+
return new PdfIndirectObject({ content: pagesDict })
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
function createCatalog(
|
|
163
|
+
pagesRef: PdfObjectReference,
|
|
164
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
165
|
+
const catalogDict = new PdfDictionary()
|
|
166
|
+
catalogDict.set('Type', new PdfName('Catalog'))
|
|
167
|
+
catalogDict.set('Pages', pagesRef)
|
|
168
|
+
return new PdfIndirectObject({ content: catalogDict })
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
function createFont(): PdfIndirectObject<PdfDictionary> {
|
|
172
|
+
const fontDict = new PdfDictionary()
|
|
173
|
+
fontDict.set('Type', new PdfName('Font'))
|
|
174
|
+
fontDict.set('Subtype', new PdfName('Type1'))
|
|
175
|
+
fontDict.set('BaseFont', new PdfName('Helvetica'))
|
|
176
|
+
return new PdfIndirectObject({ content: fontDict })
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
function createResources(
|
|
180
|
+
fontRef: PdfObjectReference,
|
|
181
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
182
|
+
const resourcesDict = new PdfDictionary()
|
|
183
|
+
const fontDict = new PdfDictionary()
|
|
184
|
+
fontDict.set('F1', fontRef)
|
|
185
|
+
resourcesDict.set('Font', fontDict)
|
|
186
|
+
return new PdfIndirectObject({ content: resourcesDict })
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Create the document
|
|
190
|
+
const document = new PdfDocument()
|
|
191
|
+
|
|
192
|
+
// Create font
|
|
193
|
+
const font = createFont()
|
|
194
|
+
document.add(font)
|
|
195
|
+
|
|
196
|
+
// Create resources with the font
|
|
197
|
+
const resources = createResources(font.reference)
|
|
198
|
+
document.add(resources)
|
|
199
|
+
|
|
200
|
+
// Create content stream
|
|
201
|
+
const contentStream = new PdfIndirectObject({
|
|
202
|
+
content: new PdfStream({
|
|
203
|
+
header: new PdfDictionary(),
|
|
204
|
+
original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
|
|
205
|
+
}),
|
|
206
|
+
})
|
|
207
|
+
|
|
208
|
+
// Create a page
|
|
209
|
+
const page = createPage(contentStream.reference)
|
|
210
|
+
// Add resources to the page
|
|
211
|
+
page.content.set('Resources', resources.reference)
|
|
212
|
+
document.add(page)
|
|
213
|
+
|
|
214
|
+
// Create pages collection
|
|
215
|
+
const pages = createPages([page])
|
|
216
|
+
// Set parent reference for the page
|
|
217
|
+
page.content.set('Parent', pages.reference)
|
|
218
|
+
document.add(pages)
|
|
219
|
+
|
|
220
|
+
// Create catalog
|
|
221
|
+
const catalog = createCatalog(pages.reference)
|
|
222
|
+
document.add(catalog)
|
|
223
|
+
|
|
224
|
+
// Set the catalog as the root
|
|
225
|
+
document.trailerDict.set('Root', catalog.reference)
|
|
226
|
+
|
|
227
|
+
document.add(contentStream)
|
|
228
|
+
await document.commit()
|
|
229
|
+
|
|
230
|
+
document.securityHandler = new PdfV2SecurityHandler({
|
|
231
|
+
password: 'up',
|
|
232
|
+
documentId: 'cafebabe',
|
|
233
|
+
encryptMetadata: true,
|
|
234
|
+
})
|
|
235
|
+
|
|
236
|
+
await document.encrypt()
|
|
237
|
+
|
|
238
|
+
const file = `${import.meta.dirname}/tmp/encrypted.pdf`
|
|
239
|
+
console.log(`Writing encrypted PDF to: ${file}. Password: "up"`)
|
|
240
|
+
|
|
241
|
+
writeFileSync(file, document.toBytes()) // synchronous call, so no await; the tmp/ directory must already exist
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## PDF signature example
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
|
|
248
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
249
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
250
|
+
import { PdfName } from 'pdf-lite/core/objects/pdf-name'
|
|
251
|
+
import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
|
|
252
|
+
import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
|
|
253
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
254
|
+
import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
|
|
255
|
+
import { PdfString } from 'pdf-lite/core/objects/pdf-string'
|
|
256
|
+
import {
|
|
257
|
+
PdfAdbePkcsX509RsaSha1SignatureObject,
|
|
258
|
+
PdfAdbePkcs7DetachedSignatureObject,
|
|
259
|
+
PdfAdbePkcs7Sha1SignatureObject,
|
|
260
|
+
PdfEtsiCadesDetachedSignatureObject,
|
|
261
|
+
PdfEtsiRfc3161SignatureObject,
|
|
262
|
+
} from 'pdf-lite'
|
|
263
|
+
import { rsaSigningKeys } from '../packages/pdf-lite/test/unit/fixtures/rsa-2048/index'
|
|
264
|
+
import fs from 'fs/promises'
|
|
265
|
+
|
|
266
|
+
function createPage(
|
|
267
|
+
contentStreamRef: PdfObjectReference,
|
|
268
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
269
|
+
const pageDict = new PdfDictionary()
|
|
270
|
+
pageDict.set('Type', new PdfName('Page'))
|
|
271
|
+
pageDict.set(
|
|
272
|
+
'MediaBox',
|
|
273
|
+
new PdfArray([
|
|
274
|
+
new PdfNumber(0),
|
|
275
|
+
new PdfNumber(0),
|
|
276
|
+
new PdfNumber(612),
|
|
277
|
+
new PdfNumber(792),
|
|
278
|
+
]),
|
|
279
|
+
)
|
|
280
|
+
pageDict.set('Contents', contentStreamRef)
|
|
281
|
+
return new PdfIndirectObject({ content: pageDict })
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
function createPages(
|
|
285
|
+
pages: PdfIndirectObject<PdfDictionary>[],
|
|
286
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
287
|
+
const pagesDict = new PdfDictionary()
|
|
288
|
+
pagesDict.set('Type', new PdfName('Pages'))
|
|
289
|
+
pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
|
|
290
|
+
pagesDict.set('Count', new PdfNumber(pages.length))
|
|
291
|
+
return new PdfIndirectObject({ content: pagesDict })
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
function createCatalog(
|
|
295
|
+
pagesRef: PdfObjectReference,
|
|
296
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
297
|
+
const catalogDict = new PdfDictionary()
|
|
298
|
+
catalogDict.set('Type', new PdfName('Catalog'))
|
|
299
|
+
catalogDict.set('Pages', pagesRef)
|
|
300
|
+
return new PdfIndirectObject({ content: catalogDict })
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
function createFont(): PdfIndirectObject<PdfDictionary> {
|
|
304
|
+
const fontDict = new PdfDictionary()
|
|
305
|
+
fontDict.set('Type', new PdfName('Font'))
|
|
306
|
+
fontDict.set('Subtype', new PdfName('Type1'))
|
|
307
|
+
fontDict.set('BaseFont', new PdfName('Helvetica'))
|
|
308
|
+
return new PdfIndirectObject({ content: fontDict })
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
function createResources(
|
|
312
|
+
fontRef: PdfObjectReference,
|
|
313
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
314
|
+
const resourcesDict = new PdfDictionary()
|
|
315
|
+
const fontDict = new PdfDictionary()
|
|
316
|
+
fontDict.set('F1', fontRef)
|
|
317
|
+
resourcesDict.set('Font', fontDict)
|
|
318
|
+
return new PdfIndirectObject({ content: resourcesDict })
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
function createPageWithSignatureField(
|
|
322
|
+
contentStreamRef: PdfObjectReference,
|
|
323
|
+
signatureAnnotRef: PdfObjectReference,
|
|
324
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
325
|
+
const pageDict = new PdfDictionary()
|
|
326
|
+
pageDict.set('Type', new PdfName('Page'))
|
|
327
|
+
pageDict.set(
|
|
328
|
+
'MediaBox',
|
|
329
|
+
new PdfArray([
|
|
330
|
+
new PdfNumber(0),
|
|
331
|
+
new PdfNumber(0),
|
|
332
|
+
new PdfNumber(612),
|
|
333
|
+
new PdfNumber(792),
|
|
334
|
+
]),
|
|
335
|
+
)
|
|
336
|
+
pageDict.set('Contents', contentStreamRef)
|
|
337
|
+
pageDict.set('Annots', new PdfArray([signatureAnnotRef]))
|
|
338
|
+
|
|
339
|
+
return new PdfIndirectObject({ content: pageDict })
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
function createSignatureAnnotation(
|
|
343
|
+
signatureRef: PdfObjectReference,
|
|
344
|
+
appearanceStreamRef: PdfObjectReference,
|
|
345
|
+
pageRef: PdfObjectReference,
|
|
346
|
+
signatureName: string,
|
|
347
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
348
|
+
const signatureAnnotation = new PdfDictionary()
|
|
349
|
+
signatureAnnotation.set('Type', new PdfName('Annot'))
|
|
350
|
+
signatureAnnotation.set('Subtype', new PdfName('Widget'))
|
|
351
|
+
signatureAnnotation.set('FT', new PdfName('Sig'))
|
|
352
|
+
signatureAnnotation.set('T', new PdfString(signatureName))
|
|
353
|
+
signatureAnnotation.set(
|
|
354
|
+
'Rect',
|
|
355
|
+
new PdfArray([
|
|
356
|
+
new PdfNumber(135), // x1: Start after "Signature: " text (~72 + 63)
|
|
357
|
+
new PdfNumber(640), // y1: Bottom of signature area (652 - 12)
|
|
358
|
+
new PdfNumber(400), // x2: End of signature line
|
|
359
|
+
new PdfNumber(665), // y2: Top of signature area (652 + 13)
|
|
360
|
+
]),
|
|
361
|
+
)
|
|
362
|
+
signatureAnnotation.set('F', new PdfNumber(4))
|
|
363
|
+
signatureAnnotation.set('P', pageRef) // Reference to parent page
|
|
364
|
+
signatureAnnotation.set('V', signatureRef)
|
|
365
|
+
|
|
366
|
+
// Add appearance dictionary
|
|
367
|
+
const appearanceDict = new PdfDictionary()
|
|
368
|
+
appearanceDict.set('N', appearanceStreamRef)
|
|
369
|
+
signatureAnnotation.set('AP', appearanceDict)
|
|
370
|
+
|
|
371
|
+
return new PdfIndirectObject({ content: signatureAnnotation })
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
function createSignatureAppearance(): PdfIndirectObject<PdfStream> {
|
|
375
|
+
// Create font for appearance
|
|
376
|
+
const appearanceFont = new PdfDictionary()
|
|
377
|
+
appearanceFont.set('Type', new PdfName('Font'))
|
|
378
|
+
appearanceFont.set('Subtype', new PdfName('Type1'))
|
|
379
|
+
appearanceFont.set('BaseFont', new PdfName('Helvetica'))
|
|
380
|
+
|
|
381
|
+
const fontDict = new PdfDictionary()
|
|
382
|
+
fontDict.set('F1', appearanceFont)
|
|
383
|
+
|
|
384
|
+
const resourcesDict = new PdfDictionary()
|
|
385
|
+
resourcesDict.set('Font', fontDict)
|
|
386
|
+
|
|
387
|
+
// Create appearance stream header
|
|
388
|
+
const appearanceHeader = new PdfDictionary()
|
|
389
|
+
appearanceHeader.set('Type', new PdfName('XObject'))
|
|
390
|
+
appearanceHeader.set('Subtype', new PdfName('Form'))
|
|
391
|
+
appearanceHeader.set(
|
|
392
|
+
'BBox',
|
|
393
|
+
new PdfArray([
|
|
394
|
+
new PdfNumber(0),
|
|
395
|
+
new PdfNumber(0),
|
|
396
|
+
new PdfNumber(265), // Width: 400 - 135
|
|
397
|
+
new PdfNumber(25), // Height: 665 - 640
|
|
398
|
+
]),
|
|
399
|
+
)
|
|
400
|
+
appearanceHeader.set('Resources', resourcesDict)
|
|
401
|
+
|
|
402
|
+
// Create appearance stream for the signature
|
|
403
|
+
return new PdfIndirectObject({
|
|
404
|
+
content: new PdfStream({
|
|
405
|
+
header: appearanceHeader,
|
|
406
|
+
original:
|
|
407
|
+
'BT /F1 10 Tf 5 14 Td (Digitally signed by: Jake Shirley) Tj ET',
|
|
408
|
+
}),
|
|
409
|
+
})
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// Create the document
|
|
413
|
+
const document = new PdfDocument()
|
|
414
|
+
|
|
415
|
+
// Create font
|
|
416
|
+
const font = createFont()
|
|
417
|
+
document.add(font)
|
|
418
|
+
|
|
419
|
+
// Create resources with the font
|
|
420
|
+
const resources = createResources(font.reference)
|
|
421
|
+
document.add(resources)
|
|
422
|
+
|
|
423
|
+
// Create content stream for first page
|
|
424
|
+
const contentStream = new PdfIndirectObject({
|
|
425
|
+
content: new PdfStream({
|
|
426
|
+
header: new PdfDictionary(),
|
|
427
|
+
original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
|
|
428
|
+
}),
|
|
429
|
+
})
|
|
430
|
+
document.add(contentStream)
|
|
431
|
+
|
|
432
|
+
// Create first page
|
|
433
|
+
const page1 = createPage(contentStream.reference)
|
|
434
|
+
page1.content.set('Resources', resources.reference)
|
|
435
|
+
document.add(page1)
|
|
436
|
+
|
|
437
|
+
// Arrays to hold all pages, signature objects, and signature field references
|
|
438
|
+
const allPages: PdfIndirectObject<PdfDictionary>[] = [page1]
|
|
439
|
+
const allSignatures: any[] = []
|
|
440
|
+
const signatureFields: PdfObjectReference[] = []
|
|
441
|
+
|
|
442
|
+
// Helper function to create a signature page
|
|
443
|
+
function createSignaturePage(
|
|
444
|
+
signatureType: string,
|
|
445
|
+
signatureObj: any,
|
|
446
|
+
pageNumber: number,
|
|
447
|
+
) {
|
|
448
|
+
const content = new PdfIndirectObject({
|
|
449
|
+
content: new PdfStream({
|
|
450
|
+
header: new PdfDictionary(),
|
|
451
|
+
original: `BT /F1 12 Tf 72 712 Td (Signature Type: ${signatureType}) Tj 0 -60 Td (Signature: ________________________________) Tj ET`,
|
|
452
|
+
}),
|
|
453
|
+
})
|
|
454
|
+
document.add(content)
|
|
455
|
+
|
|
456
|
+
const appearance = createSignatureAppearance()
|
|
457
|
+
document.add(appearance)
|
|
458
|
+
|
|
459
|
+
// Create page first to get its reference
|
|
460
|
+
const page = createPageWithSignatureField(
|
|
461
|
+
content.reference,
|
|
462
|
+
new PdfObjectReference(0, 0), // Temporary placeholder
|
|
463
|
+
)
|
|
464
|
+
page.content.set('Resources', resources.reference)
|
|
465
|
+
document.add(page)
|
|
466
|
+
|
|
467
|
+
// Now create annotation with page reference
|
|
468
|
+
const annotation = createSignatureAnnotation(
|
|
469
|
+
signatureObj.reference,
|
|
470
|
+
appearance.reference,
|
|
471
|
+
page.reference,
|
|
472
|
+
`Signature${pageNumber}`,
|
|
473
|
+
)
|
|
474
|
+
document.add(annotation)
|
|
475
|
+
|
|
476
|
+
// Update page's Annots array with actual annotation reference
|
|
477
|
+
page.content.set('Annots', new PdfArray([annotation.reference]))
|
|
478
|
+
|
|
479
|
+
signatureFields.push(annotation.reference)
|
|
480
|
+
return page
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// Page 2: Adobe PKCS7 Detached
|
|
484
|
+
const pkcs7DetachedSig = new PdfAdbePkcs7DetachedSignatureObject({
|
|
485
|
+
privateKey: rsaSigningKeys.privateKey,
|
|
486
|
+
certificate: rsaSigningKeys.cert,
|
|
487
|
+
issuerCertificate: rsaSigningKeys.caCert,
|
|
488
|
+
name: 'Jake Shirley',
|
|
489
|
+
location: 'Earth',
|
|
490
|
+
reason: 'PKCS7 Detached Signature',
|
|
491
|
+
contactInfo: 'test@test.com',
|
|
492
|
+
revocationInfo: {
|
|
493
|
+
crls: [rsaSigningKeys.caCrl],
|
|
494
|
+
ocsps: [rsaSigningKeys.ocspResponse],
|
|
495
|
+
},
|
|
496
|
+
})
|
|
497
|
+
allSignatures.push(pkcs7DetachedSig)
|
|
498
|
+
allPages.push(createSignaturePage('Adobe PKCS7 Detached', pkcs7DetachedSig, 2))
|
|
499
|
+
|
|
500
|
+
// Page 3: Adobe PKCS7 SHA1
|
|
501
|
+
const pkcs7Sha1Sig = new PdfAdbePkcs7Sha1SignatureObject({
|
|
502
|
+
privateKey: rsaSigningKeys.privateKey,
|
|
503
|
+
certificate: rsaSigningKeys.cert,
|
|
504
|
+
issuerCertificate: rsaSigningKeys.caCert,
|
|
505
|
+
name: 'Jake Shirley',
|
|
506
|
+
location: 'Earth',
|
|
507
|
+
reason: 'PKCS7 SHA1 Signature',
|
|
508
|
+
contactInfo: 'test@test.com',
|
|
509
|
+
})
|
|
510
|
+
allSignatures.push(pkcs7Sha1Sig)
|
|
511
|
+
allPages.push(createSignaturePage('Adobe PKCS7 SHA1', pkcs7Sha1Sig, 3))
|
|
512
|
+
|
|
513
|
+
// Page 4: Adobe X509 RSA SHA1
|
|
514
|
+
const x509RsaSha1Sig = new PdfAdbePkcsX509RsaSha1SignatureObject({
|
|
515
|
+
privateKey: rsaSigningKeys.privateKey,
|
|
516
|
+
certificate: rsaSigningKeys.cert,
|
|
517
|
+
additionalCertificates: [rsaSigningKeys.caCert],
|
|
518
|
+
name: 'Jake Shirley',
|
|
519
|
+
location: 'Earth',
|
|
520
|
+
reason: 'X509 RSA SHA1 Signature',
|
|
521
|
+
contactInfo: 'test@test.com',
|
|
522
|
+
revocationInfo: {
|
|
523
|
+
crls: [rsaSigningKeys.caCrl],
|
|
524
|
+
ocsps: [rsaSigningKeys.ocspResponse],
|
|
525
|
+
},
|
|
526
|
+
})
|
|
527
|
+
allSignatures.push(x509RsaSha1Sig)
|
|
528
|
+
allPages.push(createSignaturePage('Adobe X509 RSA SHA1', x509RsaSha1Sig, 4))
|
|
529
|
+
|
|
530
|
+
// Page 5: ETSI CAdES Detached
|
|
531
|
+
const cadesDetachedSig = new PdfEtsiCadesDetachedSignatureObject({
|
|
532
|
+
privateKey: rsaSigningKeys.privateKey,
|
|
533
|
+
certificate: rsaSigningKeys.cert,
|
|
534
|
+
issuerCertificate: rsaSigningKeys.caCert,
|
|
535
|
+
name: 'Jake Shirley',
|
|
536
|
+
location: 'Earth',
|
|
537
|
+
reason: 'CAdES Detached Signature',
|
|
538
|
+
contactInfo: 'test@test.com',
|
|
539
|
+
revocationInfo: {
|
|
540
|
+
crls: [rsaSigningKeys.caCrl],
|
|
541
|
+
ocsps: [rsaSigningKeys.ocspResponse],
|
|
542
|
+
},
|
|
543
|
+
})
|
|
544
|
+
allSignatures.push(cadesDetachedSig)
|
|
545
|
+
allPages.push(createSignaturePage('ETSI CAdES Detached', cadesDetachedSig, 5))
|
|
546
|
+
|
|
547
|
+
// Page 6: ETSI RFC3161 (Timestamp)
|
|
548
|
+
const rfc3161Sig = new PdfEtsiRfc3161SignatureObject({
|
|
549
|
+
timeStampAuthority: {
|
|
550
|
+
url: 'https://freetsa.org/tsr',
|
|
551
|
+
},
|
|
552
|
+
})
|
|
553
|
+
allSignatures.push(rfc3161Sig)
|
|
554
|
+
allPages.push(createSignaturePage('ETSI RFC3161 Timestamp', rfc3161Sig, 6))
|
|
555
|
+
|
|
556
|
+
// Create pages collection with all pages
|
|
557
|
+
const pages = createPages(allPages)
|
|
558
|
+
// Set parent reference for all pages
|
|
559
|
+
allPages.forEach((page) => {
|
|
560
|
+
page.content.set('Parent', pages.reference)
|
|
561
|
+
})
|
|
562
|
+
document.add(pages)
|
|
563
|
+
|
|
564
|
+
// Create catalog (the AcroForm is attached to it below)
|
|
565
|
+
const catalog = createCatalog(pages.reference)
|
|
566
|
+
|
|
567
|
+
// Add AcroForm to catalog with all signature fields
|
|
568
|
+
const acroForm = new PdfDictionary()
|
|
569
|
+
acroForm.set('Fields', new PdfArray(signatureFields))
|
|
570
|
+
acroForm.set('SigFlags', new PdfNumber(3))
|
|
571
|
+
const acroFormObj = new PdfIndirectObject({ content: acroForm })
|
|
572
|
+
document.add(acroFormObj)
|
|
573
|
+
catalog.content.set('AcroForm', acroFormObj.reference)
|
|
574
|
+
|
|
575
|
+
document.add(catalog)
|
|
576
|
+
|
|
577
|
+
// Set the catalog as the root
|
|
578
|
+
document.trailerDict.set('Root', catalog.reference)
|
|
579
|
+
|
|
580
|
+
// IMPORTANT: Add all signatures LAST - after all other objects
|
|
581
|
+
// This ensures the ByteRange is calculated correctly for each signature
|
|
582
|
+
allSignatures.forEach((sig) => {
|
|
583
|
+
document.startNewRevision()
|
|
584
|
+
document.add(sig)
|
|
585
|
+
})
|
|
586
|
+
|
|
587
|
+
await document.commit()
|
|
588
|
+
|
|
589
|
+
const tmpFolder = `${import.meta.dirname}/tmp`
|
|
590
|
+
await fs.mkdir(tmpFolder, { recursive: true })
|
|
591
|
+
await fs.writeFile(`${tmpFolder}/signed-output.pdf`, document.toBytes())
|
|
592
|
+
```
|
|
593
|
+
|
|
594
|
+
## Incremental PDF update example
|
|
595
|
+
|
|
596
|
+
```typescript
|
|
597
|
+
import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
|
|
598
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
599
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
600
|
+
import { PdfName } from 'pdf-lite/core/objects/pdf-name'
|
|
601
|
+
import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
|
|
602
|
+
import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
|
|
603
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
604
|
+
import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
|
|
605
|
+
import fs from 'fs/promises'
|
|
606
|
+
|
|
607
|
+
const tmpFolder = `${import.meta.dirname}/tmp`
|
|
608
|
+
await fs.mkdir(tmpFolder, { recursive: true })
|
|
609
|
+
|
|
610
|
+
// Helper functions for creating PDF objects
|
|
611
|
+
function createPage(
|
|
612
|
+
contentStreamRef: PdfObjectReference,
|
|
613
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
614
|
+
const pageDict = new PdfDictionary()
|
|
615
|
+
pageDict.set('Type', new PdfName('Page'))
|
|
616
|
+
pageDict.set(
|
|
617
|
+
'MediaBox',
|
|
618
|
+
new PdfArray([
|
|
619
|
+
new PdfNumber(0),
|
|
620
|
+
new PdfNumber(0),
|
|
621
|
+
new PdfNumber(612),
|
|
622
|
+
new PdfNumber(792),
|
|
623
|
+
]),
|
|
624
|
+
)
|
|
625
|
+
pageDict.set('Contents', contentStreamRef)
|
|
626
|
+
return new PdfIndirectObject({ content: pageDict })
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
function createPages(
|
|
630
|
+
pages: PdfIndirectObject<PdfDictionary>[],
|
|
631
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
632
|
+
const pagesDict = new PdfDictionary()
|
|
633
|
+
pagesDict.set('Type', new PdfName('Pages'))
|
|
634
|
+
pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
|
|
635
|
+
pagesDict.set('Count', new PdfNumber(pages.length))
|
|
636
|
+
return new PdfIndirectObject({ content: pagesDict })
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
function createCatalog(
|
|
640
|
+
pagesRef: PdfObjectReference,
|
|
641
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
642
|
+
const catalogDict = new PdfDictionary()
|
|
643
|
+
catalogDict.set('Type', new PdfName('Catalog'))
|
|
644
|
+
catalogDict.set('Pages', pagesRef)
|
|
645
|
+
return new PdfIndirectObject({ content: catalogDict })
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
function createFont(): PdfIndirectObject<PdfDictionary> {
|
|
649
|
+
const fontDict = new PdfDictionary()
|
|
650
|
+
fontDict.set('Type', new PdfName('Font'))
|
|
651
|
+
fontDict.set('Subtype', new PdfName('Type1'))
|
|
652
|
+
fontDict.set('BaseFont', new PdfName('Helvetica'))
|
|
653
|
+
return new PdfIndirectObject({ content: fontDict })
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
function createResources(
|
|
657
|
+
fontRef: PdfObjectReference,
|
|
658
|
+
): PdfIndirectObject<PdfDictionary> {
|
|
659
|
+
const resourcesDict = new PdfDictionary()
|
|
660
|
+
const fontDict = new PdfDictionary()
|
|
661
|
+
fontDict.set('F1', fontRef)
|
|
662
|
+
resourcesDict.set('Font', fontDict)
|
|
663
|
+
return new PdfIndirectObject({ content: resourcesDict })
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
// Step 1: Create an initial PDF document
|
|
667
|
+
console.log('Step 1: Creating initial PDF document...')
|
|
668
|
+
const document = new PdfDocument()
|
|
669
|
+
|
|
670
|
+
const font = createFont()
|
|
671
|
+
document.add(font)
|
|
672
|
+
const resources = createResources(font.reference)
|
|
673
|
+
document.add(resources)
|
|
674
|
+
|
|
675
|
+
const contentStream = new PdfIndirectObject({
|
|
676
|
+
content: new PdfStream({
|
|
677
|
+
header: new PdfDictionary(),
|
|
678
|
+
original:
|
|
679
|
+
'BT /F1 24 Tf 100 700 Td (Original Document - Revision 1) Tj ET',
|
|
680
|
+
}),
|
|
681
|
+
})
|
|
682
|
+
|
|
683
|
+
const page = createPage(contentStream.reference)
|
|
684
|
+
page.content.set('Resources', resources.reference)
|
|
685
|
+
document.add(page)
|
|
686
|
+
|
|
687
|
+
const pages = createPages([page])
|
|
688
|
+
page.content.set('Parent', pages.reference)
|
|
689
|
+
document.add(pages)
|
|
690
|
+
|
|
691
|
+
const catalog = createCatalog(pages.reference)
|
|
692
|
+
document.add(catalog)
|
|
693
|
+
|
|
694
|
+
document.trailerDict.set('Root', catalog.reference)
|
|
695
|
+
document.add(contentStream)
|
|
696
|
+
|
|
697
|
+
await document.commit()
|
|
698
|
+
// Save the original PDF
|
|
699
|
+
const originalPdfPath = `${tmpFolder}/original.pdf`
|
|
700
|
+
await fs.writeFile(originalPdfPath, document.toBytes())
|
|
701
|
+
console.log(`Original PDF saved to: ${originalPdfPath}`)
|
|
702
|
+
console.log(`Original PDF has ${document.revisions.length} revision(s)`)
|
|
703
|
+
|
|
704
|
+
// Step 2: Load the PDF and perform an incremental update
|
|
705
|
+
console.log('\nStep 2: Loading PDF and performing incremental update...')
|
|
706
|
+
|
|
707
|
+
// Read the existing PDF
|
|
708
|
+
const existingPdfBytes = await fs.readFile(originalPdfPath)
|
|
709
|
+
const loadedDocument = await PdfDocument.fromBytes([existingPdfBytes])
|
|
710
|
+
|
|
711
|
+
// Lock existing revisions to enable incremental mode
|
|
712
|
+
// This ensures changes are added as new revisions instead of modifying existing ones
|
|
713
|
+
loadedDocument.setIncremental(true)
|
|
714
|
+
|
|
715
|
+
// Create new content for the incremental update
|
|
716
|
+
// In a real scenario, this could be adding annotations, form fields, signatures, etc.
|
|
717
|
+
const newContentStream = new PdfIndirectObject({
|
|
718
|
+
objectNumber: contentStream.objectNumber,
|
|
719
|
+
generationNumber: contentStream.generationNumber,
|
|
720
|
+
content: new PdfStream({
|
|
721
|
+
header: new PdfDictionary(),
|
|
722
|
+
original:
|
|
723
|
+
'BT /F1 18 Tf 100 650 Td (This content was added in Revision 2) Tj ET',
|
|
724
|
+
}),
|
|
725
|
+
})
|
|
726
|
+
|
|
727
|
+
// Add the new content to the document
|
|
728
|
+
loadedDocument.add(newContentStream)
|
|
729
|
+
await loadedDocument.commit()
|
|
730
|
+
|
|
731
|
+
// Save the incrementally updated PDF
|
|
732
|
+
const updatedPdfPath = `${tmpFolder}/incremental-update.pdf`
|
|
733
|
+
await fs.writeFile(updatedPdfPath, loadedDocument.toBytes())
|
|
734
|
+
console.log(`Incrementally updated PDF saved to: ${updatedPdfPath}`)
|
|
735
|
+
console.log(`Updated PDF has ${loadedDocument.revisions.length} revision(s)`)
|
|
736
|
+
|
|
737
|
+
// Step 3: Verify the incremental update preserved the original content
console.log('\nStep 3: Verifying incremental update...')

// An incremental update appends data, so the updated file should be larger
const originalStats = await fs.stat(originalPdfPath)
const updatedStats = await fs.stat(updatedPdfPath)

console.log(`Original PDF size: ${originalStats.size} bytes`)
console.log(`Updated PDF size: ${updatedStats.size} bytes`)
console.log(
    `Size difference: ${updatedStats.size - originalStats.size} bytes (new revision data)`,
)

// The updated PDF contains the original bytes plus the new revision.
// This is the key feature of incremental updates - the original PDF is preserved
const updatedPdfBytes = await fs.readFile(updatedPdfPath)
const originalPdfBytesForComparison = await fs.readFile(originalPdfPath)

// Compare the two files byte-for-byte over the original's length, leaving out
// its last 10 bytes (the %%EOF marker area). A substring test on the first few
// characters would report success even if later parts of the original revision
// had been rewritten, so the whole prefix is checked.
const preservedLength = Math.max(0, originalPdfBytesForComparison.length - 10)
const originalBytesMatch =
    updatedPdfBytes.length >= preservedLength &&
    updatedPdfBytes
        .subarray(0, preservedLength)
        .equals(originalPdfBytesForComparison.subarray(0, preservedLength))

console.log(`Original content preserved: ${originalBytesMatch ? 'Yes' : 'No'}`)
|
|
767
|
+
|
|
768
|
+
// Step 4: Stack one more revision on top of the already-updated file
console.log('\nStep 4: Adding another incremental revision...')

const multiRevisionPdfPath = `${tmpFolder}/multi-revision.pdf`

// Start from the bytes of the incrementally updated PDF, again in incremental mode
const secondUpdate = await PdfDocument.fromBytes([updatedPdfBytes])
secondUpdate.setIncremental(true)

// Same object and generation numbers as `contentStream`, new drawing operators
const revisionThreeStream = new PdfIndirectObject({
    objectNumber: contentStream.objectNumber,
    generationNumber: contentStream.generationNumber,
    content: new PdfStream(
        'BT /F1 14 Tf 100 600 Td (Third revision - demonstrates multiple incremental updates) Tj ET',
    ),
})

secondUpdate.add(revisionThreeStream)
await secondUpdate.commit()

// Persist the result and report on it
await fs.writeFile(multiRevisionPdfPath, secondUpdate.toBytes())
console.log(`Multi-revision PDF saved to: ${multiRevisionPdfPath}`)
console.log(
    `Multi-revision PDF has ${secondUpdate.revisions.length} revision(s)`,
)

const { size: multiRevisionSize } = await fs.stat(multiRevisionPdfPath)
console.log(`Multi-revision PDF size: ${multiRevisionSize} bytes`)

// Closing summary, printed one line at a time
const summaryLines = [
    '\n=== Summary ===',
    'Incremental updates allow you to:',
    '1. Preserve the original PDF content (important for signatures)',
    '2. Add new content without modifying existing revisions',
    '3. Maintain a complete history of document changes',
    '4. Stack multiple revisions on top of each other',
]
for (const line of summaryLines) {
    console.log(line)
}
|
|
801
|
+
```
|
|
802
|
+
|
|
803
|
+
## Modifying AcroForms example - Creating and filling PDF form fields
|
|
804
|
+
|
|
805
|
+
```typescript
|
|
806
|
+
import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
|
|
807
|
+
import { PdfBoolean } from 'pdf-lite/core/objects/pdf-boolean'
|
|
808
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
809
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
810
|
+
import { PdfName } from 'pdf-lite/core/objects/pdf-name'
|
|
811
|
+
import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
|
|
812
|
+
import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
|
|
813
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
814
|
+
import { PdfString } from 'pdf-lite/core/objects/pdf-string'
|
|
815
|
+
import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
|
|
816
|
+
import fs from 'fs/promises'
|
|
817
|
+
|
|
818
|
+
const tmpFolder = `${import.meta.dirname}/tmp`
|
|
819
|
+
await fs.mkdir(tmpFolder, { recursive: true })
|
|
820
|
+
|
|
821
|
+
/**
 * Builds a page object with a 612 x 792 point MediaBox (US Letter size).
 *
 * The returned dictionary carries `Type`, `MediaBox` and `Contents` only;
 * callers add further entries such as `Resources` or `Parent` themselves.
 *
 * @param contentStreamRef - Reference stored under the page's `Contents` key.
 * @returns A new indirect object wrapping the page dictionary.
 */
function createPage(
    contentStreamRef: PdfObjectReference,
): PdfIndirectObject<PdfDictionary> {
    // [llx, lly, urx, ury]
    const mediaBox = new PdfArray(
        [0, 0, 612, 792].map((value) => new PdfNumber(value)),
    )

    const dict = new PdfDictionary()
    dict.set('Type', new PdfName('Page'))
    dict.set('MediaBox', mediaBox)
    dict.set('Contents', contentStreamRef)

    return new PdfIndirectObject({ content: dict })
}
|
|
839
|
+
|
|
840
|
+
/**
 * Builds the page-tree node (`Type` = `Pages`) for a flat list of pages.
 *
 * `Kids` holds one reference per entry in `pages` and `Count` is the number of
 * entries. The pages themselves are not modified here.
 *
 * @param pages - Page objects to list as children.
 * @returns A new indirect object wrapping the pages dictionary.
 */
function createPages(
    pages: PdfIndirectObject<PdfDictionary>[],
): PdfIndirectObject<PdfDictionary> {
    const kidRefs = pages.map((pageObject) => pageObject.reference)

    const tree = new PdfDictionary()
    tree.set('Type', new PdfName('Pages'))
    tree.set('Kids', new PdfArray(kidRefs))
    tree.set('Count', new PdfNumber(pages.length))

    return new PdfIndirectObject({ content: tree })
}
|
|
850
|
+
|
|
851
|
+
/**
 * Builds the document catalog dictionary.
 *
 * @param pagesRef - Reference to the page tree, stored under `Pages`.
 * @returns A new indirect object wrapping a dictionary with `Type` = `Catalog`.
 */
function createCatalog(
    pagesRef: PdfObjectReference,
): PdfIndirectObject<PdfDictionary> {
    const root = new PdfDictionary()
    root.set('Type', new PdfName('Catalog'))
    root.set('Pages', pagesRef)

    return new PdfIndirectObject({ content: root })
}
|
|
860
|
+
|
|
861
|
+
/**
 * Builds a font dictionary describing the Type1 font Helvetica.
 *
 * @returns A new indirect object wrapping the font dictionary.
 */
function createFont(): PdfIndirectObject<PdfDictionary> {
    const helvetica = new PdfDictionary()
    helvetica.set('Type', new PdfName('Font'))
    helvetica.set('Subtype', new PdfName('Type1'))
    helvetica.set('BaseFont', new PdfName('Helvetica'))

    return new PdfIndirectObject({ content: helvetica })
}
|
|
869
|
+
|
|
870
|
+
/**
 * Builds a resource dictionary that exposes one font under the name `F1`.
 *
 * @param fontRef - Reference to the font object to register.
 * @returns A new indirect object wrapping `{ Font: { F1: fontRef } }`.
 */
function createResources(
    fontRef: PdfObjectReference,
): PdfIndirectObject<PdfDictionary> {
    // Inner dictionary: font resource name -> font object
    const fonts = new PdfDictionary()
    fonts.set('F1', fontRef)

    const resources = new PdfDictionary()
    resources.set('Font', fonts)

    return new PdfIndirectObject({ content: resources })
}
|
|
880
|
+
|
|
881
|
+
/**
 * Builds a text form field merged with its widget annotation.
 *
 * @param fieldName - Stored as the field's `T` entry.
 * @param pageRef - Reference to the page that shows the widget (`P` entry).
 * @param rect - Bounding rectangle as `[x1, y1, x2, y2]`.
 * @param defaultValue - When non-empty, written to both `V` and `DV`;
 *   when empty, neither entry is set.
 * @returns A new indirect object wrapping the field dictionary.
 */
function createTextField(
    fieldName: string,
    pageRef: PdfObjectReference,
    rect: [number, number, number, number],
    defaultValue: string = '',
): PdfIndirectObject<PdfDictionary> {
    const widget = new PdfDictionary()

    // Annotation side of the merged dictionary
    widget.set('Type', new PdfName('Annot'))
    widget.set('Subtype', new PdfName('Widget'))

    // Field side: a text field (`Tx`) with the given name
    widget.set('FT', new PdfName('Tx'))
    widget.set('T', new PdfString(fieldName))

    widget.set(
        'Rect',
        new PdfArray(rect.map((coordinate) => new PdfNumber(coordinate))),
    )

    // Annotation flags: 4 = print
    widget.set('F', new PdfNumber(4))
    widget.set('P', pageRef)

    if (defaultValue) {
        // Current value and default value start out identical
        widget.set('V', new PdfString(defaultValue))
        widget.set('DV', new PdfString(defaultValue))
    }

    // Default appearance: font /Helv at 12 pt, drawn with gray level 0
    widget.set('DA', new PdfString('/Helv 12 Tf 0 g'))

    return new PdfIndirectObject({ content: widget })
}
|
|
920
|
+
|
|
921
|
+
/**
 * Builds a checkbox form field merged with its widget annotation.
 *
 * @param fieldName - Stored as the field's `T` entry.
 * @param pageRef - Reference to the page that shows the widget (`P` entry).
 * @param rect - Bounding rectangle as `[x1, y1, x2, y2]`.
 * @param checked - Selects the name written to both `V` and `AS`:
 *   `Yes` when true, `Off` when false.
 * @returns A new indirect object wrapping the field dictionary.
 */
function createCheckboxField(
    fieldName: string,
    pageRef: PdfObjectReference,
    rect: [number, number, number, number],
    checked: boolean = false,
): PdfIndirectObject<PdfDictionary> {
    const stateName = checked ? 'Yes' : 'Off'
    const widget = new PdfDictionary()

    // Annotation side of the merged dictionary
    widget.set('Type', new PdfName('Annot'))
    widget.set('Subtype', new PdfName('Widget'))

    // Field side: a button field (`Btn`) with the given name
    widget.set('FT', new PdfName('Btn'))
    widget.set('T', new PdfString(fieldName))

    widget.set(
        'Rect',
        new PdfArray(rect.map((coordinate) => new PdfNumber(coordinate))),
    )

    // Annotation flags: 4 = print
    widget.set('F', new PdfNumber(4))
    widget.set('P', pageRef)

    // Field value and appearance state carry the same name
    widget.set('V', new PdfName(stateName))
    widget.set('AS', new PdfName(stateName))

    return new PdfIndirectObject({ content: widget })
}
|
|
956
|
+
|
|
957
|
+
// ============================================
// PART 1: Create a PDF with form fields
// ============================================

const document = new PdfDocument()

// Page font, plus the resource dictionary that exposes it as /F1
const font = createFont()
document.add(font)

const resources = createResources(font.reference)
document.add(resources)

// Static labels drawn on the page, one operator line per entry
const labelOperators = [
    'BT',
    '/F1 18 Tf 72 720 Td (PDF Form Example) Tj',
    '/F1 12 Tf 0 -40 Td (Name:) Tj',
    '0 -30 Td (Email:) Tj',
    '0 -30 Td (Phone:) Tj',
    '0 -30 Td (Subscribe to newsletter:) Tj',
    'ET',
].join('\n')
const contentStream = new PdfIndirectObject({
    content: new PdfStream({
        header: new PdfDictionary(),
        original: labelOperators,
    }),
})
document.add(contentStream)

// The page that hosts both the labels and the field widgets
const page = createPage(contentStream.reference)
page.content.set('Resources', resources.reference)
document.add(page)

// Three text inputs and one checkbox, in the order they are listed in the form
const formFields = [
    createTextField('name', page.reference, [150, 665, 400, 685]),
    createTextField('email', page.reference, [150, 635, 400, 655]),
    createTextField('phone', page.reference, [150, 605, 400, 625]),
    createCheckboxField('subscribe', page.reference, [200, 575, 215, 590]),
]
for (const field of formFields) {
    document.add(field)
}

// Attach the field widgets to the page as annotations
page.content.set(
    'Annots',
    new PdfArray(formFields.map((field) => field.reference)),
)

// Page tree; the page points back at it through `Parent`
const pages = createPages([page])
page.content.set('Parent', pages.reference)
document.add(pages)

const catalog = createCatalog(pages.reference)

// Default resources for the form: an inline Helvetica font named /Helv,
// which is the font name used by the text fields' `DA` strings
const helveticaFont = new PdfDictionary()
helveticaFont.set('Type', new PdfName('Font'))
helveticaFont.set('Subtype', new PdfName('Type1'))
helveticaFont.set('BaseFont', new PdfName('Helvetica'))

const formFontDict = new PdfDictionary()
formFontDict.set('Helv', helveticaFont)

const formResources = new PdfDictionary()
formResources.set('Font', formFontDict)

// AcroForm dictionary listing every field
const acroForm = new PdfDictionary()
acroForm.set(
    'Fields',
    new PdfArray(formFields.map((field) => field.reference)),
)
// NeedAppearances asks PDF readers to generate the appearance streams
acroForm.set('NeedAppearances', new PdfBoolean(true))
acroForm.set('DR', formResources)

const acroFormObj = new PdfIndirectObject({ content: acroForm })
document.add(acroFormObj)
catalog.content.set('AcroForm', acroFormObj.reference)

document.add(catalog)

// The trailer's Root entry designates the catalog
document.trailerDict.set('Root', catalog.reference)

await document.commit()

// Save the blank form that users (or Part 2 below) can fill in
await fs.writeFile(`${tmpFolder}/form-empty.pdf`, document.toBytes())
console.log('Created form-empty.pdf with empty form fields')
|
|
1073
|
+
|
|
1074
|
+
// ============================================
// PART 2: Fill in the form fields
// ============================================
// The blank form saved by Part 1 is read back from disk and its field values
// are then set programmatically.

const emptyFormPath = `${tmpFolder}/form-empty.pdf`
const emptyFormBytes = await fs.readFile(emptyFormPath)
const filledDocument = await PdfDocument.fromBytes([emptyFormBytes])

// Trailer -> Root: must be an indirect reference to the catalog
const rootRef = filledDocument.trailerDict.get('Root')
if (!(rootRef instanceof PdfObjectReference)) {
    throw new Error('No catalog found in PDF')
}

// Resolve the catalog and make sure it is a dictionary
const catalogObject = await filledDocument.readObject({
    objectNumber: rootRef.objectNumber,
})
if (!catalogObject || !(catalogObject.content instanceof PdfDictionary)) {
    throw new Error('Catalog object not found')
}

// Catalog -> AcroForm: must also be an indirect reference
const formRef = catalogObject.content.get('AcroForm')
if (!(formRef instanceof PdfObjectReference)) {
    throw new Error('No AcroForm found in PDF')
}

// Resolve the AcroForm dictionary
const formObject = await filledDocument.readObject({
    objectNumber: formRef.objectNumber,
})
if (!formObject || !(formObject.content instanceof PdfDictionary)) {
    throw new Error('AcroForm object not found')
}

// AcroForm -> Fields: the array that is searched for fields by name
const fieldsArray = formObject.content.get('Fields')
if (!(fieldsArray instanceof PdfArray)) {
    throw new Error('No fields found in AcroForm')
}
|
|
1120
|
+
|
|
1121
|
+
/**
 * Looks up a form field by the value of its `T` (name) entry.
 *
 * Walks the entries of `fieldsArray`, reads each referenced object from
 * `filledDocument`, and returns the first one whose content is a dictionary
 * with a `PdfString` `T` entry equal to `fieldName`. Entries that are not
 * references, cannot be read, or are not dictionaries are skipped.
 * Only the entries of `fieldsArray` itself are examined.
 *
 * @param fieldName - Name to compare against each field's `T` value.
 * @returns The matching field object, or `null` if no entry matches.
 */
async function findField(
    fieldName: string,
): Promise<PdfIndirectObject<PdfDictionary> | null> {
    for (const entry of fieldsArray.items) {
        if (!(entry instanceof PdfObjectReference)) continue

        const candidate = await filledDocument.readObject({
            objectNumber: entry.objectNumber,
        })
        if (!candidate || !(candidate.content instanceof PdfDictionary)) continue

        // `value` of the PdfString is compared directly with the requested name
        const title = candidate.content.get('T')
        if (title instanceof PdfString && title.value === fieldName) {
            return candidate as PdfIndirectObject<PdfDictionary>
        }
    }

    return null
}
|
|
1143
|
+
|
|
1144
|
+
// Text fields to fill, processed in this order: [field name, new value]
const textFieldValues: [string, string][] = [
    ['name', 'John Doe'],
    ['email', 'john.doe@example.com'],
    ['phone', '+1 (555) 123-4567'],
]
for (const [targetName, newValue] of textFieldValues) {
    const textField = await findField(targetName)
    // A field that cannot be found is skipped without error
    if (textField) {
        textField.content.set('V', new PdfString(newValue))
    }
}

// Tick the subscribe checkbox: value and appearance state both become /Yes
const subscribeCheckbox = await findField('subscribe')
if (subscribeCheckbox) {
    subscribeCheckbox.content.set('V', new PdfName('Yes'))
    subscribeCheckbox.content.set('AS', new PdfName('Yes'))
}

// Save the filled form
// NOTE(review): Part 1 calls commit() before toBytes(); confirm whether the
// in-place edits above need a commit() as well before serialising.
await fs.writeFile(`${tmpFolder}/form-filled.pdf`, filledDocument.toBytes())
console.log('Created form-filled.pdf with filled form fields')

// Echo the values that were written
console.log('\nForm field values:')
console.log('- Name: John Doe')
console.log('- Email: john.doe@example.com')
console.log('- Phone: +1 (555) 123-4567')
console.log('- Subscribe: Yes')
|
|
1178
|
+
```
|
|
1179
|
+
|
|
1180
|
+
## Tokeniser usage example
|
|
1181
|
+
|
|
1182
|
+
```typescript
|
|
1183
|
+
import { PdfByteStreamTokeniser } from 'pdf-lite/core/tokeniser'
|
|
1184
|
+
import { PdfToken } from 'pdf-lite/core/tokens/token'
|
|
1185
|
+
import { stringToBytes } from 'pdf-lite/utils/stringToBytes'
|
|
1186
|
+
|
|
1187
|
+
/**
|
|
1188
|
+
* This example demonstrates how to use the PdfByteStreamTokeniser
|
|
1189
|
+
* to tokenize PDF content into individual tokens.
|
|
1190
|
+
*
|
|
1191
|
+
* The tokeniser converts raw PDF bytes into a stream of tokens that can
|
|
1192
|
+
* be further processed by the decoder or used for PDF analysis.
|
|
1193
|
+
*/
|
|
1194
|
+
|
|
1195
|
+
// Sample PDF content to tokenize
const pdfContent = `%PDF-2.0
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>
endobj
trailer
<< /Size 4 /Root 1 0 R >>
startxref
0
%%EOF`

// Encode the sample once; the incremental example further down reuses `bytes`
const bytes = stringToBytes(pdfContent)

// Give the whole input to a fresh tokeniser, then mark the input as complete
const tokeniser = new PdfByteStreamTokeniser()
tokeniser.feedBytes(bytes)
tokeniser.eof = true

// Drain every token the tokeniser produces
const tokens: PdfToken[] = Array.from(tokeniser.nextItems())
|
|
1227
|
+
|
|
1228
|
+
// Display tokenisation results
console.log('PDF Tokenisation Example')
console.log('========================\n')
// Report the size of the encoded input: `pdfContent.length` counts UTF-16 code
// units, which only equals the byte count while the sample is pure ASCII
console.log(`Input: ${bytes.length} bytes`)
console.log(`Output: ${tokens.length} tokens\n`)
|
|
1233
|
+
|
|
1234
|
+
// Tally tokens per class name; a Map keeps the order in which types first appear
const countsByTokenType = new Map<string, number>()
tokens.forEach((token) => {
    const typeName = token.constructor.name
    const seenSoFar = countsByTokenType.get(typeName) ?? 0
    countsByTokenType.set(typeName, seenSoFar + 1)
})

console.log('Token type counts:')
countsByTokenType.forEach((count, typeName) => {
    console.log(` ${typeName}: ${count}`)
})
|
|
1245
|
+
|
|
1246
|
+
console.log('\nFirst 20 tokens:')
// Longest token text shown before it is cut off
const previewLimit = 40
for (const token of tokens.slice(0, 20)) {
    const tokenString = token.toString()
    // Add the ellipsis only when text was actually cut off; a token that is
    // exactly `previewLimit` characters long is shown unchanged
    const displayString =
        tokenString.length > previewLimit
            ? tokenString.slice(0, previewLimit) + '...'
            : tokenString
    console.log(
        ` ${token.constructor.name.padEnd(30)} ${JSON.stringify(displayString)}`,
    )
}
|
|
1255
|
+
|
|
1256
|
+
// Example: Tokenising incrementally (useful for streaming)
console.log('\n\nIncremental Tokenisation Example')
console.log('=================================\n')

const incrementalTokeniser = new PdfByteStreamTokeniser()

// Counts the tokens that are currently available from the tokeniser
const countAvailableTokens = (): number => {
    let available = 0
    for (const _ of incrementalTokeniser.nextItems()) {
        available++
    }
    return available
}

// The input is fed in fixed-size slices to simulate streaming
const chunkSize = 50
const numChunks = Math.ceil(bytes.length / chunkSize)

console.log(`Processing ${numChunks} chunks of ~${chunkSize} bytes each...`)

let totalTokens = 0
for (let chunkIndex = 0; chunkIndex < numChunks; chunkIndex++) {
    const start = chunkIndex * chunkSize
    incrementalTokeniser.feedBytes(bytes.slice(start, start + chunkSize))

    // Consume whatever became available after this chunk
    totalTokens += countAvailableTokens()
}

// Signal end of input, then pick up the tokens that were still pending
incrementalTokeniser.eof = true
totalTokens += countAvailableTokens()

console.log(`Total tokens produced: ${totalTokens}`)
|
|
1286
|
+
|
|
1287
|
+
// Example: Custom stream chunk size
console.log('\n\nCustom Stream Chunk Size Example')
console.log('================================\n')

const customTokeniser = new PdfByteStreamTokeniser({
    streamChunkSizeBytes: 512, // Customize the chunk size for stream content
})

// /Length is the byte count of the stream data: the two text lines below
// joined by a single line feed (61 + 1 + 59 = 121). The end-of-line before
// `endstream` is not counted.
const streamContent = `1 0 obj
<< /Length 121 >>
stream
This is stream content that will be chunked by the tokeniser.
The chunk size determines how the stream data is delivered.
endstream
endobj`

customTokeniser.feedBytes(stringToBytes(streamContent))
customTokeniser.eof = true

console.log('Tokens from stream content:')
for (const token of customTokeniser.nextItems()) {
    const type = token.constructor.name
    // JSON.stringify already renders a newline as \n; escaping it by hand
    // first would print a doubled backslash
    const preview = token.toString().slice(0, 50)
    console.log(` ${type.padEnd(25)} ${JSON.stringify(preview)}`)
}
|
|
1312
|
+
```
|
|
1313
|
+
|
|
1314
|
+
## Decoder usage example
|
|
1315
|
+
|
|
1316
|
+
```typescript
|
|
1317
|
+
import { PdfDecoder } from 'pdf-lite/core/decoder'
|
|
1318
|
+
import { PdfByteStreamTokeniser } from 'pdf-lite/core/tokeniser'
|
|
1319
|
+
import { pdfDecoder } from 'pdf-lite/core/generators'
|
|
1320
|
+
import { PdfObject } from 'pdf-lite/core/objects/pdf-object'
|
|
1321
|
+
import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
|
|
1322
|
+
import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
|
|
1323
|
+
import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
|
|
1324
|
+
import { PdfTrailer } from 'pdf-lite/core/objects/pdf-trailer'
|
|
1325
|
+
import { PdfXRefTable } from 'pdf-lite/core/objects/pdf-xref-table'
|
|
1326
|
+
import { PdfComment } from 'pdf-lite/core/objects/pdf-comment'
|
|
1327
|
+
import { PdfStartXRef } from 'pdf-lite/core/objects/pdf-start-xref'
|
|
1328
|
+
import { stringToBytes } from 'pdf-lite/utils/stringToBytes'
|
|
1329
|
+
|
|
1330
|
+
/**
|
|
1331
|
+
* This example demonstrates how to use the PdfDecoder
|
|
1332
|
+
* to decode PDF tokens into PDF objects.
|
|
1333
|
+
*
|
|
1334
|
+
* The decoder transforms a stream of tokens (from the tokeniser)
|
|
1335
|
+
* into high-level PDF objects like dictionaries, arrays, streams,
|
|
1336
|
+
* and indirect objects.
|
|
1337
|
+
*/
|
|
1338
|
+
|
|
1339
|
+
// Sample PDF content to decode
// /Length on object 4 is the byte count of its single line of stream data (38).
// NOTE(review): the xref offsets and the startxref value look approximate
// rather than exact byte positions for this text - confirm before reusing the
// sample as a valid file.
const pdfContent = `%PDF-2.0
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
endobj
4 0 obj
<< /Length 38 >>
stream
BT /F1 12 Tf 100 700 Td (Hello!) Tj ET
endstream
endobj
xref
0 5
0000000000 65535 f
0000000010 00000 n
0000000060 00000 n
0000000118 00000 n
0000000217 00000 n
trailer
<< /Size 5 /Root 1 0 R >>
startxref
310
%%EOF`

// Method 1: Using the pdfDecoder generator (recommended)
console.log('PDF Decoder Example - Using pdfDecoder Generator')
console.log('================================================\n')

// `bytes` is reused by the manual and incremental examples below
const bytes = stringToBytes(pdfContent)

// pdfDecoder takes a list of byte chunks and yields decoded objects
const objects: PdfObject[] = []
for (const obj of pdfDecoder([bytes])) {
    objects.push(obj)
}

console.log(`Decoded ${objects.length} PDF objects:\n`)
|
|
1381
|
+
|
|
1382
|
+
// Print one summary line per decoded object, with extra detail for the kinds
// this example knows about. The checks run in a fixed order and the first
// match wins.
for (const obj of objects) {
    const type = obj.constructor.name

    if (obj instanceof PdfComment) {
        console.log(` ${type}: ${obj.toString().trim()}`)
        continue
    }

    if (obj instanceof PdfIndirectObject) {
        const inner = obj.content
        console.log(
            ` ${type}: ${obj.objectNumber} ${obj.generationNumber} obj (${inner.constructor.name})`,
        )

        // Dictionary content: list its keys
        if (inner instanceof PdfDictionary) {
            console.log(` Keys: ${Object.keys(inner.values).join(', ')}`)
        }
        // Stream content: report the length of its `original` data
        if (inner instanceof PdfStream) {
            console.log(` Stream length: ${inner.original.length} bytes`)
        }
        continue
    }

    if (obj instanceof PdfXRefTable) {
        console.log(` ${type}: ${obj.entries.length} entries`)
        continue
    }

    if (obj instanceof PdfTrailer) {
        const size = obj.dict.get('Size')?.toString() ?? 'unknown'
        console.log(` ${type}: Size=${size}`)
        continue
    }

    if (obj instanceof PdfStartXRef) {
        console.log(` ${type}: offset=${obj.offset}`)
        continue
    }

    // Anything else: just its class name
    console.log(` ${type}`)
}
|
|
1414
|
+
|
|
1415
|
+
// Method 2: Using the PdfDecoder class directly with a tokeniser
console.log('\n\nPDF Decoder Example - Manual Pipeline')
console.log('======================================\n')

const decoder = new PdfDecoder({ ignoreWhitespace: true })
const tokeniser = new PdfByteStreamTokeniser()

// Stage 1: bytes -> tokens (the whole input at once, then end of input)
tokeniser.feedBytes(bytes)
tokeniser.eof = true

// Stage 2: tokens -> decoder; end of input is flagged once every token is fed
for (const token of tokeniser.nextItems()) {
    decoder.feed(token)
}
decoder.eof = true

// Stage 3: drain the decoded objects
const manualObjects: PdfObject[] = Array.from(decoder.nextItems())

console.log(`Decoded ${manualObjects.length} objects with whitespace ignored\n`)

// Tally objects per class name, in order of first appearance
const typeCounts = new Map<string, number>()
manualObjects.forEach((decoded) => {
    const typeName = decoded.constructor.name
    typeCounts.set(typeName, (typeCounts.get(typeName) ?? 0) + 1)
})

console.log('Object type counts:')
typeCounts.forEach((count, typeName) => {
    console.log(` ${typeName}: ${count}`)
})
|
|
1451
|
+
|
|
1452
|
+
// Example: Incremental decoding (useful for streaming)
console.log('\n\nIncremental Decoding Example')
console.log('============================\n')

const streamTokeniser = new PdfByteStreamTokeniser()
const streamDecoder = new PdfDecoder()

// The input is processed in fixed-size slices to simulate streaming
const chunkSize = 100
let objectCount = 0

for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    const chunkNumber = Math.floor(offset / chunkSize) + 1
    streamTokeniser.feedBytes(bytes.slice(offset, offset + chunkSize))

    // Forward each available token, reporting objects as soon as they complete
    for (const token of streamTokeniser.nextItems()) {
        streamDecoder.feed(token)

        for (const obj of streamDecoder.nextItems()) {
            objectCount++
            console.log(
                ` Chunk ${chunkNumber}: Found ${obj.constructor.name}`,
            )
        }
    }
}

// Finalize: flag end of input on both stages, then flush what is left.
// NOTE(review): the decoder's eof flag is set before the last tokens are fed
// here, whereas the manual pipeline above sets it afterwards - confirm that
// the order does not matter to PdfDecoder.
streamTokeniser.eof = true
streamDecoder.eof = true

for (const token of streamTokeniser.nextItems()) {
    streamDecoder.feed(token)
}

for (const obj of streamDecoder.nextItems()) {
    objectCount++
    console.log(` Final: Found ${obj.constructor.name}`)
}

console.log(`\nTotal objects decoded incrementally: ${objectCount}`)
|
|
1495
|
+
|
|
1496
|
+
// Example: Preserving whitespace for round-trip
console.log('\n\nRound-Trip Example (Preserving Whitespace)')
console.log('==========================================\n')

const simpleDict = '<< /Type /Page /MediaBox [0 0 612 792] >>'

// Decode with whitespace retained
const roundTripOptions = { ignoreWhitespace: false }
const preservingDecoder = pdfDecoder([stringToBytes(simpleDict)], roundTripOptions)

for (const decoded of preservingDecoder) {
    // Serialise the object again and compare the text with the input
    const reconstructed = decoded.toString()
    const isIdentical = simpleDict === reconstructed

    console.log('Original: ', JSON.stringify(simpleDict))
    console.log('Reconstructed:', JSON.stringify(reconstructed))
    console.log('Match:', isIdentical)
}
|
|
1512
|
+
```
|