pdf-lite 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.commitlintrc.cjs +25 -0
  2. package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
  3. package/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
  4. package/.github/workflows/docs.yaml +93 -0
  5. package/.github/workflows/prepare-release.yaml +79 -0
  6. package/.github/workflows/release.yaml +80 -0
  7. package/.github/workflows/test.yaml +35 -0
  8. package/.husky/commit-msg +1 -0
  9. package/.husky/pre-commit +1 -0
  10. package/.prettierignore +4 -0
  11. package/.prettierrc +4 -0
  12. package/CONTRIBUTING.md +109 -0
  13. package/EXAMPLES.md +1515 -0
  14. package/LICENSE +21 -0
  15. package/README.md +285 -0
  16. package/examples/001-create-pdf.ts +112 -0
  17. package/examples/002-create-encrypted-pdf.ts +121 -0
  18. package/examples/003-sign-pdf.ts +347 -0
  19. package/examples/004-incremental-update.ts +206 -0
  20. package/examples/005-modify-acroform.ts +374 -0
  21. package/examples/006-tokeniser-example.ts +131 -0
  22. package/examples/007-decoder-example.ts +197 -0
  23. package/package.json +72 -0
  24. package/packages/pdf-lite/README.md +3 -0
  25. package/packages/pdf-lite/package.json +68 -0
  26. package/packages/pdf-lite/scripts/create-encryption-tests.sh +41 -0
  27. package/packages/pdf-lite/scripts/gen-signing-keys.sh +290 -0
  28. package/packages/pdf-lite/scripts/generate-all-signing-keys.sh +70 -0
  29. package/packages/pdf-lite/src/core/decoder.ts +454 -0
  30. package/packages/pdf-lite/src/core/generators.ts +128 -0
  31. package/packages/pdf-lite/src/core/incremental-parser.ts +221 -0
  32. package/packages/pdf-lite/src/core/index.ts +2 -0
  33. package/packages/pdf-lite/src/core/objects/pdf-array.ts +54 -0
  34. package/packages/pdf-lite/src/core/objects/pdf-boolean.ts +19 -0
  35. package/packages/pdf-lite/src/core/objects/pdf-comment.ts +50 -0
  36. package/packages/pdf-lite/src/core/objects/pdf-date.ts +74 -0
  37. package/packages/pdf-lite/src/core/objects/pdf-dictionary.ts +171 -0
  38. package/packages/pdf-lite/src/core/objects/pdf-hexadecimal.ts +54 -0
  39. package/packages/pdf-lite/src/core/objects/pdf-indirect-object.ts +137 -0
  40. package/packages/pdf-lite/src/core/objects/pdf-name.ts +19 -0
  41. package/packages/pdf-lite/src/core/objects/pdf-null.ts +15 -0
  42. package/packages/pdf-lite/src/core/objects/pdf-number.ts +98 -0
  43. package/packages/pdf-lite/src/core/objects/pdf-object-reference.ts +30 -0
  44. package/packages/pdf-lite/src/core/objects/pdf-object.ts +107 -0
  45. package/packages/pdf-lite/src/core/objects/pdf-start-xref.ts +39 -0
  46. package/packages/pdf-lite/src/core/objects/pdf-stream.ts +687 -0
  47. package/packages/pdf-lite/src/core/objects/pdf-string.ts +38 -0
  48. package/packages/pdf-lite/src/core/objects/pdf-trailer.ts +57 -0
  49. package/packages/pdf-lite/src/core/objects/pdf-xref-table.ts +264 -0
  50. package/packages/pdf-lite/src/core/parser.ts +22 -0
  51. package/packages/pdf-lite/src/core/ref.ts +102 -0
  52. package/packages/pdf-lite/src/core/serializer.ts +68 -0
  53. package/packages/pdf-lite/src/core/streams/object-stream.ts +20 -0
  54. package/packages/pdf-lite/src/core/tokeniser.ts +687 -0
  55. package/packages/pdf-lite/src/core/tokens/boolean-token.ts +20 -0
  56. package/packages/pdf-lite/src/core/tokens/byte-offset-token.ts +20 -0
  57. package/packages/pdf-lite/src/core/tokens/comment-token.ts +32 -0
  58. package/packages/pdf-lite/src/core/tokens/end-array-token.ts +10 -0
  59. package/packages/pdf-lite/src/core/tokens/end-dictionary-token.ts +10 -0
  60. package/packages/pdf-lite/src/core/tokens/end-object-token.ts +10 -0
  61. package/packages/pdf-lite/src/core/tokens/end-stream-token.ts +11 -0
  62. package/packages/pdf-lite/src/core/tokens/hexadecimal-token.ts +22 -0
  63. package/packages/pdf-lite/src/core/tokens/name-token.ts +19 -0
  64. package/packages/pdf-lite/src/core/tokens/null-token.ts +9 -0
  65. package/packages/pdf-lite/src/core/tokens/number-token.ts +164 -0
  66. package/packages/pdf-lite/src/core/tokens/object-reference-token.ts +24 -0
  67. package/packages/pdf-lite/src/core/tokens/start-array-token.ts +10 -0
  68. package/packages/pdf-lite/src/core/tokens/start-dictionary-token.ts +10 -0
  69. package/packages/pdf-lite/src/core/tokens/start-object-token.ts +28 -0
  70. package/packages/pdf-lite/src/core/tokens/start-stream-token.ts +52 -0
  71. package/packages/pdf-lite/src/core/tokens/start-xref-token.ts +10 -0
  72. package/packages/pdf-lite/src/core/tokens/stream-chunk-token.ts +8 -0
  73. package/packages/pdf-lite/src/core/tokens/string-token.ts +17 -0
  74. package/packages/pdf-lite/src/core/tokens/token.ts +43 -0
  75. package/packages/pdf-lite/src/core/tokens/trailer-token.ts +12 -0
  76. package/packages/pdf-lite/src/core/tokens/whitespace-token.ts +43 -0
  77. package/packages/pdf-lite/src/core/tokens/xref-table-entry-token.ts +65 -0
  78. package/packages/pdf-lite/src/core/tokens/xref-table-section-start-token.ts +31 -0
  79. package/packages/pdf-lite/src/core/tokens/xref-table-start-token.ts +13 -0
  80. package/packages/pdf-lite/src/crypto/ciphers/aes128.ts +63 -0
  81. package/packages/pdf-lite/src/crypto/ciphers/aes256.ts +50 -0
  82. package/packages/pdf-lite/src/crypto/ciphers/rc4.ts +82 -0
  83. package/packages/pdf-lite/src/crypto/constants.ts +10 -0
  84. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation-aes256.ts +213 -0
  85. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation.ts +122 -0
  86. package/packages/pdf-lite/src/crypto/key-gen/key-gen-aes256.ts +79 -0
  87. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-128.ts +190 -0
  88. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-40.ts +129 -0
  89. package/packages/pdf-lite/src/crypto/types.ts +6 -0
  90. package/packages/pdf-lite/src/crypto/utils.ts +81 -0
  91. package/packages/pdf-lite/src/filters/ascii85.ts +128 -0
  92. package/packages/pdf-lite/src/filters/asciihex.ts +55 -0
  93. package/packages/pdf-lite/src/filters/flate.ts +39 -0
  94. package/packages/pdf-lite/src/filters/lzw.ts +144 -0
  95. package/packages/pdf-lite/src/filters/pass-through.ts +37 -0
  96. package/packages/pdf-lite/src/filters/runlength.ts +92 -0
  97. package/packages/pdf-lite/src/filters/types.ts +21 -0
  98. package/packages/pdf-lite/src/index.ts +4 -0
  99. package/packages/pdf-lite/src/pdf/errors.ts +5 -0
  100. package/packages/pdf-lite/src/pdf/index.ts +4 -0
  101. package/packages/pdf-lite/src/pdf/pdf-document.ts +924 -0
  102. package/packages/pdf-lite/src/pdf/pdf-reader.ts +57 -0
  103. package/packages/pdf-lite/src/pdf/pdf-revision.ts +234 -0
  104. package/packages/pdf-lite/src/pdf/pdf-xref-lookup.ts +527 -0
  105. package/packages/pdf-lite/src/security/crypt-filters/aesv2.ts +58 -0
  106. package/packages/pdf-lite/src/security/crypt-filters/aesv3.ts +56 -0
  107. package/packages/pdf-lite/src/security/crypt-filters/base.ts +140 -0
  108. package/packages/pdf-lite/src/security/crypt-filters/identity.ts +40 -0
  109. package/packages/pdf-lite/src/security/crypt-filters/v2.ts +59 -0
  110. package/packages/pdf-lite/src/security/handlers/base.ts +625 -0
  111. package/packages/pdf-lite/src/security/handlers/pubSec.ts +413 -0
  112. package/packages/pdf-lite/src/security/handlers/utils.ts +304 -0
  113. package/packages/pdf-lite/src/security/handlers/v1.ts +225 -0
  114. package/packages/pdf-lite/src/security/handlers/v2.ts +128 -0
  115. package/packages/pdf-lite/src/security/handlers/v4.ts +379 -0
  116. package/packages/pdf-lite/src/security/handlers/v5.ts +298 -0
  117. package/packages/pdf-lite/src/security/types.ts +158 -0
  118. package/packages/pdf-lite/src/signing/document-security-store.ts +224 -0
  119. package/packages/pdf-lite/src/signing/index.ts +3 -0
  120. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-detached.ts +154 -0
  121. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-sha1.ts +161 -0
  122. package/packages/pdf-lite/src/signing/signatures/adbe-x509-rsa-sha1.ts +106 -0
  123. package/packages/pdf-lite/src/signing/signatures/base.ts +229 -0
  124. package/packages/pdf-lite/src/signing/signatures/etsi-cades-detached.ts +229 -0
  125. package/packages/pdf-lite/src/signing/signatures/etsi-rfc3161.ts +92 -0
  126. package/packages/pdf-lite/src/signing/signatures/index.ts +6 -0
  127. package/packages/pdf-lite/src/signing/signer.ts +120 -0
  128. package/packages/pdf-lite/src/signing/types.ts +86 -0
  129. package/packages/pdf-lite/src/signing/utils.ts +71 -0
  130. package/packages/pdf-lite/src/types.ts +44 -0
  131. package/packages/pdf-lite/src/utils/IterableReadableStream.ts +30 -0
  132. package/packages/pdf-lite/src/utils/algos.ts +446 -0
  133. package/packages/pdf-lite/src/utils/assert.ts +42 -0
  134. package/packages/pdf-lite/src/utils/bytesToHex.ts +18 -0
  135. package/packages/pdf-lite/src/utils/bytesToHexBytes.ts +27 -0
  136. package/packages/pdf-lite/src/utils/bytesToString.ts +17 -0
  137. package/packages/pdf-lite/src/utils/concatUint8Arrays.ts +26 -0
  138. package/packages/pdf-lite/src/utils/escapeString.ts +49 -0
  139. package/packages/pdf-lite/src/utils/hexBytesToBytes.ts +22 -0
  140. package/packages/pdf-lite/src/utils/hexBytesToString.ts +21 -0
  141. package/packages/pdf-lite/src/utils/hexToBytes.ts +18 -0
  142. package/packages/pdf-lite/src/utils/padBytes.ts +25 -0
  143. package/packages/pdf-lite/src/utils/predictors.ts +332 -0
  144. package/packages/pdf-lite/src/utils/replaceInBuffer.ts +56 -0
  145. package/packages/pdf-lite/src/utils/stringToBytes.ts +22 -0
  146. package/packages/pdf-lite/src/utils/stringToHexBytes.ts +23 -0
  147. package/packages/pdf-lite/src/utils/unescapeString.ts +123 -0
  148. package/packages/pdf-lite/test/acceptance/__snapshots__/versions.node.test.ts.snap +60766 -0
  149. package/packages/pdf-lite/test/acceptance/fixtures/1.3/basic.pdf +0 -0
  150. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-128.pdf +0 -0
  151. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-256.pdf +0 -0
  152. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-128.pdf +0 -0
  153. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-40.pdf +0 -0
  154. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic.pdf +0 -0
  155. package/packages/pdf-lite/test/acceptance/fixtures/1.5/basic.pdf +0 -0
  156. package/packages/pdf-lite/test/acceptance/fixtures/1.6/basic.pdf +0 -0
  157. package/packages/pdf-lite/test/acceptance/fixtures/1.7/basic.pdf +0 -0
  158. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-128.pdf +43 -0
  159. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-256.pdf +43 -0
  160. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-128.pdf +43 -0
  161. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-40.pdf +44 -0
  162. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic.pdf +79 -0
  163. package/packages/pdf-lite/test/acceptance/versions.node.test.ts +41 -0
  164. package/packages/pdf-lite/test/unit/__snapshots__/decoder.node.test.ts.snap +86947 -0
  165. package/packages/pdf-lite/test/unit/__snapshots__/tokeniser.node.test.ts.snap +131829 -0
  166. package/packages/pdf-lite/test/unit/ciphers.test.ts +61 -0
  167. package/packages/pdf-lite/test/unit/decoder.node.test.ts +21 -0
  168. package/packages/pdf-lite/test/unit/decoder.test.ts +567 -0
  169. package/packages/pdf-lite/test/unit/filters.test.ts +67 -0
  170. package/packages/pdf-lite/test/unit/fixtures/basic.pdf +0 -0
  171. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-128.pdf +0 -0
  172. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-256.pdf +0 -0
  173. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-128.pdf +0 -0
  174. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-40.pdf +43 -0
  175. package/packages/pdf-lite/test/unit/fixtures/protectedAdobeLivecycle.pdf +0 -0
  176. package/packages/pdf-lite/test/unit/fixtures/rsa-2048/index.ts +187 -0
  177. package/packages/pdf-lite/test/unit/fixtures/template.pdf +0 -0
  178. package/packages/pdf-lite/test/unit/incremental-update.test.ts +0 -0
  179. package/packages/pdf-lite/test/unit/objects.test.ts +0 -0
  180. package/packages/pdf-lite/test/unit/pdf-document-signing.test.ts +0 -0
  181. package/packages/pdf-lite/test/unit/pdf-revision.test.ts +195 -0
  182. package/packages/pdf-lite/test/unit/pdf.browser.test.ts +0 -0
  183. package/packages/pdf-lite/test/unit/predictors.test.ts +226 -0
  184. package/packages/pdf-lite/test/unit/ref.test.ts +158 -0
  185. package/packages/pdf-lite/test/unit/security-handlers.test.ts +645 -0
  186. package/packages/pdf-lite/test/unit/serializer.test.ts +81 -0
  187. package/packages/pdf-lite/test/unit/signature-objects.test.ts +814 -0
  188. package/packages/pdf-lite/test/unit/string-escaping.test.ts +84 -0
  189. package/packages/pdf-lite/test/unit/tokeniser.node.test.ts +38 -0
  190. package/packages/pdf-lite/test/unit/tokeniser.test.ts +1213 -0
  191. package/packages/pdf-lite/test/unit/utils.test.ts +248 -0
  192. package/packages/pdf-lite/test/unit/xref-lookup.test.ts +72 -0
  193. package/packages/pdf-lite/tsconfig.json +4 -0
  194. package/packages/pdf-lite/tsconfig.prod.json +8 -0
  195. package/packages/pdf-lite/typedoc.json +14 -0
  196. package/packages/pdf-lite/vitest.config.ts +43 -0
  197. package/pnpm-workspace.yaml +2 -0
  198. package/renovate.json +34 -0
  199. package/scripts/build-examples.ts +30 -0
  200. package/scripts/bump-version.sh +56 -0
  201. package/scripts/gen-html-docs.sh +21 -0
  202. package/scripts/gen-md-docs.sh +15 -0
  203. package/scripts/prepare-release.sh +33 -0
  204. package/tsconfig.json +22 -0
  205. package/tsconfig.prod.json +12 -0
  206. package/typedoc.json +34 -0
package/EXAMPLES.md ADDED
@@ -0,0 +1,1515 @@
1
+ # PDF-Lite Examples
2
+
3
+ This document collects the example scripts from the `examples/` directory, demonstrating how to use the PDF-Lite library.
4
+
5
+ ## PDF creation from scratch example
6
+
7
+ ```typescript
8
+ import { writeFileSync } from 'fs'
9
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
10
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
11
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
12
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
13
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
14
+ import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
15
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
16
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
17
+
18
+ function createPage(
19
+ contentStreamRef: PdfObjectReference,
20
+ ): PdfIndirectObject<PdfDictionary> {
21
+ const pageDict = new PdfDictionary()
22
+ pageDict.set('Type', new PdfName('Page'))
23
+ pageDict.set(
24
+ 'MediaBox',
25
+ new PdfArray([
26
+ new PdfNumber(0),
27
+ new PdfNumber(0),
28
+ new PdfNumber(612),
29
+ new PdfNumber(792),
30
+ ]),
31
+ )
32
+ pageDict.set('Contents', contentStreamRef)
33
+ return new PdfIndirectObject({ content: pageDict })
34
+ }
35
+
36
+ function createPages(
37
+ pages: PdfIndirectObject<PdfDictionary>[],
38
+ ): PdfIndirectObject<PdfDictionary> {
39
+ const pagesDict = new PdfDictionary()
40
+ pagesDict.set('Type', new PdfName('Pages'))
41
+ pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
42
+ pagesDict.set('Count', new PdfNumber(pages.length))
43
+ return new PdfIndirectObject({ content: pagesDict })
44
+ }
45
+
46
+ function createCatalog(
47
+ pagesRef: PdfObjectReference,
48
+ ): PdfIndirectObject<PdfDictionary> {
49
+ const catalogDict = new PdfDictionary()
50
+ catalogDict.set('Type', new PdfName('Catalog'))
51
+ catalogDict.set('Pages', pagesRef)
52
+ return new PdfIndirectObject({ content: catalogDict })
53
+ }
54
+
55
+ function createFont(): PdfIndirectObject<PdfDictionary> {
56
+ const fontDict = new PdfDictionary()
57
+ fontDict.set('Type', new PdfName('Font'))
58
+ fontDict.set('Subtype', new PdfName('Type1'))
59
+ fontDict.set('BaseFont', new PdfName('Helvetica'))
60
+ return new PdfIndirectObject({ content: fontDict })
61
+ }
62
+
63
+ function createResources(
64
+ fontRef: PdfObjectReference,
65
+ ): PdfIndirectObject<PdfDictionary> {
66
+ const resourcesDict = new PdfDictionary()
67
+ const fontDict = new PdfDictionary()
68
+ fontDict.set('F1', fontRef)
69
+ resourcesDict.set('Font', fontDict)
70
+ return new PdfIndirectObject({ content: resourcesDict })
71
+ }
72
+
73
+ // Create the document
74
+ const document = new PdfDocument()
75
+
76
+ // Create font
77
+ const font = createFont()
78
+ document.add(font)
79
+
80
+ // Create resources with the font
81
+ const resources = createResources(font.reference)
82
+ document.add(resources)
83
+
84
+ // Create content stream
85
+ const contentStream = new PdfIndirectObject({
86
+ content: new PdfStream({
87
+ header: new PdfDictionary(),
88
+ original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
89
+ }),
90
+ })
91
+
92
+ // Create a page
93
+ const page = createPage(contentStream.reference)
94
+ // Add resources to the page
95
+ page.content.set('Resources', resources.reference)
96
+ document.add(page)
97
+
98
+ // Create pages collection
99
+ const pages = createPages([page])
100
+ // Set parent reference for the page
101
+ page.content.set('Parent', pages.reference)
102
+ document.add(pages)
103
+
104
+ // Create catalog
105
+ const catalog = createCatalog(pages.reference)
106
+ document.add(catalog)
107
+
108
+ // Set the catalog as the root
109
+ document.trailerDict.set('Root', catalog.reference)
110
+
111
+ document.add(contentStream)
112
+ await document.commit()
113
+
114
+ const file = `${import.meta.dirname}/tmp/created.pdf`
115
+ console.log(`Writing PDF to: ${file}`)
116
+
117
+ writeFileSync(file, document.toBytes()) // writeFileSync is synchronous and returns undefined, so there is nothing to await
118
+ ```
119
+
120
+ ## PDF creation with encryption example
121
+
122
+ ```typescript
123
+ import { writeFileSync } from 'fs'
124
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
125
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
126
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
127
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
128
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
129
+ import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
130
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
131
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
132
+ import { PdfV2SecurityHandler } from 'pdf-lite/security/handlers/v2'
133
+
134
+ function createPage(
135
+ contentStreamRef: PdfObjectReference,
136
+ ): PdfIndirectObject<PdfDictionary> {
137
+ const pageDict = new PdfDictionary()
138
+ pageDict.set('Type', new PdfName('Page'))
139
+ pageDict.set(
140
+ 'MediaBox',
141
+ new PdfArray([
142
+ new PdfNumber(0),
143
+ new PdfNumber(0),
144
+ new PdfNumber(612),
145
+ new PdfNumber(792),
146
+ ]),
147
+ )
148
+ pageDict.set('Contents', contentStreamRef)
149
+ return new PdfIndirectObject({ content: pageDict })
150
+ }
151
+
152
+ function createPages(
153
+ pages: PdfIndirectObject<PdfDictionary>[],
154
+ ): PdfIndirectObject<PdfDictionary> {
155
+ const pagesDict = new PdfDictionary()
156
+ pagesDict.set('Type', new PdfName('Pages'))
157
+ pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
158
+ pagesDict.set('Count', new PdfNumber(pages.length))
159
+ return new PdfIndirectObject({ content: pagesDict })
160
+ }
161
+
162
+ function createCatalog(
163
+ pagesRef: PdfObjectReference,
164
+ ): PdfIndirectObject<PdfDictionary> {
165
+ const catalogDict = new PdfDictionary()
166
+ catalogDict.set('Type', new PdfName('Catalog'))
167
+ catalogDict.set('Pages', pagesRef)
168
+ return new PdfIndirectObject({ content: catalogDict })
169
+ }
170
+
171
+ function createFont(): PdfIndirectObject<PdfDictionary> {
172
+ const fontDict = new PdfDictionary()
173
+ fontDict.set('Type', new PdfName('Font'))
174
+ fontDict.set('Subtype', new PdfName('Type1'))
175
+ fontDict.set('BaseFont', new PdfName('Helvetica'))
176
+ return new PdfIndirectObject({ content: fontDict })
177
+ }
178
+
179
+ function createResources(
180
+ fontRef: PdfObjectReference,
181
+ ): PdfIndirectObject<PdfDictionary> {
182
+ const resourcesDict = new PdfDictionary()
183
+ const fontDict = new PdfDictionary()
184
+ fontDict.set('F1', fontRef)
185
+ resourcesDict.set('Font', fontDict)
186
+ return new PdfIndirectObject({ content: resourcesDict })
187
+ }
188
+
189
+ // Create the document
190
+ const document = new PdfDocument()
191
+
192
+ // Create font
193
+ const font = createFont()
194
+ document.add(font)
195
+
196
+ // Create resources with the font
197
+ const resources = createResources(font.reference)
198
+ document.add(resources)
199
+
200
+ // Create content stream
201
+ const contentStream = new PdfIndirectObject({
202
+ content: new PdfStream({
203
+ header: new PdfDictionary(),
204
+ original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
205
+ }),
206
+ })
207
+
208
+ // Create a page
209
+ const page = createPage(contentStream.reference)
210
+ // Add resources to the page
211
+ page.content.set('Resources', resources.reference)
212
+ document.add(page)
213
+
214
+ // Create pages collection
215
+ const pages = createPages([page])
216
+ // Set parent reference for the page
217
+ page.content.set('Parent', pages.reference)
218
+ document.add(pages)
219
+
220
+ // Create catalog
221
+ const catalog = createCatalog(pages.reference)
222
+ document.add(catalog)
223
+
224
+ // Set the catalog as the root
225
+ document.trailerDict.set('Root', catalog.reference)
226
+
227
+ document.add(contentStream)
228
+ await document.commit()
229
+
230
+ document.securityHandler = new PdfV2SecurityHandler({
231
+ password: 'up',
232
+ documentId: 'cafebabe',
233
+ encryptMetadata: true,
234
+ })
235
+
236
+ await document.encrypt()
237
+
238
+ const file = `${import.meta.dirname}/tmp/encrypted.pdf`
239
+ console.log(`Writing encrypted PDF to: ${file}. Password: "up"`)
240
+
241
+ writeFileSync(file, document.toBytes()) // writeFileSync is synchronous and returns undefined, so there is nothing to await
242
+ ```
243
+
244
+ ## PDF signature example
245
+
246
+ ```typescript
247
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
248
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
249
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
250
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
251
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
252
+ import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
253
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
254
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
255
+ import { PdfString } from 'pdf-lite/core/objects/pdf-string'
256
+ import {
257
+ PdfAdbePkcsX509RsaSha1SignatureObject,
258
+ PdfAdbePkcs7DetachedSignatureObject,
259
+ PdfAdbePkcs7Sha1SignatureObject,
260
+ PdfEtsiCadesDetachedSignatureObject,
261
+ PdfEtsiRfc3161SignatureObject,
262
+ } from 'pdf-lite'
263
+ import { rsaSigningKeys } from '../packages/pdf-lite/test/unit/fixtures/rsa-2048/index'
264
+ import fs from 'fs/promises'
265
+
266
+ function createPage(
267
+ contentStreamRef: PdfObjectReference,
268
+ ): PdfIndirectObject<PdfDictionary> {
269
+ const pageDict = new PdfDictionary()
270
+ pageDict.set('Type', new PdfName('Page'))
271
+ pageDict.set(
272
+ 'MediaBox',
273
+ new PdfArray([
274
+ new PdfNumber(0),
275
+ new PdfNumber(0),
276
+ new PdfNumber(612),
277
+ new PdfNumber(792),
278
+ ]),
279
+ )
280
+ pageDict.set('Contents', contentStreamRef)
281
+ return new PdfIndirectObject({ content: pageDict })
282
+ }
283
+
284
+ function createPages(
285
+ pages: PdfIndirectObject<PdfDictionary>[],
286
+ ): PdfIndirectObject<PdfDictionary> {
287
+ const pagesDict = new PdfDictionary()
288
+ pagesDict.set('Type', new PdfName('Pages'))
289
+ pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
290
+ pagesDict.set('Count', new PdfNumber(pages.length))
291
+ return new PdfIndirectObject({ content: pagesDict })
292
+ }
293
+
294
+ function createCatalog(
295
+ pagesRef: PdfObjectReference,
296
+ ): PdfIndirectObject<PdfDictionary> {
297
+ const catalogDict = new PdfDictionary()
298
+ catalogDict.set('Type', new PdfName('Catalog'))
299
+ catalogDict.set('Pages', pagesRef)
300
+ return new PdfIndirectObject({ content: catalogDict })
301
+ }
302
+
303
+ function createFont(): PdfIndirectObject<PdfDictionary> {
304
+ const fontDict = new PdfDictionary()
305
+ fontDict.set('Type', new PdfName('Font'))
306
+ fontDict.set('Subtype', new PdfName('Type1'))
307
+ fontDict.set('BaseFont', new PdfName('Helvetica'))
308
+ return new PdfIndirectObject({ content: fontDict })
309
+ }
310
+
311
+ function createResources(
312
+ fontRef: PdfObjectReference,
313
+ ): PdfIndirectObject<PdfDictionary> {
314
+ const resourcesDict = new PdfDictionary()
315
+ const fontDict = new PdfDictionary()
316
+ fontDict.set('F1', fontRef)
317
+ resourcesDict.set('Font', fontDict)
318
+ return new PdfIndirectObject({ content: resourcesDict })
319
+ }
320
+
321
+ function createPageWithSignatureField(
322
+ contentStreamRef: PdfObjectReference,
323
+ signatureAnnotRef: PdfObjectReference,
324
+ ): PdfIndirectObject<PdfDictionary> {
325
+ const pageDict = new PdfDictionary()
326
+ pageDict.set('Type', new PdfName('Page'))
327
+ pageDict.set(
328
+ 'MediaBox',
329
+ new PdfArray([
330
+ new PdfNumber(0),
331
+ new PdfNumber(0),
332
+ new PdfNumber(612),
333
+ new PdfNumber(792),
334
+ ]),
335
+ )
336
+ pageDict.set('Contents', contentStreamRef)
337
+ pageDict.set('Annots', new PdfArray([signatureAnnotRef]))
338
+
339
+ return new PdfIndirectObject({ content: pageDict })
340
+ }
341
+
342
+ function createSignatureAnnotation(
343
+ signatureRef: PdfObjectReference,
344
+ appearanceStreamRef: PdfObjectReference,
345
+ pageRef: PdfObjectReference,
346
+ signatureName: string,
347
+ ): PdfIndirectObject<PdfDictionary> {
348
+ const signatureAnnotation = new PdfDictionary()
349
+ signatureAnnotation.set('Type', new PdfName('Annot'))
350
+ signatureAnnotation.set('Subtype', new PdfName('Widget'))
351
+ signatureAnnotation.set('FT', new PdfName('Sig'))
352
+ signatureAnnotation.set('T', new PdfString(signatureName))
353
+ signatureAnnotation.set(
354
+ 'Rect',
355
+ new PdfArray([
356
+ new PdfNumber(135), // x1: Start after "Signature: " text (~72 + 63)
357
+ new PdfNumber(640), // y1: Bottom of signature area (652 - 12)
358
+ new PdfNumber(400), // x2: End of signature line
359
+ new PdfNumber(665), // y2: Top of signature area (652 + 13)
360
+ ]),
361
+ )
362
+ signatureAnnotation.set('F', new PdfNumber(4))
363
+ signatureAnnotation.set('P', pageRef) // Reference to parent page
364
+ signatureAnnotation.set('V', signatureRef)
365
+
366
+ // Add appearance dictionary
367
+ const appearanceDict = new PdfDictionary()
368
+ appearanceDict.set('N', appearanceStreamRef)
369
+ signatureAnnotation.set('AP', appearanceDict)
370
+
371
+ return new PdfIndirectObject({ content: signatureAnnotation })
372
+ }
373
+
374
+ function createSignatureAppearance(): PdfIndirectObject<PdfStream> {
375
+ // Create font for appearance
376
+ const appearanceFont = new PdfDictionary()
377
+ appearanceFont.set('Type', new PdfName('Font'))
378
+ appearanceFont.set('Subtype', new PdfName('Type1'))
379
+ appearanceFont.set('BaseFont', new PdfName('Helvetica'))
380
+
381
+ const fontDict = new PdfDictionary()
382
+ fontDict.set('F1', appearanceFont)
383
+
384
+ const resourcesDict = new PdfDictionary()
385
+ resourcesDict.set('Font', fontDict)
386
+
387
+ // Create appearance stream header
388
+ const appearanceHeader = new PdfDictionary()
389
+ appearanceHeader.set('Type', new PdfName('XObject'))
390
+ appearanceHeader.set('Subtype', new PdfName('Form'))
391
+ appearanceHeader.set(
392
+ 'BBox',
393
+ new PdfArray([
394
+ new PdfNumber(0),
395
+ new PdfNumber(0),
396
+ new PdfNumber(265), // Width: 400 - 135
397
+ new PdfNumber(25), // Height: 665 - 640
398
+ ]),
399
+ )
400
+ appearanceHeader.set('Resources', resourcesDict)
401
+
402
+ // Create appearance stream for the signature
403
+ return new PdfIndirectObject({
404
+ content: new PdfStream({
405
+ header: appearanceHeader,
406
+ original:
407
+ 'BT /F1 10 Tf 5 14 Td (Digitally signed by: Jake Shirley) Tj ET',
408
+ }),
409
+ })
410
+ }
411
+
412
+ // Create the document
413
+ const document = new PdfDocument()
414
+
415
+ // Create font
416
+ const font = createFont()
417
+ document.add(font)
418
+
419
+ // Create resources with the font
420
+ const resources = createResources(font.reference)
421
+ document.add(resources)
422
+
423
+ // Create content stream for first page
424
+ const contentStream = new PdfIndirectObject({
425
+ content: new PdfStream({
426
+ header: new PdfDictionary(),
427
+ original: 'BT /F1 24 Tf 100 700 Td (Hello, PDF-Lite!) Tj ET',
428
+ }),
429
+ })
430
+ document.add(contentStream)
431
+
432
+ // Create first page
433
+ const page1 = createPage(contentStream.reference)
434
+ page1.content.set('Resources', resources.reference)
435
+ document.add(page1)
436
+
437
+ // Array to hold all pages and signature objects
438
+ const allPages: PdfIndirectObject<PdfDictionary>[] = [page1]
439
+ const allSignatures: any[] = []
440
+ const signatureFields: PdfObjectReference[] = []
441
+
442
+ // Helper: adds one signature page to `document` (content stream, appearance stream, page, widget annotation), records the annotation reference in `signatureFields`, and returns the page object
443
+ function createSignaturePage(
444
+ signatureType: string,
445
+ signatureObj: any,
446
+ pageNumber: number,
447
+ ) {
448
+ const content = new PdfIndirectObject({
449
+ content: new PdfStream({
450
+ header: new PdfDictionary(),
451
+ original: `BT /F1 12 Tf 72 712 Td (Signature Type: ${signatureType}) Tj 0 -60 Td (Signature: ________________________________) Tj ET`,
452
+ }),
453
+ })
454
+ document.add(content)
455
+
456
+ const appearance = createSignatureAppearance()
457
+ document.add(appearance)
458
+
459
+ // Create the page before the annotation: the annotation needs page.reference for its 'P' entry
460
+ const page = createPageWithSignatureField(
461
+ content.reference,
462
+ new PdfObjectReference(0, 0), // Temporary placeholder; the 'Annots' entry is overwritten below once the annotation exists
463
+ )
464
+ page.content.set('Resources', resources.reference)
465
+ document.add(page)
466
+
467
+ // Create the widget annotation, linking the signature object, its appearance stream and the page; the field is named `Signature${pageNumber}`
468
+ const annotation = createSignatureAnnotation(
469
+ signatureObj.reference,
470
+ appearance.reference,
471
+ page.reference,
472
+ `Signature${pageNumber}`,
473
+ )
474
+ document.add(annotation)
475
+
476
+ // Replace the placeholder 'Annots' array with one holding the real annotation reference
477
+ page.content.set('Annots', new PdfArray([annotation.reference]))
478
+
479
+ signatureFields.push(annotation.reference)
480
+ return page
481
+ }
482
+
483
+ // Page 2: Adobe PKCS7 Detached
484
+ const pkcs7DetachedSig = new PdfAdbePkcs7DetachedSignatureObject({
485
+ privateKey: rsaSigningKeys.privateKey,
486
+ certificate: rsaSigningKeys.cert,
487
+ issuerCertificate: rsaSigningKeys.caCert,
488
+ name: 'Jake Shirley',
489
+ location: 'Earth',
490
+ reason: 'PKCS7 Detached Signature',
491
+ contactInfo: 'test@test.com',
492
+ revocationInfo: {
493
+ crls: [rsaSigningKeys.caCrl],
494
+ ocsps: [rsaSigningKeys.ocspResponse],
495
+ },
496
+ })
497
+ allSignatures.push(pkcs7DetachedSig)
498
+ allPages.push(createSignaturePage('Adobe PKCS7 Detached', pkcs7DetachedSig, 2))
499
+
500
+ // Page 3: Adobe PKCS7 SHA1
501
+ const pkcs7Sha1Sig = new PdfAdbePkcs7Sha1SignatureObject({
502
+ privateKey: rsaSigningKeys.privateKey,
503
+ certificate: rsaSigningKeys.cert,
504
+ issuerCertificate: rsaSigningKeys.caCert,
505
+ name: 'Jake Shirley',
506
+ location: 'Earth',
507
+ reason: 'PKCS7 SHA1 Signature',
508
+ contactInfo: 'test@test.com',
509
+ })
510
+ allSignatures.push(pkcs7Sha1Sig)
511
+ allPages.push(createSignaturePage('Adobe PKCS7 SHA1', pkcs7Sha1Sig, 3))
512
+
513
+ // Page 4: Adobe X509 RSA SHA1
514
+ const x509RsaSha1Sig = new PdfAdbePkcsX509RsaSha1SignatureObject({
515
+ privateKey: rsaSigningKeys.privateKey,
516
+ certificate: rsaSigningKeys.cert,
517
+ additionalCertificates: [rsaSigningKeys.caCert],
518
+ name: 'Jake Shirley',
519
+ location: 'Earth',
520
+ reason: 'X509 RSA SHA1 Signature',
521
+ contactInfo: 'test@test.com',
522
+ revocationInfo: {
523
+ crls: [rsaSigningKeys.caCrl],
524
+ ocsps: [rsaSigningKeys.ocspResponse],
525
+ },
526
+ })
527
+ allSignatures.push(x509RsaSha1Sig)
528
+ allPages.push(createSignaturePage('Adobe X509 RSA SHA1', x509RsaSha1Sig, 4))
529
+
530
+ // Page 5: ETSI CAdES Detached
531
+ const cadesDetachedSig = new PdfEtsiCadesDetachedSignatureObject({
532
+ privateKey: rsaSigningKeys.privateKey,
533
+ certificate: rsaSigningKeys.cert,
534
+ issuerCertificate: rsaSigningKeys.caCert,
535
+ name: 'Jake Shirley',
536
+ location: 'Earth',
537
+ reason: 'CAdES Detached Signature',
538
+ contactInfo: 'test@test.com',
539
+ revocationInfo: {
540
+ crls: [rsaSigningKeys.caCrl],
541
+ ocsps: [rsaSigningKeys.ocspResponse],
542
+ },
543
+ })
544
+ allSignatures.push(cadesDetachedSig)
545
+ allPages.push(createSignaturePage('ETSI CAdES Detached', cadesDetachedSig, 5))
546
+
547
+ // Page 6: ETSI RFC3161 (Timestamp)
548
+ const rfc3161Sig = new PdfEtsiRfc3161SignatureObject({
549
+ timeStampAuthority: {
550
+ url: 'https://freetsa.org/tsr',
551
+ },
552
+ })
553
+ allSignatures.push(rfc3161Sig)
554
+ allPages.push(createSignaturePage('ETSI RFC3161 Timestamp', rfc3161Sig, 6))
555
+
556
+ // Create pages collection with all pages
557
+ const pages = createPages(allPages)
558
+ // Set parent reference for all pages
559
+ allPages.forEach((page) => {
560
+ page.content.set('Parent', pages.reference)
561
+ })
562
+ document.add(pages)
563
+
564
+ // Create catalog with AcroForm
565
+ const catalog = createCatalog(pages.reference)
566
+
567
+ // Add AcroForm to catalog with all signature fields
568
+ const acroForm = new PdfDictionary()
569
+ acroForm.set('Fields', new PdfArray(signatureFields))
570
+ acroForm.set('SigFlags', new PdfNumber(3))
571
+ const acroFormObj = new PdfIndirectObject({ content: acroForm })
572
+ document.add(acroFormObj)
573
+ catalog.content.set('AcroForm', acroFormObj.reference)
574
+
575
+ document.add(catalog)
576
+
577
+ // Set the catalog as the root
578
+ document.trailerDict.set('Root', catalog.reference)
579
+
580
+ // IMPORTANT: Add all signatures LAST - after all other objects
581
+ // This ensures the ByteRange is calculated correctly for each signature
582
+ allSignatures.forEach((sig) => {
583
+ document.startNewRevision()
584
+ document.add(sig)
585
+ })
586
+
587
+ await document.commit()
588
+
589
+ const tmpFolder = `${import.meta.dirname}/tmp`
590
+ await fs.mkdir(tmpFolder, { recursive: true })
591
+ await fs.writeFile(`${tmpFolder}/signed-output.pdf`, document.toBytes())
592
+ ```
593
+
594
+ ## Incremental PDF update example
595
+
596
+ ```typescript
597
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
598
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
599
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
600
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
601
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
602
+ import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
603
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
604
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
605
+ import fs from 'fs/promises'
606
+
607
+ const tmpFolder = `${import.meta.dirname}/tmp`
608
+ await fs.mkdir(tmpFolder, { recursive: true })
609
+
610
+ // Helper functions for creating PDF objects
611
+ function createPage(
612
+ contentStreamRef: PdfObjectReference,
613
+ ): PdfIndirectObject<PdfDictionary> {
614
+ const pageDict = new PdfDictionary()
615
+ pageDict.set('Type', new PdfName('Page'))
616
+ pageDict.set(
617
+ 'MediaBox',
618
+ new PdfArray([
619
+ new PdfNumber(0),
620
+ new PdfNumber(0),
621
+ new PdfNumber(612),
622
+ new PdfNumber(792),
623
+ ]),
624
+ )
625
+ pageDict.set('Contents', contentStreamRef)
626
+ return new PdfIndirectObject({ content: pageDict })
627
+ }
628
+
629
+ function createPages(
630
+ pages: PdfIndirectObject<PdfDictionary>[],
631
+ ): PdfIndirectObject<PdfDictionary> {
632
+ const pagesDict = new PdfDictionary()
633
+ pagesDict.set('Type', new PdfName('Pages'))
634
+ pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
635
+ pagesDict.set('Count', new PdfNumber(pages.length))
636
+ return new PdfIndirectObject({ content: pagesDict })
637
+ }
638
+
639
+ function createCatalog(
640
+ pagesRef: PdfObjectReference,
641
+ ): PdfIndirectObject<PdfDictionary> {
642
+ const catalogDict = new PdfDictionary()
643
+ catalogDict.set('Type', new PdfName('Catalog'))
644
+ catalogDict.set('Pages', pagesRef)
645
+ return new PdfIndirectObject({ content: catalogDict })
646
+ }
647
+
648
+ function createFont(): PdfIndirectObject<PdfDictionary> {
649
+ const fontDict = new PdfDictionary()
650
+ fontDict.set('Type', new PdfName('Font'))
651
+ fontDict.set('Subtype', new PdfName('Type1'))
652
+ fontDict.set('BaseFont', new PdfName('Helvetica'))
653
+ return new PdfIndirectObject({ content: fontDict })
654
+ }
655
+
656
+ function createResources(
657
+ fontRef: PdfObjectReference,
658
+ ): PdfIndirectObject<PdfDictionary> {
659
+ const resourcesDict = new PdfDictionary()
660
+ const fontDict = new PdfDictionary()
661
+ fontDict.set('F1', fontRef)
662
+ resourcesDict.set('Font', fontDict)
663
+ return new PdfIndirectObject({ content: resourcesDict })
664
+ }
665
+
666
+ // Step 1: Create an initial PDF document
667
+ console.log('Step 1: Creating initial PDF document...')
668
+ const document = new PdfDocument()
669
+
670
+ const font = createFont()
671
+ document.add(font)
672
+ const resources = createResources(font.reference)
673
+ document.add(resources)
674
+
675
+ const contentStream = new PdfIndirectObject({
676
+ content: new PdfStream({
677
+ header: new PdfDictionary(),
678
+ original:
679
+ 'BT /F1 24 Tf 100 700 Td (Original Document - Revision 1) Tj ET',
680
+ }),
681
+ })
682
+
683
+ const page = createPage(contentStream.reference)
684
+ page.content.set('Resources', resources.reference)
685
+ document.add(page)
686
+
687
+ const pages = createPages([page])
688
+ page.content.set('Parent', pages.reference)
689
+ document.add(pages)
690
+
691
+ const catalog = createCatalog(pages.reference)
692
+ document.add(catalog)
693
+
694
+ document.trailerDict.set('Root', catalog.reference)
695
+ document.add(contentStream)
696
+
697
+ await document.commit()
698
+ // Save the original PDF
699
+ const originalPdfPath = `${tmpFolder}/original.pdf`
700
+ await fs.writeFile(originalPdfPath, document.toBytes())
701
+ console.log(`Original PDF saved to: ${originalPdfPath}`)
702
+ console.log(`Original PDF has ${document.revisions.length} revision(s)`)
703
+
704
+ // Step 2: Load the PDF and perform an incremental update
705
+ console.log('\nStep 2: Loading PDF and performing incremental update...')
706
+
707
+ // Read the existing PDF
708
+ const existingPdfBytes = await fs.readFile(originalPdfPath)
709
+ const loadedDocument = await PdfDocument.fromBytes([existingPdfBytes])
710
+
711
+ // Lock existing revisions to enable incremental mode
712
+ // This ensures changes are added as new revisions instead of modifying existing ones
713
+ loadedDocument.setIncremental(true)
714
+
715
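+ // Create replacement content for the incremental update; it reuses the original content stream's object and generation numbers, so Revision 2 carries a new version of that object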
716
+ // In a real scenario, this could be adding annotations, form fields, signatures, etc.
717
+ const newContentStream = new PdfIndirectObject({
718
+ objectNumber: contentStream.objectNumber,
719
+ generationNumber: contentStream.generationNumber,
720
+ content: new PdfStream({
721
+ header: new PdfDictionary(),
722
+ original:
723
+ 'BT /F1 18 Tf 100 650 Td (This content was added in Revision 2) Tj ET',
724
+ }),
725
+ })
726
+
727
+ // Add the new content to the document
728
+ loadedDocument.add(newContentStream)
729
+ await loadedDocument.commit()
730
+
731
+ // Save the incrementally updated PDF
732
+ const updatedPdfPath = `${tmpFolder}/incremental-update.pdf`
733
+ await fs.writeFile(updatedPdfPath, loadedDocument.toBytes())
734
+ console.log(`Incrementally updated PDF saved to: ${updatedPdfPath}`)
735
+ console.log(`Updated PDF has ${loadedDocument.revisions.length} revision(s)`)
736
+
737
+ // Step 3: Verify the incremental update preserved the original content
738
+ console.log('\nStep 3: Verifying incremental update...')
739
+
740
+ // Check file sizes to confirm incremental update (new file should be larger)
741
+ const originalStats = await fs.stat(originalPdfPath)
742
+ const updatedStats = await fs.stat(updatedPdfPath)
743
+
744
+ console.log(`Original PDF size: ${originalStats.size} bytes`)
745
+ console.log(`Updated PDF size: ${updatedStats.size} bytes`)
746
+ console.log(
747
+ `Size difference: ${updatedStats.size - originalStats.size} bytes (new revision data)`,
748
+ )
749
+
750
+ // The updated PDF contains the original bytes plus the new revision
751
+ // This is the key feature of incremental updates - the original PDF is preserved
752
+ const updatedPdfBytes = await fs.readFile(updatedPdfPath)
753
+ const originalPdfBytesForComparison = await fs.readFile(originalPdfPath)
754
+
755
+ // Spot-check preservation: the first 100 characters of the original PDF must appear within the updated PDF's leading bytes
756
+ const originalBytesMatch = updatedPdfBytes
757
+ .slice(0, originalPdfBytesForComparison.length - 10) // Exclude the %%EOF marker area
758
+ .toString()
759
+ .includes(
760
+ originalPdfBytesForComparison
761
+ .subarray(0, -10)
762
+ .toString()
763
+ .substring(0, 100),
764
+ )
765
+
766
+ console.log(`Original content preserved: ${originalBytesMatch ? 'Yes' : 'No'}`)
767
+
768
+ // Step 4: Add another incremental revision
769
+ console.log('\nStep 4: Adding another incremental revision...')
770
+
771
+ const secondUpdate = await PdfDocument.fromBytes([updatedPdfBytes])
772
+ secondUpdate.setIncremental(true)
773
+
774
+ const thirdRevisionContent = new PdfIndirectObject({
775
+ objectNumber: contentStream.objectNumber,
776
+ generationNumber: contentStream.generationNumber,
777
+ content: new PdfStream(
778
+ 'BT /F1 14 Tf 100 600 Td (Third revision - demonstrates multiple incremental updates) Tj ET',
779
+ ),
780
+ })
781
+
782
+ secondUpdate.add(thirdRevisionContent)
783
+ await secondUpdate.commit()
784
+
785
+ const multiRevisionPdfPath = `${tmpFolder}/multi-revision.pdf`
786
+ await fs.writeFile(multiRevisionPdfPath, secondUpdate.toBytes())
787
+ console.log(`Multi-revision PDF saved to: ${multiRevisionPdfPath}`)
788
+ console.log(
789
+ `Multi-revision PDF has ${secondUpdate.revisions.length} revision(s)`,
790
+ )
791
+
792
+ const multiRevisionStats = await fs.stat(multiRevisionPdfPath)
793
+ console.log(`Multi-revision PDF size: ${multiRevisionStats.size} bytes`)
794
+
795
+ console.log('\n=== Summary ===')
796
+ console.log('Incremental updates allow you to:')
797
+ console.log('1. Preserve the original PDF content (important for signatures)')
798
+ console.log('2. Add new content without modifying existing revisions')
799
+ console.log('3. Maintain a complete history of document changes')
800
+ console.log('4. Stack multiple revisions on top of each other')
801
+ ```
802
+
803
+ ## Modifying AcroForms example - Creating and filling PDF form fields
804
+
805
+ ```typescript
806
+ import { PdfArray } from 'pdf-lite/core/objects/pdf-array'
807
+ import { PdfBoolean } from 'pdf-lite/core/objects/pdf-boolean'
808
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
809
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
810
+ import { PdfName } from 'pdf-lite/core/objects/pdf-name'
811
+ import { PdfNumber } from 'pdf-lite/core/objects/pdf-number'
812
+ import { PdfObjectReference } from 'pdf-lite/core/objects/pdf-object-reference'
813
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
814
+ import { PdfString } from 'pdf-lite/core/objects/pdf-string'
815
+ import { PdfDocument } from 'pdf-lite/pdf/pdf-document'
816
+ import fs from 'fs/promises'
817
+
818
+ const tmpFolder = `${import.meta.dirname}/tmp`
819
+ await fs.mkdir(tmpFolder, { recursive: true })
820
+
821
+ // Helper function to create a basic page
822
+ function createPage(
823
+ contentStreamRef: PdfObjectReference,
824
+ ): PdfIndirectObject<PdfDictionary> {
825
+ const pageDict = new PdfDictionary()
826
+ pageDict.set('Type', new PdfName('Page'))
827
+ pageDict.set(
828
+ 'MediaBox',
829
+ new PdfArray([
830
+ new PdfNumber(0),
831
+ new PdfNumber(0),
832
+ new PdfNumber(612),
833
+ new PdfNumber(792),
834
+ ]),
835
+ )
836
+ pageDict.set('Contents', contentStreamRef)
837
+ return new PdfIndirectObject({ content: pageDict })
838
+ }
839
+
840
+ // Helper function to create pages collection
841
+ function createPages(
842
+ pages: PdfIndirectObject<PdfDictionary>[],
843
+ ): PdfIndirectObject<PdfDictionary> {
844
+ const pagesDict = new PdfDictionary()
845
+ pagesDict.set('Type', new PdfName('Pages'))
846
+ pagesDict.set('Kids', new PdfArray(pages.map((x) => x.reference)))
847
+ pagesDict.set('Count', new PdfNumber(pages.length))
848
+ return new PdfIndirectObject({ content: pagesDict })
849
+ }
850
+
851
+ // Helper function to create catalog
852
+ function createCatalog(
853
+ pagesRef: PdfObjectReference,
854
+ ): PdfIndirectObject<PdfDictionary> {
855
+ const catalogDict = new PdfDictionary()
856
+ catalogDict.set('Type', new PdfName('Catalog'))
857
+ catalogDict.set('Pages', pagesRef)
858
+ return new PdfIndirectObject({ content: catalogDict })
859
+ }
860
+
861
+ // Helper function to create font
862
+ function createFont(): PdfIndirectObject<PdfDictionary> {
863
+ const fontDict = new PdfDictionary()
864
+ fontDict.set('Type', new PdfName('Font'))
865
+ fontDict.set('Subtype', new PdfName('Type1'))
866
+ fontDict.set('BaseFont', new PdfName('Helvetica'))
867
+ return new PdfIndirectObject({ content: fontDict })
868
+ }
869
+
870
+ // Helper function to create resources
871
+ function createResources(
872
+ fontRef: PdfObjectReference,
873
+ ): PdfIndirectObject<PdfDictionary> {
874
+ const resourcesDict = new PdfDictionary()
875
+ const fontDict = new PdfDictionary()
876
+ fontDict.set('F1', fontRef)
877
+ resourcesDict.set('Font', fontDict)
878
+ return new PdfIndirectObject({ content: resourcesDict })
879
+ }
880
+
881
+ // Helper function to create a text field widget annotation
882
+ function createTextField(
883
+ fieldName: string,
884
+ pageRef: PdfObjectReference,
885
+ rect: [number, number, number, number],
886
+ defaultValue: string = '',
887
+ ): PdfIndirectObject<PdfDictionary> {
888
+ const fieldDict = new PdfDictionary()
889
+ // Annotation properties
890
+ fieldDict.set('Type', new PdfName('Annot'))
891
+ fieldDict.set('Subtype', new PdfName('Widget'))
892
+ // Field type: Text
893
+ fieldDict.set('FT', new PdfName('Tx'))
894
+ // Field name
895
+ fieldDict.set('T', new PdfString(fieldName))
896
+ // Bounding rectangle [x1, y1, x2, y2]
897
+ fieldDict.set(
898
+ 'Rect',
899
+ new PdfArray([
900
+ new PdfNumber(rect[0]),
901
+ new PdfNumber(rect[1]),
902
+ new PdfNumber(rect[2]),
903
+ new PdfNumber(rect[3]),
904
+ ]),
905
+ )
906
+ // Annotation flags (4 = print)
907
+ fieldDict.set('F', new PdfNumber(4))
908
+ // Parent page reference
909
+ fieldDict.set('P', pageRef)
910
+ // Default value (if any)
911
+ if (defaultValue) {
912
+ fieldDict.set('V', new PdfString(defaultValue))
913
+ fieldDict.set('DV', new PdfString(defaultValue))
914
+ }
915
+ // Default appearance string (font and size)
916
+ fieldDict.set('DA', new PdfString('/Helv 12 Tf 0 g'))
917
+
918
+ return new PdfIndirectObject({ content: fieldDict })
919
+ }
920
+
921
+ // Helper function to create a checkbox field widget annotation
922
+ function createCheckboxField(
923
+ fieldName: string,
924
+ pageRef: PdfObjectReference,
925
+ rect: [number, number, number, number],
926
+ checked: boolean = false,
927
+ ): PdfIndirectObject<PdfDictionary> {
928
+ const fieldDict = new PdfDictionary()
929
+ // Annotation properties
930
+ fieldDict.set('Type', new PdfName('Annot'))
931
+ fieldDict.set('Subtype', new PdfName('Widget'))
932
+ // Field type: Button
933
+ fieldDict.set('FT', new PdfName('Btn'))
934
+ // Field name
935
+ fieldDict.set('T', new PdfString(fieldName))
936
+ // Bounding rectangle
937
+ fieldDict.set(
938
+ 'Rect',
939
+ new PdfArray([
940
+ new PdfNumber(rect[0]),
941
+ new PdfNumber(rect[1]),
942
+ new PdfNumber(rect[2]),
943
+ new PdfNumber(rect[3]),
944
+ ]),
945
+ )
946
+ // Annotation flags (4 = print)
947
+ fieldDict.set('F', new PdfNumber(4))
948
+ // Parent page reference
949
+ fieldDict.set('P', pageRef)
950
+ // Value: /Yes for checked, /Off for unchecked
951
+ fieldDict.set('V', new PdfName(checked ? 'Yes' : 'Off'))
952
+ fieldDict.set('AS', new PdfName(checked ? 'Yes' : 'Off'))
953
+
954
+ return new PdfIndirectObject({ content: fieldDict })
955
+ }
956
+
957
+ // ============================================
958
+ // PART 1: Create a PDF with form fields
959
+ // ============================================
960
+
961
+ const document = new PdfDocument()
962
+
963
+ // Create font
964
+ const font = createFont()
965
+ document.add(font)
966
+
967
+ // Create resources with the font
968
+ const resources = createResources(font.reference)
969
+ document.add(resources)
970
+
971
+ // Create content stream with form labels
972
+ const contentStream = new PdfIndirectObject({
973
+ content: new PdfStream({
974
+ header: new PdfDictionary(),
975
+ original: `BT
976
+ /F1 18 Tf 72 720 Td (PDF Form Example) Tj
977
+ /F1 12 Tf 0 -40 Td (Name:) Tj
978
+ 0 -30 Td (Email:) Tj
979
+ 0 -30 Td (Phone:) Tj
980
+ 0 -30 Td (Subscribe to newsletter:) Tj
981
+ ET`,
982
+ }),
983
+ })
984
+ document.add(contentStream)
985
+
986
+ // Create page
987
+ const page = createPage(contentStream.reference)
988
+ page.content.set('Resources', resources.reference)
989
+ document.add(page)
990
+
991
+ // Create form fields
992
+ const nameField = createTextField('name', page.reference, [150, 665, 400, 685])
993
+ const emailField = createTextField(
994
+ 'email',
995
+ page.reference,
996
+ [150, 635, 400, 655],
997
+ )
998
+ const phoneField = createTextField(
999
+ 'phone',
1000
+ page.reference,
1001
+ [150, 605, 400, 625],
1002
+ )
1003
+ const subscribeField = createCheckboxField(
1004
+ 'subscribe',
1005
+ page.reference,
1006
+ [200, 575, 215, 590],
1007
+ )
1008
+
1009
+ document.add(nameField)
1010
+ document.add(emailField)
1011
+ document.add(phoneField)
1012
+ document.add(subscribeField)
1013
+
1014
+ // Add annotations to page
1015
+ page.content.set(
1016
+ 'Annots',
1017
+ new PdfArray([
1018
+ nameField.reference,
1019
+ emailField.reference,
1020
+ phoneField.reference,
1021
+ subscribeField.reference,
1022
+ ]),
1023
+ )
1024
+
1025
+ // Create pages collection
1026
+ const pages = createPages([page])
1027
+ page.content.set('Parent', pages.reference)
1028
+ document.add(pages)
1029
+
1030
+ // Create catalog
1031
+ const catalog = createCatalog(pages.reference)
1032
+
1033
+ // Create AcroForm with all fields
1034
+ const acroForm = new PdfDictionary()
1035
+ acroForm.set(
1036
+ 'Fields',
1037
+ new PdfArray([
1038
+ nameField.reference,
1039
+ emailField.reference,
1040
+ phoneField.reference,
1041
+ subscribeField.reference,
1042
+ ]),
1043
+ )
1044
+ // NeedAppearances flag tells PDF readers to generate appearance streams
1045
+ acroForm.set('NeedAppearances', new PdfBoolean(true))
1046
+
1047
+ // Default resources for the form (font)
1048
+ const formResources = new PdfDictionary()
1049
+ const formFontDict = new PdfDictionary()
1050
+ const helveticaFont = new PdfDictionary()
1051
+ helveticaFont.set('Type', new PdfName('Font'))
1052
+ helveticaFont.set('Subtype', new PdfName('Type1'))
1053
+ helveticaFont.set('BaseFont', new PdfName('Helvetica'))
1054
+ formFontDict.set('Helv', helveticaFont)
1055
+ formResources.set('Font', formFontDict)
1056
+ acroForm.set('DR', formResources)
1057
+
1058
+ const acroFormObj = new PdfIndirectObject({ content: acroForm })
1059
+ document.add(acroFormObj)
1060
+ catalog.content.set('AcroForm', acroFormObj.reference)
1061
+
1062
+ document.add(catalog)
1063
+
1064
+ // Set the catalog as the root
1065
+ document.trailerDict.set('Root', catalog.reference)
1066
+
1067
+ await document.commit()
1068
+
1069
+ // Save the empty form
1070
+ // This demonstrates creating a blank form that users can fill in
1071
+ await fs.writeFile(`${tmpFolder}/form-empty.pdf`, document.toBytes())
1072
+ console.log('Created form-empty.pdf with empty form fields')
1073
+
1074
+ // ============================================
1075
+ // PART 2: Fill in the form fields
1076
+ // ============================================
1077
+ // This demonstrates how to programmatically fill in form fields.
1078
+ // We now read the previously saved empty form and modify it.
1079
+
1080
+ // Read the empty form PDF
1081
+ const emptyFormBytes = await fs.readFile(`${tmpFolder}/form-empty.pdf`)
1082
+ const filledDocument = await PdfDocument.fromBytes([emptyFormBytes])
1083
+
1084
+ // Get the catalog reference from trailer
1085
+ const catalogRef = filledDocument.trailerDict.get('Root')
1086
+ if (!catalogRef || !(catalogRef instanceof PdfObjectReference)) {
1087
+ throw new Error('No catalog found in PDF')
1088
+ }
1089
+
1090
+ // Read the catalog object
1091
+ const catalogObj = await filledDocument.readObject({
1092
+ objectNumber: catalogRef.objectNumber,
1093
+ })
1094
+ if (!catalogObj || !(catalogObj.content instanceof PdfDictionary)) {
1095
+ throw new Error('Catalog object not found')
1096
+ }
1097
+
1098
+ // Get the AcroForm reference
1099
+ const acroFormRef = catalogObj.content.get('AcroForm')
1100
+ if (!acroFormRef || !(acroFormRef instanceof PdfObjectReference)) {
1101
+ throw new Error('No AcroForm found in PDF')
1102
+ }
1103
+
1104
+ // Read the AcroForm object
1105
+ const filledAcroFormObj = await filledDocument.readObject({
1106
+ objectNumber: acroFormRef.objectNumber,
1107
+ })
1108
+ if (
1109
+ !filledAcroFormObj ||
1110
+ !(filledAcroFormObj.content instanceof PdfDictionary)
1111
+ ) {
1112
+ throw new Error('AcroForm object not found')
1113
+ }
1114
+
1115
+ // Get the fields array
1116
+ const fieldsArray = filledAcroFormObj.content.get('Fields')
1117
+ if (!fieldsArray || !(fieldsArray instanceof PdfArray)) {
1118
+ throw new Error('No fields found in AcroForm')
1119
+ }
1120
+
1121
+ // Helper function to find a field by name
1122
+ async function findField(
1123
+ fieldName: string,
1124
+ ): Promise<PdfIndirectObject<PdfDictionary> | null> {
1125
+ for (const fieldRef of fieldsArray.items) {
1126
+ if (!(fieldRef instanceof PdfObjectReference)) continue
1127
+ const fieldObj = await filledDocument.readObject({
1128
+ objectNumber: fieldRef.objectNumber,
1129
+ })
1130
+ if (!fieldObj || !(fieldObj.content instanceof PdfDictionary)) continue
1131
+
1132
+ const name = fieldObj.content.get('T')
1133
+ if (name instanceof PdfString) {
1134
+ // Convert bytes to string for comparison
1135
+ const nameStr = new TextDecoder().decode(name.raw)
1136
+ if (nameStr === fieldName) {
1137
+ return fieldObj as PdfIndirectObject<PdfDictionary>
1138
+ }
1139
+ }
1140
+ }
1141
+ return null
1142
+ }
1143
+
1144
+ // Update the name field value
1145
+ const nameFieldObj = await findField('name')
1146
+ if (nameFieldObj) {
1147
+ nameFieldObj.content.set('V', new PdfString('John Doe'))
1148
+ }
1149
+
1150
+ // Update the email field value
1151
+ const emailFieldObj = await findField('email')
1152
+ if (emailFieldObj) {
1153
+ emailFieldObj.content.set('V', new PdfString('john.doe@example.com'))
1154
+ }
1155
+
1156
+ // Update the phone field value
1157
+ const phoneFieldObj = await findField('phone')
1158
+ if (phoneFieldObj) {
1159
+ phoneFieldObj.content.set('V', new PdfString('+1 (555) 123-4567'))
1160
+ }
1161
+
1162
+ // Check the subscribe checkbox
1163
+ const subscribeFieldObj = await findField('subscribe')
1164
+ if (subscribeFieldObj) {
1165
+ subscribeFieldObj.content.set('V', new PdfName('Yes'))
1166
+ subscribeFieldObj.content.set('AS', new PdfName('Yes'))
1167
+ }
1168
+
1169
+ // Commit the changes
1170
+ await filledDocument.commit()
1171
+
1172
+ // Save the filled form
1173
+ await fs.writeFile(`${tmpFolder}/form-filled.pdf`, filledDocument.toBytes())
1174
+ console.log('Created form-filled.pdf with filled form fields')
1175
+
1176
+ console.log('\nForm field values:')
1177
+ console.log('- Name: John Doe')
1178
+ console.log('- Email: john.doe@example.com')
1179
+ console.log('- Phone: +1 (555) 123-4567')
1180
+ console.log('- Subscribe: Yes')
1181
+ ```
1182
+
1183
+ ## Tokeniser usage example
1184
+
1185
+ ```typescript
1186
+ import { PdfByteStreamTokeniser } from 'pdf-lite/core/tokeniser'
1187
+ import { PdfToken } from 'pdf-lite/core/tokens/token'
1188
+ import { stringToBytes } from 'pdf-lite/utils/stringToBytes'
1189
+
1190
+ /**
1191
+ * This example demonstrates how to use the PdfByteStreamTokeniser
1192
+ * to tokenize PDF content into individual tokens.
1193
+ *
1194
+ * The tokeniser converts raw PDF bytes into a stream of tokens that can
1195
+ * be further processed by the decoder or used for PDF analysis.
1196
+ */
1197
+
1198
+ // Sample PDF content to tokenize
1199
+ const pdfContent = `%PDF-2.0
1200
+ 1 0 obj
1201
+ << /Type /Catalog /Pages 2 0 R >>
1202
+ endobj
1203
+ 2 0 obj
1204
+ << /Type /Pages /Kids [3 0 R] /Count 1 >>
1205
+ endobj
1206
+ 3 0 obj
1207
+ << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>
1208
+ endobj
1209
+ trailer
1210
+ << /Size 4 /Root 1 0 R >>
1211
+ startxref
1212
+ 0
1213
+ %%EOF`
1214
+
1215
+ // Create the tokeniser
1216
+ const tokeniser = new PdfByteStreamTokeniser()
1217
+
1218
+ // Convert the PDF content to bytes and feed it to the tokeniser
1219
+ const bytes = stringToBytes(pdfContent)
1220
+ tokeniser.feedBytes(bytes)
1221
+
1222
+ // Signal end of input
1223
+ tokeniser.eof = true
1224
+
1225
+ // Collect all tokens
1226
+ const tokens: PdfToken[] = []
1227
+ for (const token of tokeniser.nextItems()) {
1228
+ tokens.push(token)
1229
+ }
1230
+
1231
+ // Display tokenisation results
1232
+ console.log('PDF Tokenisation Example')
1233
+ console.log('========================\n')
1234
+ console.log(`Input: ${bytes.length} bytes`) // report the encoded byte count rather than the string's UTF-16 length
1235
+ console.log(`Output: ${tokens.length} tokens\n`)
1236
+
1237
+ // Group tokens by type for summary
1238
+ const tokenCounts = new Map<string, number>()
1239
+ for (const token of tokens) {
1240
+ const type = token.constructor.name
1241
+ tokenCounts.set(type, (tokenCounts.get(type) ?? 0) + 1)
1242
+ }
1243
+
1244
+ console.log('Token type counts:')
1245
+ for (const [type, count] of tokenCounts) {
1246
+ console.log(` ${type}: ${count}`)
1247
+ }
1248
+
1249
+ console.log('\nFirst 20 tokens:')
1250
+ for (const token of tokens.slice(0, 20)) {
1251
+ const tokenString = token.toString() // keep the full text so truncation is only flagged when characters are actually dropped
1252
+ const displayString =
1253
+ tokenString.length > 40 ? tokenString.slice(0, 40) + '...' : tokenString
1254
+ console.log(
1255
+ ` ${token.constructor.name.padEnd(30)} ${JSON.stringify(displayString)}`,
1256
+ )
1257
+ }
1258
+
1259
+ // Example: Tokenising incrementally (useful for streaming)
1260
+ console.log('\n\nIncremental Tokenisation Example')
1261
+ console.log('=================================\n')
1262
+
1263
+ const incrementalTokeniser = new PdfByteStreamTokeniser()
1264
+
1265
+ // Feed bytes in chunks (simulating streaming)
1266
+ const chunkSize = 50
1267
+ const numChunks = Math.ceil(bytes.length / chunkSize)
1268
+
1269
+ console.log(`Processing ${numChunks} chunks of ~${chunkSize} bytes each...`)
1270
+
1271
+ let totalTokens = 0
1272
+ for (let i = 0; i < bytes.length; i += chunkSize) {
1273
+ const chunk = bytes.slice(i, i + chunkSize)
1274
+ incrementalTokeniser.feedBytes(chunk)
1275
+
1276
+ // Process tokens as they become available
1277
+ for (const _ of incrementalTokeniser.nextItems()) {
1278
+ totalTokens++
1279
+ }
1280
+ }
1281
+
1282
+ // Signal end of input and collect remaining tokens
1283
+ incrementalTokeniser.eof = true
1284
+ for (const _ of incrementalTokeniser.nextItems()) {
1285
+ totalTokens++
1286
+ }
1287
+
1288
+ console.log(`Total tokens produced: ${totalTokens}`)
1289
+
1290
+ // Example: Custom stream chunk size
1291
+ console.log('\n\nCustom Stream Chunk Size Example')
1292
+ console.log('================================\n')
1293
+
1294
+ const customTokeniser = new PdfByteStreamTokeniser({
1295
+ streamChunkSizeBytes: 512, // Customize the chunk size for stream content
1296
+ })
1297
+
1298
+ const streamContent = `1 0 obj
1299
+ << /Length 100 >>
1300
+ stream
1301
+ This is stream content that will be chunked by the tokeniser.
1302
+ The chunk size determines how the stream data is delivered.
1303
+ endstream
1304
+ endobj`
1305
+
1306
+ customTokeniser.feedBytes(stringToBytes(streamContent))
1307
+ customTokeniser.eof = true
1308
+
1309
+ console.log('Tokens from stream content:')
1310
+ for (const token of customTokeniser.nextItems()) {
1311
+ const type = token.constructor.name
1312
+ const preview = token.toString().slice(0, 50).replace(/\n/g, '\\n')
1313
+ console.log(` ${type.padEnd(25)} ${JSON.stringify(preview)}`)
1314
+ }
1315
+ ```
1316
+
1317
+ ## Decoder usage example
1318
+
1319
+ ```typescript
1320
+ import { PdfDecoder } from 'pdf-lite/core/decoder'
1321
+ import { PdfByteStreamTokeniser } from 'pdf-lite/core/tokeniser'
1322
+ import { pdfDecoder } from 'pdf-lite/core/generators'
1323
+ import { PdfObject } from 'pdf-lite/core/objects/pdf-object'
1324
+ import { PdfIndirectObject } from 'pdf-lite/core/objects/pdf-indirect-object'
1325
+ import { PdfDictionary } from 'pdf-lite/core/objects/pdf-dictionary'
1326
+ import { PdfStream } from 'pdf-lite/core/objects/pdf-stream'
1327
+ import { PdfTrailer } from 'pdf-lite/core/objects/pdf-trailer'
1328
+ import { PdfXRefTable } from 'pdf-lite/core/objects/pdf-xref-table'
1329
+ import { PdfComment } from 'pdf-lite/core/objects/pdf-comment'
1330
+ import { PdfStartXRef } from 'pdf-lite/core/objects/pdf-start-xref'
1331
+ import { stringToBytes } from 'pdf-lite/utils/stringToBytes'
1332
+
1333
+ /**
1334
+ * This example demonstrates how to use the PdfDecoder
1335
+ * to decode PDF tokens into PDF objects.
1336
+ *
1337
+ * The decoder transforms a stream of tokens (from the tokeniser)
1338
+ * into high-level PDF objects like dictionaries, arrays, streams,
1339
+ * and indirect objects.
1340
+ */
1341
+
1342
+ // Sample PDF content to decode
1343
+ const pdfContent = `%PDF-2.0
1344
+ 1 0 obj
1345
+ << /Type /Catalog /Pages 2 0 R >>
1346
+ endobj
1347
+ 2 0 obj
1348
+ << /Type /Pages /Kids [3 0 R] /Count 1 >>
1349
+ endobj
1350
+ 3 0 obj
1351
+ << /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R >>
1352
+ endobj
1353
+ 4 0 obj
1354
+ << /Length 44 >>
1355
+ stream
1356
+ BT /F1 12 Tf 100 700 Td (Hello!) Tj ET
1357
+ endstream
1358
+ endobj
1359
+ xref
1360
+ 0 5
1361
+ 0000000000 65535 f
1362
+ 0000000010 00000 n
1363
+ 0000000060 00000 n
1364
+ 0000000118 00000 n
1365
+ 0000000217 00000 n
1366
+ trailer
1367
+ << /Size 5 /Root 1 0 R >>
1368
+ startxref
1369
+ 310
1370
+ %%EOF`
1371
+
1372
+ // Method 1 (recommended): convert the sample to bytes, pass them to the pdfDecoder generator, collect every yielded object, then print a one-line summary per object
1373
+ console.log('PDF Decoder Example - Using pdfDecoder Generator')
1374
+ console.log('================================================\n')
1375
+
1376
+ const bytes = stringToBytes(pdfContent)
1377
+ const objects: PdfObject[] = []
1378
+
1379
+ for (const obj of pdfDecoder([bytes])) {
1380
+ objects.push(obj)
1381
+ }
1382
+
1383
+ console.log(`Decoded ${objects.length} PDF objects:\n`)
1384
+
1385
+ for (const obj of objects) {
1386
+ const type = obj.constructor.name
1387
+
1388
+ if (obj instanceof PdfComment) {
1389
+ console.log(` ${type}: ${obj.toString().trim()}`)
1390
+ } else if (obj instanceof PdfIndirectObject) {
1391
+ const contentType = obj.content.constructor.name
1392
+ console.log(
1393
+ ` ${type}: ${obj.objectNumber} ${obj.generationNumber} obj (${contentType})`,
1394
+ )
1395
+
1396
+ // Extra detail for the wrapped content: list the keys of a PdfDictionary, and print original.length for a PdfStream (the two checks are independent, so both may apply)
1397
+ if (obj.content instanceof PdfDictionary) {
1398
+ const keys = Object.keys(obj.content.values)
1399
+ console.log(` Keys: ${keys.join(', ')}`)
1400
+ }
1401
+ if (obj.content instanceof PdfStream) {
1402
+ console.log(
1403
+ ` Stream length: ${obj.content.original.length} bytes`,
1404
+ )
1405
+ }
1406
+ } else if (obj instanceof PdfXRefTable) {
1407
+ console.log(` ${type}: ${obj.entries.length} entries`)
1408
+ } else if (obj instanceof PdfTrailer) {
1409
+ const size = obj.dict.get('Size')?.toString() ?? 'unknown'
1410
+ console.log(` ${type}: Size=${size}`)
1411
+ } else if (obj instanceof PdfStartXRef) {
1412
+ console.log(` ${type}: offset=${obj.offset}`)
1413
+ } else {
1414
+ console.log(` ${type}`)
1415
+ }
1416
+ }
1417
+
1418
+ // Method 2: drive the pipeline by hand - tokenise the bytes with PdfByteStreamTokeniser, then feed each token to a PdfDecoder created with ignoreWhitespace: true
1419
+ console.log('\n\nPDF Decoder Example - Manual Pipeline')
1420
+ console.log('======================================\n')
1421
+
1422
+ const tokeniser = new PdfByteStreamTokeniser()
1423
+ const decoder = new PdfDecoder({ ignoreWhitespace: true })
1424
+
1425
+ // Feed the whole input to the tokeniser and set its eof flag (no more bytes will follow)
1426
+ tokeniser.feedBytes(bytes)
1427
+ tokeniser.eof = true
1428
+
1429
+ // Pass every token the tokeniser yields to the decoder, then set the decoder's eof flag as well
1430
+ for (const token of tokeniser.nextItems()) {
1431
+ decoder.feed(token)
1432
+ }
1433
+ decoder.eof = true
1434
+
1435
+ // Drain the objects the decoder yields into an array
1436
+ const manualObjects: PdfObject[] = []
1437
+ for (const obj of decoder.nextItems()) {
1438
+ manualObjects.push(obj)
1439
+ }
1440
+
1441
+ console.log(`Decoded ${manualObjects.length} objects with whitespace ignored\n`)
1442
+
1443
+ // Tally the decoded objects by constructor name (a missing key counts as 0)
1444
+ const typeCounts = new Map<string, number>()
1445
+ for (const obj of manualObjects) {
1446
+ const type = obj.constructor.name
1447
+ typeCounts.set(type, (typeCounts.get(type) ?? 0) + 1)
1448
+ }
1449
+
1450
+ console.log('Object type counts:')
1451
+ for (const [type, count] of typeCounts) {
1452
+ console.log(` ${type}: ${count}`)
1453
+ }
1454
+
1455
+ // Example: incremental decoding (useful for streaming) - bytes are fed chunk by chunk and objects are reported as soon as the decoder yields them
1456
+ console.log('\n\nIncremental Decoding Example')
1457
+ console.log('============================\n')
1458
+
1459
+ const streamTokeniser = new PdfByteStreamTokeniser()
1460
+ const streamDecoder = new PdfDecoder()
1461
+
1462
+ // Simulate streaming by slicing the input into chunks of chunkSize bytes (the last chunk may be shorter)
1463
+ const chunkSize = 100
1464
+ let objectCount = 0
1465
+
1466
+ for (let i = 0; i < bytes.length; i += chunkSize) {
1467
+ const chunk = bytes.slice(i, i + chunkSize)
1468
+ streamTokeniser.feedBytes(chunk)
1469
+
1470
+ // Forward each token the tokeniser yields for the data fed so far
1471
+ for (const token of streamTokeniser.nextItems()) {
1472
+ streamDecoder.feed(token)
1473
+
1474
+ // After each token, report whatever objects the decoder yields, labelled with the 1-based chunk number
1475
+ for (const obj of streamDecoder.nextItems()) {
1476
+ objectCount++
1477
+ console.log(
1478
+ ` Chunk ${Math.floor(i / chunkSize) + 1}: Found ${obj.constructor.name}`,
1479
+ )
1480
+ }
1481
+ }
1482
+ }
1483
+
1484
+ // Finalize: set eof on both stages, then drain the tokens and objects that are still pending
1485
+ streamTokeniser.eof = true
1486
+ streamDecoder.eof = true
1487
+
1488
+ for (const token of streamTokeniser.nextItems()) {
1489
+ streamDecoder.feed(token)
1490
+ }
1491
+
1492
+ for (const obj of streamDecoder.nextItems()) {
1493
+ objectCount++
1494
+ console.log(` Final: Found ${obj.constructor.name}`)
1495
+ }
1496
+
1497
+ console.log(`\nTotal objects decoded incrementally: ${objectCount}`)
1498
+
1499
+ // Example: round-trip - decode a small dictionary with ignoreWhitespace: false and compare the re-serialised text with the input
1500
+ console.log('\n\nRound-Trip Example (Preserving Whitespace)')
1501
+ console.log('==========================================\n')
1502
+
1503
+ const simpleDict = `<< /Type /Page /MediaBox [0 0 612 792] >>`
1504
+ const preservingDecoder = pdfDecoder([stringToBytes(simpleDict)], {
1505
+ ignoreWhitespace: false,
1506
+ })
1507
+
1508
+ for (const obj of preservingDecoder) {
1509
+ // Serialise each decoded object with toString(); the example expects the result to equal the input text, and prints the strict-equality check as "Match"
1510
+ const reconstructed = obj.toString()
1511
+ console.log('Original: ', JSON.stringify(simpleDict))
1512
+ console.log('Reconstructed:', JSON.stringify(reconstructed))
1513
+ console.log('Match:', simpleDict === reconstructed)
1514
+ }
1515
+ ```