pdf-lite 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206) hide show
  1. package/.commitlintrc.cjs +25 -0
  2. package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
  3. package/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
  4. package/.github/workflows/docs.yaml +93 -0
  5. package/.github/workflows/prepare-release.yaml +79 -0
  6. package/.github/workflows/release.yaml +80 -0
  7. package/.github/workflows/test.yaml +35 -0
  8. package/.husky/commit-msg +1 -0
  9. package/.husky/pre-commit +1 -0
  10. package/.prettierignore +4 -0
  11. package/.prettierrc +4 -0
  12. package/CONTRIBUTING.md +109 -0
  13. package/EXAMPLES.md +1515 -0
  14. package/LICENSE +21 -0
  15. package/README.md +285 -0
  16. package/examples/001-create-pdf.ts +112 -0
  17. package/examples/002-create-encrypted-pdf.ts +121 -0
  18. package/examples/003-sign-pdf.ts +347 -0
  19. package/examples/004-incremental-update.ts +206 -0
  20. package/examples/005-modify-acroform.ts +374 -0
  21. package/examples/006-tokeniser-example.ts +131 -0
  22. package/examples/007-decoder-example.ts +197 -0
  23. package/package.json +72 -0
  24. package/packages/pdf-lite/README.md +3 -0
  25. package/packages/pdf-lite/package.json +68 -0
  26. package/packages/pdf-lite/scripts/create-encryption-tests.sh +41 -0
  27. package/packages/pdf-lite/scripts/gen-signing-keys.sh +290 -0
  28. package/packages/pdf-lite/scripts/generate-all-signing-keys.sh +70 -0
  29. package/packages/pdf-lite/src/core/decoder.ts +454 -0
  30. package/packages/pdf-lite/src/core/generators.ts +128 -0
  31. package/packages/pdf-lite/src/core/incremental-parser.ts +221 -0
  32. package/packages/pdf-lite/src/core/index.ts +2 -0
  33. package/packages/pdf-lite/src/core/objects/pdf-array.ts +54 -0
  34. package/packages/pdf-lite/src/core/objects/pdf-boolean.ts +19 -0
  35. package/packages/pdf-lite/src/core/objects/pdf-comment.ts +50 -0
  36. package/packages/pdf-lite/src/core/objects/pdf-date.ts +74 -0
  37. package/packages/pdf-lite/src/core/objects/pdf-dictionary.ts +171 -0
  38. package/packages/pdf-lite/src/core/objects/pdf-hexadecimal.ts +54 -0
  39. package/packages/pdf-lite/src/core/objects/pdf-indirect-object.ts +137 -0
  40. package/packages/pdf-lite/src/core/objects/pdf-name.ts +19 -0
  41. package/packages/pdf-lite/src/core/objects/pdf-null.ts +15 -0
  42. package/packages/pdf-lite/src/core/objects/pdf-number.ts +98 -0
  43. package/packages/pdf-lite/src/core/objects/pdf-object-reference.ts +30 -0
  44. package/packages/pdf-lite/src/core/objects/pdf-object.ts +107 -0
  45. package/packages/pdf-lite/src/core/objects/pdf-start-xref.ts +39 -0
  46. package/packages/pdf-lite/src/core/objects/pdf-stream.ts +687 -0
  47. package/packages/pdf-lite/src/core/objects/pdf-string.ts +38 -0
  48. package/packages/pdf-lite/src/core/objects/pdf-trailer.ts +57 -0
  49. package/packages/pdf-lite/src/core/objects/pdf-xref-table.ts +264 -0
  50. package/packages/pdf-lite/src/core/parser.ts +22 -0
  51. package/packages/pdf-lite/src/core/ref.ts +102 -0
  52. package/packages/pdf-lite/src/core/serializer.ts +68 -0
  53. package/packages/pdf-lite/src/core/streams/object-stream.ts +20 -0
  54. package/packages/pdf-lite/src/core/tokeniser.ts +687 -0
  55. package/packages/pdf-lite/src/core/tokens/boolean-token.ts +20 -0
  56. package/packages/pdf-lite/src/core/tokens/byte-offset-token.ts +20 -0
  57. package/packages/pdf-lite/src/core/tokens/comment-token.ts +32 -0
  58. package/packages/pdf-lite/src/core/tokens/end-array-token.ts +10 -0
  59. package/packages/pdf-lite/src/core/tokens/end-dictionary-token.ts +10 -0
  60. package/packages/pdf-lite/src/core/tokens/end-object-token.ts +10 -0
  61. package/packages/pdf-lite/src/core/tokens/end-stream-token.ts +11 -0
  62. package/packages/pdf-lite/src/core/tokens/hexadecimal-token.ts +22 -0
  63. package/packages/pdf-lite/src/core/tokens/name-token.ts +19 -0
  64. package/packages/pdf-lite/src/core/tokens/null-token.ts +9 -0
  65. package/packages/pdf-lite/src/core/tokens/number-token.ts +164 -0
  66. package/packages/pdf-lite/src/core/tokens/object-reference-token.ts +24 -0
  67. package/packages/pdf-lite/src/core/tokens/start-array-token.ts +10 -0
  68. package/packages/pdf-lite/src/core/tokens/start-dictionary-token.ts +10 -0
  69. package/packages/pdf-lite/src/core/tokens/start-object-token.ts +28 -0
  70. package/packages/pdf-lite/src/core/tokens/start-stream-token.ts +52 -0
  71. package/packages/pdf-lite/src/core/tokens/start-xref-token.ts +10 -0
  72. package/packages/pdf-lite/src/core/tokens/stream-chunk-token.ts +8 -0
  73. package/packages/pdf-lite/src/core/tokens/string-token.ts +17 -0
  74. package/packages/pdf-lite/src/core/tokens/token.ts +43 -0
  75. package/packages/pdf-lite/src/core/tokens/trailer-token.ts +12 -0
  76. package/packages/pdf-lite/src/core/tokens/whitespace-token.ts +43 -0
  77. package/packages/pdf-lite/src/core/tokens/xref-table-entry-token.ts +65 -0
  78. package/packages/pdf-lite/src/core/tokens/xref-table-section-start-token.ts +31 -0
  79. package/packages/pdf-lite/src/core/tokens/xref-table-start-token.ts +13 -0
  80. package/packages/pdf-lite/src/crypto/ciphers/aes128.ts +63 -0
  81. package/packages/pdf-lite/src/crypto/ciphers/aes256.ts +50 -0
  82. package/packages/pdf-lite/src/crypto/ciphers/rc4.ts +82 -0
  83. package/packages/pdf-lite/src/crypto/constants.ts +10 -0
  84. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation-aes256.ts +213 -0
  85. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation.ts +122 -0
  86. package/packages/pdf-lite/src/crypto/key-gen/key-gen-aes256.ts +79 -0
  87. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-128.ts +190 -0
  88. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-40.ts +129 -0
  89. package/packages/pdf-lite/src/crypto/types.ts +6 -0
  90. package/packages/pdf-lite/src/crypto/utils.ts +81 -0
  91. package/packages/pdf-lite/src/filters/ascii85.ts +128 -0
  92. package/packages/pdf-lite/src/filters/asciihex.ts +55 -0
  93. package/packages/pdf-lite/src/filters/flate.ts +39 -0
  94. package/packages/pdf-lite/src/filters/lzw.ts +144 -0
  95. package/packages/pdf-lite/src/filters/pass-through.ts +37 -0
  96. package/packages/pdf-lite/src/filters/runlength.ts +92 -0
  97. package/packages/pdf-lite/src/filters/types.ts +21 -0
  98. package/packages/pdf-lite/src/index.ts +4 -0
  99. package/packages/pdf-lite/src/pdf/errors.ts +5 -0
  100. package/packages/pdf-lite/src/pdf/index.ts +4 -0
  101. package/packages/pdf-lite/src/pdf/pdf-document.ts +924 -0
  102. package/packages/pdf-lite/src/pdf/pdf-reader.ts +57 -0
  103. package/packages/pdf-lite/src/pdf/pdf-revision.ts +234 -0
  104. package/packages/pdf-lite/src/pdf/pdf-xref-lookup.ts +527 -0
  105. package/packages/pdf-lite/src/security/crypt-filters/aesv2.ts +58 -0
  106. package/packages/pdf-lite/src/security/crypt-filters/aesv3.ts +56 -0
  107. package/packages/pdf-lite/src/security/crypt-filters/base.ts +140 -0
  108. package/packages/pdf-lite/src/security/crypt-filters/identity.ts +40 -0
  109. package/packages/pdf-lite/src/security/crypt-filters/v2.ts +59 -0
  110. package/packages/pdf-lite/src/security/handlers/base.ts +625 -0
  111. package/packages/pdf-lite/src/security/handlers/pubSec.ts +413 -0
  112. package/packages/pdf-lite/src/security/handlers/utils.ts +304 -0
  113. package/packages/pdf-lite/src/security/handlers/v1.ts +225 -0
  114. package/packages/pdf-lite/src/security/handlers/v2.ts +128 -0
  115. package/packages/pdf-lite/src/security/handlers/v4.ts +379 -0
  116. package/packages/pdf-lite/src/security/handlers/v5.ts +298 -0
  117. package/packages/pdf-lite/src/security/types.ts +158 -0
  118. package/packages/pdf-lite/src/signing/document-security-store.ts +224 -0
  119. package/packages/pdf-lite/src/signing/index.ts +3 -0
  120. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-detached.ts +154 -0
  121. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-sha1.ts +161 -0
  122. package/packages/pdf-lite/src/signing/signatures/adbe-x509-rsa-sha1.ts +106 -0
  123. package/packages/pdf-lite/src/signing/signatures/base.ts +229 -0
  124. package/packages/pdf-lite/src/signing/signatures/etsi-cades-detached.ts +229 -0
  125. package/packages/pdf-lite/src/signing/signatures/etsi-rfc3161.ts +92 -0
  126. package/packages/pdf-lite/src/signing/signatures/index.ts +6 -0
  127. package/packages/pdf-lite/src/signing/signer.ts +120 -0
  128. package/packages/pdf-lite/src/signing/types.ts +86 -0
  129. package/packages/pdf-lite/src/signing/utils.ts +71 -0
  130. package/packages/pdf-lite/src/types.ts +44 -0
  131. package/packages/pdf-lite/src/utils/IterableReadableStream.ts +30 -0
  132. package/packages/pdf-lite/src/utils/algos.ts +446 -0
  133. package/packages/pdf-lite/src/utils/assert.ts +42 -0
  134. package/packages/pdf-lite/src/utils/bytesToHex.ts +18 -0
  135. package/packages/pdf-lite/src/utils/bytesToHexBytes.ts +27 -0
  136. package/packages/pdf-lite/src/utils/bytesToString.ts +17 -0
  137. package/packages/pdf-lite/src/utils/concatUint8Arrays.ts +26 -0
  138. package/packages/pdf-lite/src/utils/escapeString.ts +49 -0
  139. package/packages/pdf-lite/src/utils/hexBytesToBytes.ts +22 -0
  140. package/packages/pdf-lite/src/utils/hexBytesToString.ts +21 -0
  141. package/packages/pdf-lite/src/utils/hexToBytes.ts +18 -0
  142. package/packages/pdf-lite/src/utils/padBytes.ts +25 -0
  143. package/packages/pdf-lite/src/utils/predictors.ts +332 -0
  144. package/packages/pdf-lite/src/utils/replaceInBuffer.ts +56 -0
  145. package/packages/pdf-lite/src/utils/stringToBytes.ts +22 -0
  146. package/packages/pdf-lite/src/utils/stringToHexBytes.ts +23 -0
  147. package/packages/pdf-lite/src/utils/unescapeString.ts +123 -0
  148. package/packages/pdf-lite/test/acceptance/__snapshots__/versions.node.test.ts.snap +60766 -0
  149. package/packages/pdf-lite/test/acceptance/fixtures/1.3/basic.pdf +0 -0
  150. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-128.pdf +0 -0
  151. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-256.pdf +0 -0
  152. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-128.pdf +0 -0
  153. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-40.pdf +0 -0
  154. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic.pdf +0 -0
  155. package/packages/pdf-lite/test/acceptance/fixtures/1.5/basic.pdf +0 -0
  156. package/packages/pdf-lite/test/acceptance/fixtures/1.6/basic.pdf +0 -0
  157. package/packages/pdf-lite/test/acceptance/fixtures/1.7/basic.pdf +0 -0
  158. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-128.pdf +43 -0
  159. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-256.pdf +43 -0
  160. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-128.pdf +43 -0
  161. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-40.pdf +44 -0
  162. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic.pdf +79 -0
  163. package/packages/pdf-lite/test/acceptance/versions.node.test.ts +41 -0
  164. package/packages/pdf-lite/test/unit/__snapshots__/decoder.node.test.ts.snap +86947 -0
  165. package/packages/pdf-lite/test/unit/__snapshots__/tokeniser.node.test.ts.snap +131829 -0
  166. package/packages/pdf-lite/test/unit/ciphers.test.ts +61 -0
  167. package/packages/pdf-lite/test/unit/decoder.node.test.ts +21 -0
  168. package/packages/pdf-lite/test/unit/decoder.test.ts +567 -0
  169. package/packages/pdf-lite/test/unit/filters.test.ts +67 -0
  170. package/packages/pdf-lite/test/unit/fixtures/basic.pdf +0 -0
  171. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-128.pdf +0 -0
  172. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-256.pdf +0 -0
  173. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-128.pdf +0 -0
  174. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-40.pdf +43 -0
  175. package/packages/pdf-lite/test/unit/fixtures/protectedAdobeLivecycle.pdf +0 -0
  176. package/packages/pdf-lite/test/unit/fixtures/rsa-2048/index.ts +187 -0
  177. package/packages/pdf-lite/test/unit/fixtures/template.pdf +0 -0
  178. package/packages/pdf-lite/test/unit/incremental-update.test.ts +0 -0
  179. package/packages/pdf-lite/test/unit/objects.test.ts +0 -0
  180. package/packages/pdf-lite/test/unit/pdf-document-signing.test.ts +0 -0
  181. package/packages/pdf-lite/test/unit/pdf-revision.test.ts +195 -0
  182. package/packages/pdf-lite/test/unit/pdf.browser.test.ts +0 -0
  183. package/packages/pdf-lite/test/unit/predictors.test.ts +226 -0
  184. package/packages/pdf-lite/test/unit/ref.test.ts +158 -0
  185. package/packages/pdf-lite/test/unit/security-handlers.test.ts +645 -0
  186. package/packages/pdf-lite/test/unit/serializer.test.ts +81 -0
  187. package/packages/pdf-lite/test/unit/signature-objects.test.ts +814 -0
  188. package/packages/pdf-lite/test/unit/string-escaping.test.ts +84 -0
  189. package/packages/pdf-lite/test/unit/tokeniser.node.test.ts +38 -0
  190. package/packages/pdf-lite/test/unit/tokeniser.test.ts +1213 -0
  191. package/packages/pdf-lite/test/unit/utils.test.ts +248 -0
  192. package/packages/pdf-lite/test/unit/xref-lookup.test.ts +72 -0
  193. package/packages/pdf-lite/tsconfig.json +4 -0
  194. package/packages/pdf-lite/tsconfig.prod.json +8 -0
  195. package/packages/pdf-lite/typedoc.json +14 -0
  196. package/packages/pdf-lite/vitest.config.ts +43 -0
  197. package/pnpm-workspace.yaml +2 -0
  198. package/renovate.json +34 -0
  199. package/scripts/build-examples.ts +30 -0
  200. package/scripts/bump-version.sh +56 -0
  201. package/scripts/gen-html-docs.sh +21 -0
  202. package/scripts/gen-md-docs.sh +15 -0
  203. package/scripts/prepare-release.sh +33 -0
  204. package/tsconfig.json +22 -0
  205. package/tsconfig.prod.json +12 -0
  206. package/typedoc.json +34 -0
@@ -0,0 +1,454 @@
1
+ import { PdfStartObjectToken } from './tokens/start-object-token.js'
2
+ import { PdfObject } from './objects/pdf-object.js'
3
+ import { PdfIndirectObject } from './objects/pdf-indirect-object.js'
4
+ import { PdfDictionary } from './objects/pdf-dictionary.js'
5
+ import { PdfEndObjectToken } from './tokens/end-object-token.js'
6
+ import { PdfStartDictionaryToken } from './tokens/start-dictionary-token.js'
7
+ import { PdfArray } from './objects/pdf-array.js'
8
+ import { PdfEndDictionaryToken } from './tokens/end-dictionary-token.js'
9
+ import { PdfNumber } from './objects/pdf-number.js'
10
+ import { PdfObjectReference } from './objects/pdf-object-reference.js'
11
+ import { PdfTrailer } from './objects/pdf-trailer.js'
12
+ import { PdfHexadecimal } from './objects/pdf-hexadecimal.js'
13
+ import { PdfStartArrayToken } from './tokens/start-array-token.js'
14
+ import { PdfEndArrayToken } from './tokens/end-array-token.js'
15
+ import { PdfNameToken } from './tokens/name-token.js'
16
+ import { PdfName } from './objects/pdf-name.js'
17
+ import { PdfBoolean } from './objects/pdf-boolean.js'
18
+ import { PdfNull } from './objects/pdf-null.js'
19
+ import { PdfString } from './objects/pdf-string.js'
20
+ import { PdfNumberToken } from './tokens/number-token.js'
21
+ import { PdfBooleanToken } from './tokens/boolean-token.js'
22
+ import { PdfHexadecimalToken } from './tokens/hexadecimal-token.js'
23
+ import { PdfNullToken } from './tokens/null-token.js'
24
+ import { PdfObjectReferenceToken } from './tokens/object-reference-token.js'
25
+ import { PdfStringToken } from './tokens/string-token.js'
26
+ import { PdfStartStreamToken } from './tokens/start-stream-token.js'
27
+ import { PdfStream } from './objects/pdf-stream.js'
28
+ import { PdfEndStreamToken } from './tokens/end-stream-token.js'
29
+ import { PdfStreamChunkToken } from './tokens/stream-chunk-token.js'
30
+ import { PdfXRefTableStartToken } from './tokens/xref-table-start-token.js'
31
+ import {
32
+ PdfXRefTable,
33
+ PdfXRefTableEntry,
34
+ PdfXRefTableSectionHeader,
35
+ } from './objects/pdf-xref-table.js'
36
+ import { PdfXRefTableSectionStartToken } from './tokens/xref-table-section-start-token.js'
37
+ import { PdfXRefTableEntryToken } from './tokens/xref-table-entry-token.js'
38
+ import { PdfTrailerToken } from './tokens/trailer-token.js'
39
+ import { PdfCommentToken } from './tokens/comment-token.js'
40
+ import { PdfComment } from './objects/pdf-comment.js'
41
+ import { PdfStartXRefToken } from './tokens/start-xref-token.js'
42
+ import { PdfStartXRef } from './objects/pdf-start-xref.js'
43
+ import { PdfWhitespaceToken } from './tokens/whitespace-token.js'
44
+ import { PdfToken } from './tokens/token.js'
45
+ import { IncrementalParser } from './incremental-parser.js'
46
+ import { concatUint8Arrays } from '../utils/concatUint8Arrays.js'
47
+ import { ByteArray } from '../types.js'
48
+ import { Ref } from './ref.js'
49
+
50
const DEFAULT_MAX_BUFFER_SIZE_BYTES = 10 * 1024 * 1024 // 10 MB

/**
 * Decodes PDF tokens into PDF objects.
 * Handles parsing of all PDF object types including dictionaries, arrays, streams, and xref tables.
 *
 * Whitespace and comment tokens found around a construct are not thrown away:
 * they are stored on the produced object as `preTokens` / `postTokens`
 * (plus `innerTokens` for dictionaries and arrays), unless
 * `ignoreWhitespace` is enabled, in which case whitespace tokens are dropped.
 */
export class PdfDecoder extends IncrementalParser<PdfToken, PdfObject> {
    private ignoreWhitespace: boolean = false
    private maxBufferSizeBytes: number = DEFAULT_MAX_BUFFER_SIZE_BYTES

    /**
     * Creates a new PDF decoder.
     *
     * @param options - Configuration options
     * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
     * @param options.maxBufferSizeBytes - Maximum buffer size before compaction (default: 10MB)
     */
    constructor(options?: {
        ignoreWhitespace?: boolean
        maxBufferSizeBytes?: number
    }) {
        super()
        this.ignoreWhitespace = options?.ignoreWhitespace ?? false
        this.maxBufferSizeBytes =
            options?.maxBufferSizeBytes ?? DEFAULT_MAX_BUFFER_SIZE_BYTES
    }

    /**
     * Reads a name token and wraps it in a `PdfName`, keeping the extra
     * tokens that surround it.
     */
    private nextName(): PdfName {
        const preTokens = this.nextExtraTokens()
        const token = this.expect(PdfNameToken)
        const postTokens = this.nextExtraTokens()

        const name = new PdfName(token.name)
        name.preTokens = preTokens
        name.postTokens = postTokens

        return name
    }

    /**
     * Reads a complete indirect object: start-object token, one content
     * value, end-object token.
     *
     * @returns The indirect object, carrying the object/generation numbers
     * and byte offset reported by the start token
     */
    private nextIndirectObject(): PdfIndirectObject {
        const preTokens = this.nextExtraTokens()
        const startToken = this.expect(PdfStartObjectToken)

        // Extra tokens between the start-object token and the content belong
        // to the content, so they are collected here and assigned explicitly.
        const contentPreTokens = this.nextExtraTokens()
        const content = this.nextValue()
        content.preTokens = contentPreTokens
        this.expect(PdfEndObjectToken)

        const postTokens = this.nextExtraTokens()

        const obj = new PdfIndirectObject({
            objectNumber: startToken.objectNumber,
            generationNumber: startToken.generationNumber,
            content,
            offset: startToken.byteOffset,
        })

        obj.preTokens = preTokens
        obj.postTokens = postTokens

        return obj
    }

    /** Reads the next nested (non-root) object. */
    private nextValue(): PdfObject {
        return this.nextObject()
    }

    /**
     * Reads a dictionary. If a start-stream token follows the dictionary, the
     * dictionary is used as the stream header and a `PdfStream` is returned
     * instead.
     */
    private nextDictionary(): PdfDictionary | PdfStream {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartDictionaryToken)
        const dictionary = new PdfDictionary()
        dictionary.innerTokens = this.nextExtraTokens()

        while (true) {
            const next = this.peek()

            if (!next || next instanceof PdfEndDictionaryToken) {
                this.next() // consume end token
                break
            }

            const name = this.nextName()
            const value = this.nextValue()
            dictionary.set(name, value)
        }

        const postTokens = this.nextExtraTokens()

        // Populating the dictionary above is parsing, not a user edit.
        dictionary.setModified(false)
        dictionary.preTokens = preTokens
        dictionary.postTokens = postTokens

        if (this.peek() instanceof PdfStartStreamToken) {
            return this.nextStream(dictionary)
        }

        return dictionary
    }

    /** Reads an array and all of its elements. */
    private nextArray(): PdfArray {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartArrayToken)
        const array = new PdfArray()
        array.innerTokens = this.nextExtraTokens()

        while (true) {
            const next = this.peek()
            if (!next || next instanceof PdfEndArrayToken) {
                this.next() // consume end token
                break
            }

            array.push(this.nextValue())
        }

        const postTokens = this.nextExtraTokens()

        array.preTokens = preTokens
        array.postTokens = postTokens

        return array
    }

    /**
     * Reads a stream body (start-stream token, zero or more chunk tokens,
     * end-stream token) and combines it with an already parsed header.
     *
     * @param header - The dictionary that preceded the stream
     * @returns The stream, with all chunk bytes concatenated as its original data
     */
    private nextStream(header: PdfDictionary): PdfStream {
        const preTokens = this.nextExtraTokens()
        const startStreamToken = this.expect(PdfStartStreamToken)

        const chunks: ByteArray[] = []
        const preStreamTokens = startStreamToken.getTrailingWhitespaceTokens()

        while (this.peek() instanceof PdfStreamChunkToken) {
            const chunk = this.expect(PdfStreamChunkToken)

            chunks.push(chunk.toBytes())
        }
        const postStreamDataTokens = this.nextExtraTokens()

        this.expect(PdfEndStreamToken)
        const postTokens = this.nextExtraTokens()

        const stream = new PdfStream({
            header,
            original: concatUint8Arrays(...chunks),
        })

        stream.preStreamDataTokens = preStreamTokens
        stream.postStreamDataTokens = postStreamDataTokens
        stream.preTokens = preTokens
        stream.postTokens = postTokens

        return stream
    }

    /** Reads a comment token as a standalone `PdfComment` object. */
    private nextComment(): PdfComment {
        const token = this.expect(PdfCommentToken)

        return new PdfComment(token.comment)
    }

    /**
     * Reads a single primitive value: number, boolean, hexadecimal string,
     * null, object reference or literal string.
     *
     * @throws Error if the input is exhausted or the token is not one of the
     * primitive token types
     */
    private nextPrimitive(): PdfObject {
        const preTokens = this.nextExtraTokens()
        const token = this.next() // consume the primitive token
        if (!token) {
            throw new Error('Expected primitive token but got EOF')
        }

        const postTokens = this.nextExtraTokens()

        let out: PdfObject
        if (token instanceof PdfNumberToken) {
            out = new PdfNumber({
                value: token.value,
                padTo: token.padTo,
                decimalPlaces: token.decimalPlaces,
            })
        } else if (token instanceof PdfBooleanToken) {
            out = new PdfBoolean(token.value)
        } else if (token instanceof PdfHexadecimalToken) {
            out = new PdfHexadecimal(token.raw, 'hex')
        } else if (token instanceof PdfNullToken) {
            out = new PdfNull()
        } else if (token instanceof PdfObjectReferenceToken) {
            out = new PdfObjectReference(
                token.objectNumber,
                token.generationNumber,
            )
        } else if (token instanceof PdfStringToken) {
            out = new PdfString(token.value)
        } else {
            throw new Error(`Unknown primitive token type: ${token.type}`)
        }

        out.preTokens = preTokens
        out.postTokens = postTokens

        return out
    }

    /**
     * Consumes the run of whitespace and comment tokens at the current
     * position.
     *
     * @param root - When true, a comment token ends the run and is left in
     * place, so the caller can decode it as an object of its own
     * @returns The consumed tokens in input order. Whitespace tokens are
     * consumed but omitted when `ignoreWhitespace` is set
     */
    private nextExtraTokens(root: boolean = false): PdfToken[] {
        const tokens: PdfToken[] = []
        while (true) {
            if (this.atEof()) {
                break
            }

            const token = this.peek()
            if (!token) {
                break
            }

            if (this.ignoreWhitespace && token instanceof PdfWhitespaceToken) {
                this.next() // consume whitespace
                continue
            }

            if (root && token instanceof PdfCommentToken) {
                break
            }

            if (
                !(
                    token instanceof PdfWhitespaceToken ||
                    token instanceof PdfCommentToken
                )
            ) {
                break
            }

            tokens.push(this.next()!)
        }

        return tokens
    }

    /**
     * Reads a cross-reference table: the table start token followed by any
     * number of section headers and entries.
     *
     * @throws Error if the table start token carries no byte offset
     */
    private nextXRefTable(): PdfXRefTable {
        const preTokens = this.nextExtraTokens()
        const xrefToken = this.expect(PdfXRefTableStartToken)

        if (xrefToken.byteOffset === undefined) {
            throw new Error('XRef table token missing byte offset')
        }

        const xrefTable = new PdfXRefTable({
            offset: xrefToken.byteOffset,
        })

        // Extra tokens that were read ahead of a token which turned out not
        // to belong to the table. They have already been consumed from the
        // input, so they must be kept somewhere or they would be lost.
        let trailingTokens: PdfToken[] = []

        while (true) {
            const rowPreTokens = this.nextExtraTokens()
            const next = this.peek()

            if (next instanceof PdfXRefTableSectionStartToken) {
                const sectionToken = this.expect(PdfXRefTableSectionStartToken)
                const postTokens = this.nextExtraTokens()

                const section = new PdfXRefTableSectionHeader({
                    startObjectNumber: sectionToken.start.value,
                    entryCount: sectionToken.count.value,
                })
                // Freshly parsed values are not modifications.
                section.startObjectNumber.setModified(false)
                section.entryCount.setModified(false)
                section.setModified(false)

                section.preTokens = rowPreTokens
                section.postTokens = postTokens

                xrefTable.sections.push(section)
            } else if (next instanceof PdfXRefTableEntryToken) {
                const entryToken = this.expect(PdfXRefTableEntryToken)
                const postTokens = this.nextExtraTokens()

                const entry = new PdfXRefTableEntry({
                    objectNumber: entryToken.objectNumber.value,
                    byteOffset: entryToken.offset.ref,
                    generationNumber: entryToken.generationNumber.value,
                    inUse: entryToken.inUse,
                })

                // Freshly parsed values are not modifications.
                entry.objectNumber.setModified(false)
                entry.generationNumber.setModified(false)
                entry.byteOffset.setModified(false)
                entry.setModified(false)

                entry.preTokens = rowPreTokens
                entry.postTokens = postTokens

                xrefTable.entries.push(entry)
            } else {
                // End of input or a token that is not part of the table.
                trailingTokens = rowPreTokens
                break
            }
        }

        xrefTable.preTokens = preTokens
        xrefTable.postTokens = [...trailingTokens, ...this.nextExtraTokens()]
        return xrefTable
    }

    /**
     * Reads a trailer: the trailer token followed by its dictionary.
     *
     * @throws Error if the trailer token carries no byte offset
     */
    private nextTrailer(): PdfTrailer {
        const preTokens = this.nextExtraTokens()
        const trailerToken = this.expect(PdfTrailerToken)
        const dictionary = this.nextDictionary()
        const postTokens = this.nextExtraTokens()

        const trailer = new PdfTrailer(dictionary as PdfDictionary<any>)
        if (trailerToken.byteOffset === undefined) {
            throw new Error('Trailer token missing byte offset')
        }

        // Record where the trailer was found without flagging it as edited.
        trailer.offset.update(trailerToken.byteOffset)
        trailer.offset.isModified = false

        trailer.preTokens = preTokens
        trailer.postTokens = postTokens

        return trailer
    }

    /**
     * Reads a startxref marker: the startxref token followed by the number
     * token holding the offset.
     */
    private nextStartXRef(): PdfStartXRef {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartXRefToken)

        const preOffsetTokens = this.nextExtraTokens()
        const offsetToken = this.expect(PdfNumberToken)

        const offset = new PdfNumber(offsetToken.value)
        offset.setModified(false)

        const startXref = new PdfStartXRef(offset)
        startXref.preTokens = preTokens
        offset.preTokens = preOffsetTokens
        return startXref
    }

    /**
     * Reads the next object of any kind, dispatching on the type of the
     * upcoming token.
     *
     * @param root - True when decoding a top-level object; comments are then
     * decoded as `PdfComment` objects instead of being attached to a neighbour
     * @throws Error if no token is available or the token type is not handled
     */
    private nextObject(root: boolean = false): PdfObject {
        const preTokens = this.nextExtraTokens(root)
        const token = this.peek()

        if (!token) {
            // This should never happen as atEof should be checked before calling nextObject
            throw new Error('Expected token but did not find one')
        }

        let out: PdfObject
        if (token instanceof PdfStartDictionaryToken) {
            out = this.nextDictionary()
        } else if (token instanceof PdfStartObjectToken) {
            out = this.nextIndirectObject()
        } else if (token instanceof PdfStartArrayToken) {
            out = this.nextArray()
        } else if (token instanceof PdfNameToken) {
            out = this.nextName()
        } else if (PdfDecoder.isPrimitive(token)) {
            out = this.nextPrimitive()
        } else if (token instanceof PdfCommentToken) {
            out = this.nextComment()
        } else if (token instanceof PdfXRefTableStartToken) {
            out = this.nextXRefTable()
        } else if (token instanceof PdfTrailerToken) {
            out = this.nextTrailer()
        } else if (token instanceof PdfStartXRefToken) {
            out = this.nextStartXRef()
        } else {
            throw new Error(`Unknown token type: ${token.type}`)
        }
        const postTokens = this.nextExtraTokens(root)
        // Merge the tokens gathered here with whatever the specific reader
        // already attached, keeping input order.
        out.preTokens = [...preTokens, ...(out.preTokens ?? [])]
        out.postTokens = [...(out.postTokens ?? []), ...postTokens]
        out.setModified(false)

        return out
    }

    /** Returns true if the token is one of the types `nextPrimitive` decodes. */
    private static isPrimitive(token: PdfToken): boolean {
        return (
            token instanceof PdfNumberToken ||
            token instanceof PdfBooleanToken ||
            token instanceof PdfHexadecimalToken ||
            token instanceof PdfNullToken ||
            token instanceof PdfObjectReferenceToken ||
            token instanceof PdfStringToken
        )
    }

    /** Sums the `byteLength` of every token currently held in the buffer. */
    protected bufferSize(): number {
        return this.buffer.reduce((acc, obj) => acc + obj.byteLength, 0)
    }

    /**
     * Reports whether the buffer may be compacted: more than 50 buffered
     * items have been passed and the buffered bytes exceed the configured
     * maximum. The index check runs first so the size sum is skipped early on.
     */
    protected canCompact(): boolean {
        return (
            this.bufferIndex > 50 && this.bufferSize() > this.maxBufferSizeBytes
        )
    }

    /** Decodes one top-level object from the buffered tokens. */
    protected parse() {
        return this.nextObject(true)
    }
}
@@ -0,0 +1,128 @@
1
+ import { ByteArray } from '../types'
2
+ import { stringToBytes } from '../utils/stringToBytes'
3
+ import { PdfDecoder } from './decoder'
4
+ import { PdfObject } from './objects/pdf-object'
5
+ import { PdfByteStreamTokeniser } from './tokeniser'
6
+
7
/**
 * Parses PDF objects out of a sequence of byte chunks.
 * Every chunk is tokenised, the tokens are handed to a decoder one at a time,
 * and objects are yielded as soon as the decoder can produce them. Once the
 * input is exhausted both stages are marked as finished and drained.
 *
 * @param bytes - Iterable of byte arrays to process
 * @returns A generator yielding parsed PDF objects
 */
export function* bytesToPdfObjects(
    bytes: Iterable<ByteArray>,
): Generator<PdfObject> {
    const byteTokeniser = new PdfByteStreamTokeniser()
    const objectDecoder = new PdfDecoder()

    for (const chunk of bytes) {
        byteTokeniser.feedBytes(chunk)

        for (const token of byteTokeniser.nextItems()) {
            objectDecoder.feed(token)
            // Emit whatever became decodable after this token.
            yield* objectDecoder.nextItems()
        }
    }

    // No more input: let both stages flush what they are still holding.
    byteTokeniser.eof = true
    objectDecoder.eof = true

    for (const token of byteTokeniser.nextItems()) {
        objectDecoder.feed(token)
    }

    yield* objectDecoder.nextItems()
}
43
+
44
/**
 * Parses PDF objects out of a string of PDF content.
 * The string is converted to bytes and decoded as a single chunk.
 *
 * @param str - The string to parse as PDF content
 * @returns A generator yielding parsed PDF objects
 */
export function* stringToPdfObjects(str: string): Generator<PdfObject> {
    yield* bytesToPdfObjects([stringToBytes(str)])
}
54
+
55
/**
 * Decodes an iterable of byte arrays into PDF objects.
 * Allows configuring whitespace handling.
 *
 * Each chunk is tokenised in full, all resulting tokens are fed to the
 * decoder, and then the objects that became available are yielded. After the
 * last chunk both stages are marked as finished and drained.
 *
 * @param input - Iterable of byte arrays to decode
 * @param options - Configuration options
 * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
 * @returns A generator yielding parsed PDF objects
 */
export function* pdfDecoder(
    input: Iterable<ByteArray>,
    options?: { ignoreWhitespace?: boolean },
): Generator<PdfObject> {
    const decoder = new PdfDecoder(options)
    const tokeniser = new PdfByteStreamTokeniser()

    for (const chunk of input) {
        // Hand over the whole chunk at once, as the other generators in this
        // module do, instead of one call per byte.
        tokeniser.feedBytes(chunk)

        for (const token of tokeniser.nextItems()) {
            decoder.feed(token)
        }

        yield* decoder.nextItems()
    }

    // No more input: let both stages flush what they are still holding.
    tokeniser.eof = true
    decoder.eof = true

    for (const token of tokeniser.nextItems()) {
        decoder.feed(token)
    }

    yield* decoder.nextItems()
}
92
+
93
/**
 * Asynchronously decodes byte arrays into PDF objects.
 * Supports both async and sync iterables for streaming PDF parsing.
 *
 * Each chunk is tokenised as it arrives; tokens are handed to the decoder one
 * at a time and objects are yielded as soon as they can be produced. After
 * the last chunk both stages are marked as finished and drained.
 *
 * @param input - Async or sync iterable of byte arrays
 * @param options - Configuration options (optional; omitting it keeps the
 * decoder defaults)
 * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
 * @returns An async generator yielding parsed PDF objects
 */
export async function* pdfDecoderAsync(
    input: AsyncIterable<ByteArray> | Iterable<ByteArray>,
    options?: { ignoreWhitespace?: boolean },
): AsyncGenerator<PdfObject> {
    const tokeniser = new PdfByteStreamTokeniser()
    const decoder = new PdfDecoder(options)

    for await (const chunk of input) {
        tokeniser.feedBytes(chunk)

        for (const token of tokeniser.nextItems()) {
            decoder.feed(token)

            for (const obj of decoder.nextItems()) {
                yield obj
            }
        }
    }

    // No more input: let both stages flush what they are still holding.
    tokeniser.eof = true
    decoder.eof = true

    for (const token of tokeniser.nextItems()) {
        decoder.feed(token)
    }

    for (const obj of decoder.nextItems()) {
        yield obj
    }
}