pdf-lite 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206):
  1. package/.commitlintrc.cjs +25 -0
  2. package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
  3. package/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
  4. package/.github/workflows/docs.yaml +93 -0
  5. package/.github/workflows/prepare-release.yaml +79 -0
  6. package/.github/workflows/release.yaml +80 -0
  7. package/.github/workflows/test.yaml +35 -0
  8. package/.husky/commit-msg +1 -0
  9. package/.husky/pre-commit +1 -0
  10. package/.prettierignore +4 -0
  11. package/.prettierrc +4 -0
  12. package/CONTRIBUTING.md +109 -0
  13. package/EXAMPLES.md +1515 -0
  14. package/LICENSE +21 -0
  15. package/README.md +285 -0
  16. package/examples/001-create-pdf.ts +112 -0
  17. package/examples/002-create-encrypted-pdf.ts +121 -0
  18. package/examples/003-sign-pdf.ts +347 -0
  19. package/examples/004-incremental-update.ts +206 -0
  20. package/examples/005-modify-acroform.ts +374 -0
  21. package/examples/006-tokeniser-example.ts +131 -0
  22. package/examples/007-decoder-example.ts +197 -0
  23. package/package.json +72 -0
  24. package/packages/pdf-lite/README.md +3 -0
  25. package/packages/pdf-lite/package.json +68 -0
  26. package/packages/pdf-lite/scripts/create-encryption-tests.sh +41 -0
  27. package/packages/pdf-lite/scripts/gen-signing-keys.sh +290 -0
  28. package/packages/pdf-lite/scripts/generate-all-signing-keys.sh +70 -0
  29. package/packages/pdf-lite/src/core/decoder.ts +454 -0
  30. package/packages/pdf-lite/src/core/generators.ts +128 -0
  31. package/packages/pdf-lite/src/core/incremental-parser.ts +221 -0
  32. package/packages/pdf-lite/src/core/index.ts +2 -0
  33. package/packages/pdf-lite/src/core/objects/pdf-array.ts +54 -0
  34. package/packages/pdf-lite/src/core/objects/pdf-boolean.ts +19 -0
  35. package/packages/pdf-lite/src/core/objects/pdf-comment.ts +50 -0
  36. package/packages/pdf-lite/src/core/objects/pdf-date.ts +74 -0
  37. package/packages/pdf-lite/src/core/objects/pdf-dictionary.ts +171 -0
  38. package/packages/pdf-lite/src/core/objects/pdf-hexadecimal.ts +54 -0
  39. package/packages/pdf-lite/src/core/objects/pdf-indirect-object.ts +137 -0
  40. package/packages/pdf-lite/src/core/objects/pdf-name.ts +19 -0
  41. package/packages/pdf-lite/src/core/objects/pdf-null.ts +15 -0
  42. package/packages/pdf-lite/src/core/objects/pdf-number.ts +98 -0
  43. package/packages/pdf-lite/src/core/objects/pdf-object-reference.ts +30 -0
  44. package/packages/pdf-lite/src/core/objects/pdf-object.ts +107 -0
  45. package/packages/pdf-lite/src/core/objects/pdf-start-xref.ts +39 -0
  46. package/packages/pdf-lite/src/core/objects/pdf-stream.ts +687 -0
  47. package/packages/pdf-lite/src/core/objects/pdf-string.ts +38 -0
  48. package/packages/pdf-lite/src/core/objects/pdf-trailer.ts +57 -0
  49. package/packages/pdf-lite/src/core/objects/pdf-xref-table.ts +264 -0
  50. package/packages/pdf-lite/src/core/parser.ts +22 -0
  51. package/packages/pdf-lite/src/core/ref.ts +102 -0
  52. package/packages/pdf-lite/src/core/serializer.ts +68 -0
  53. package/packages/pdf-lite/src/core/streams/object-stream.ts +20 -0
  54. package/packages/pdf-lite/src/core/tokeniser.ts +687 -0
  55. package/packages/pdf-lite/src/core/tokens/boolean-token.ts +20 -0
  56. package/packages/pdf-lite/src/core/tokens/byte-offset-token.ts +20 -0
  57. package/packages/pdf-lite/src/core/tokens/comment-token.ts +32 -0
  58. package/packages/pdf-lite/src/core/tokens/end-array-token.ts +10 -0
  59. package/packages/pdf-lite/src/core/tokens/end-dictionary-token.ts +10 -0
  60. package/packages/pdf-lite/src/core/tokens/end-object-token.ts +10 -0
  61. package/packages/pdf-lite/src/core/tokens/end-stream-token.ts +11 -0
  62. package/packages/pdf-lite/src/core/tokens/hexadecimal-token.ts +22 -0
  63. package/packages/pdf-lite/src/core/tokens/name-token.ts +19 -0
  64. package/packages/pdf-lite/src/core/tokens/null-token.ts +9 -0
  65. package/packages/pdf-lite/src/core/tokens/number-token.ts +164 -0
  66. package/packages/pdf-lite/src/core/tokens/object-reference-token.ts +24 -0
  67. package/packages/pdf-lite/src/core/tokens/start-array-token.ts +10 -0
  68. package/packages/pdf-lite/src/core/tokens/start-dictionary-token.ts +10 -0
  69. package/packages/pdf-lite/src/core/tokens/start-object-token.ts +28 -0
  70. package/packages/pdf-lite/src/core/tokens/start-stream-token.ts +52 -0
  71. package/packages/pdf-lite/src/core/tokens/start-xref-token.ts +10 -0
  72. package/packages/pdf-lite/src/core/tokens/stream-chunk-token.ts +8 -0
  73. package/packages/pdf-lite/src/core/tokens/string-token.ts +17 -0
  74. package/packages/pdf-lite/src/core/tokens/token.ts +43 -0
  75. package/packages/pdf-lite/src/core/tokens/trailer-token.ts +12 -0
  76. package/packages/pdf-lite/src/core/tokens/whitespace-token.ts +43 -0
  77. package/packages/pdf-lite/src/core/tokens/xref-table-entry-token.ts +65 -0
  78. package/packages/pdf-lite/src/core/tokens/xref-table-section-start-token.ts +31 -0
  79. package/packages/pdf-lite/src/core/tokens/xref-table-start-token.ts +13 -0
  80. package/packages/pdf-lite/src/crypto/ciphers/aes128.ts +63 -0
  81. package/packages/pdf-lite/src/crypto/ciphers/aes256.ts +50 -0
  82. package/packages/pdf-lite/src/crypto/ciphers/rc4.ts +82 -0
  83. package/packages/pdf-lite/src/crypto/constants.ts +10 -0
  84. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation-aes256.ts +213 -0
  85. package/packages/pdf-lite/src/crypto/key-derivation/key-derivation.ts +122 -0
  86. package/packages/pdf-lite/src/crypto/key-gen/key-gen-aes256.ts +79 -0
  87. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-128.ts +190 -0
  88. package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-40.ts +129 -0
  89. package/packages/pdf-lite/src/crypto/types.ts +6 -0
  90. package/packages/pdf-lite/src/crypto/utils.ts +81 -0
  91. package/packages/pdf-lite/src/filters/ascii85.ts +128 -0
  92. package/packages/pdf-lite/src/filters/asciihex.ts +55 -0
  93. package/packages/pdf-lite/src/filters/flate.ts +39 -0
  94. package/packages/pdf-lite/src/filters/lzw.ts +144 -0
  95. package/packages/pdf-lite/src/filters/pass-through.ts +37 -0
  96. package/packages/pdf-lite/src/filters/runlength.ts +92 -0
  97. package/packages/pdf-lite/src/filters/types.ts +21 -0
  98. package/packages/pdf-lite/src/index.ts +4 -0
  99. package/packages/pdf-lite/src/pdf/errors.ts +5 -0
  100. package/packages/pdf-lite/src/pdf/index.ts +4 -0
  101. package/packages/pdf-lite/src/pdf/pdf-document.ts +924 -0
  102. package/packages/pdf-lite/src/pdf/pdf-reader.ts +57 -0
  103. package/packages/pdf-lite/src/pdf/pdf-revision.ts +234 -0
  104. package/packages/pdf-lite/src/pdf/pdf-xref-lookup.ts +527 -0
  105. package/packages/pdf-lite/src/security/crypt-filters/aesv2.ts +58 -0
  106. package/packages/pdf-lite/src/security/crypt-filters/aesv3.ts +56 -0
  107. package/packages/pdf-lite/src/security/crypt-filters/base.ts +140 -0
  108. package/packages/pdf-lite/src/security/crypt-filters/identity.ts +40 -0
  109. package/packages/pdf-lite/src/security/crypt-filters/v2.ts +59 -0
  110. package/packages/pdf-lite/src/security/handlers/base.ts +625 -0
  111. package/packages/pdf-lite/src/security/handlers/pubSec.ts +413 -0
  112. package/packages/pdf-lite/src/security/handlers/utils.ts +304 -0
  113. package/packages/pdf-lite/src/security/handlers/v1.ts +225 -0
  114. package/packages/pdf-lite/src/security/handlers/v2.ts +128 -0
  115. package/packages/pdf-lite/src/security/handlers/v4.ts +379 -0
  116. package/packages/pdf-lite/src/security/handlers/v5.ts +298 -0
  117. package/packages/pdf-lite/src/security/types.ts +158 -0
  118. package/packages/pdf-lite/src/signing/document-security-store.ts +224 -0
  119. package/packages/pdf-lite/src/signing/index.ts +3 -0
  120. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-detached.ts +154 -0
  121. package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-sha1.ts +161 -0
  122. package/packages/pdf-lite/src/signing/signatures/adbe-x509-rsa-sha1.ts +106 -0
  123. package/packages/pdf-lite/src/signing/signatures/base.ts +229 -0
  124. package/packages/pdf-lite/src/signing/signatures/etsi-cades-detached.ts +229 -0
  125. package/packages/pdf-lite/src/signing/signatures/etsi-rfc3161.ts +92 -0
  126. package/packages/pdf-lite/src/signing/signatures/index.ts +6 -0
  127. package/packages/pdf-lite/src/signing/signer.ts +120 -0
  128. package/packages/pdf-lite/src/signing/types.ts +86 -0
  129. package/packages/pdf-lite/src/signing/utils.ts +71 -0
  130. package/packages/pdf-lite/src/types.ts +44 -0
  131. package/packages/pdf-lite/src/utils/IterableReadableStream.ts +30 -0
  132. package/packages/pdf-lite/src/utils/algos.ts +446 -0
  133. package/packages/pdf-lite/src/utils/assert.ts +42 -0
  134. package/packages/pdf-lite/src/utils/bytesToHex.ts +18 -0
  135. package/packages/pdf-lite/src/utils/bytesToHexBytes.ts +27 -0
  136. package/packages/pdf-lite/src/utils/bytesToString.ts +17 -0
  137. package/packages/pdf-lite/src/utils/concatUint8Arrays.ts +26 -0
  138. package/packages/pdf-lite/src/utils/escapeString.ts +49 -0
  139. package/packages/pdf-lite/src/utils/hexBytesToBytes.ts +22 -0
  140. package/packages/pdf-lite/src/utils/hexBytesToString.ts +21 -0
  141. package/packages/pdf-lite/src/utils/hexToBytes.ts +18 -0
  142. package/packages/pdf-lite/src/utils/padBytes.ts +25 -0
  143. package/packages/pdf-lite/src/utils/predictors.ts +332 -0
  144. package/packages/pdf-lite/src/utils/replaceInBuffer.ts +56 -0
  145. package/packages/pdf-lite/src/utils/stringToBytes.ts +22 -0
  146. package/packages/pdf-lite/src/utils/stringToHexBytes.ts +23 -0
  147. package/packages/pdf-lite/src/utils/unescapeString.ts +123 -0
  148. package/packages/pdf-lite/test/acceptance/__snapshots__/versions.node.test.ts.snap +60766 -0
  149. package/packages/pdf-lite/test/acceptance/fixtures/1.3/basic.pdf +0 -0
  150. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-128.pdf +0 -0
  151. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-256.pdf +0 -0
  152. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-128.pdf +0 -0
  153. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-40.pdf +0 -0
  154. package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic.pdf +0 -0
  155. package/packages/pdf-lite/test/acceptance/fixtures/1.5/basic.pdf +0 -0
  156. package/packages/pdf-lite/test/acceptance/fixtures/1.6/basic.pdf +0 -0
  157. package/packages/pdf-lite/test/acceptance/fixtures/1.7/basic.pdf +0 -0
  158. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-128.pdf +43 -0
  159. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-256.pdf +43 -0
  160. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-128.pdf +43 -0
  161. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-40.pdf +44 -0
  162. package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic.pdf +79 -0
  163. package/packages/pdf-lite/test/acceptance/versions.node.test.ts +41 -0
  164. package/packages/pdf-lite/test/unit/__snapshots__/decoder.node.test.ts.snap +86947 -0
  165. package/packages/pdf-lite/test/unit/__snapshots__/tokeniser.node.test.ts.snap +131829 -0
  166. package/packages/pdf-lite/test/unit/ciphers.test.ts +61 -0
  167. package/packages/pdf-lite/test/unit/decoder.node.test.ts +21 -0
  168. package/packages/pdf-lite/test/unit/decoder.test.ts +567 -0
  169. package/packages/pdf-lite/test/unit/filters.test.ts +67 -0
  170. package/packages/pdf-lite/test/unit/fixtures/basic.pdf +0 -0
  171. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-128.pdf +0 -0
  172. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-256.pdf +0 -0
  173. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-128.pdf +0 -0
  174. package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-40.pdf +43 -0
  175. package/packages/pdf-lite/test/unit/fixtures/protectedAdobeLivecycle.pdf +0 -0
  176. package/packages/pdf-lite/test/unit/fixtures/rsa-2048/index.ts +187 -0
  177. package/packages/pdf-lite/test/unit/fixtures/template.pdf +0 -0
  178. package/packages/pdf-lite/test/unit/incremental-update.test.ts +0 -0
  179. package/packages/pdf-lite/test/unit/objects.test.ts +0 -0
  180. package/packages/pdf-lite/test/unit/pdf-document-signing.test.ts +0 -0
  181. package/packages/pdf-lite/test/unit/pdf-revision.test.ts +195 -0
  182. package/packages/pdf-lite/test/unit/pdf.browser.test.ts +0 -0
  183. package/packages/pdf-lite/test/unit/predictors.test.ts +226 -0
  184. package/packages/pdf-lite/test/unit/ref.test.ts +158 -0
  185. package/packages/pdf-lite/test/unit/security-handlers.test.ts +645 -0
  186. package/packages/pdf-lite/test/unit/serializer.test.ts +81 -0
  187. package/packages/pdf-lite/test/unit/signature-objects.test.ts +814 -0
  188. package/packages/pdf-lite/test/unit/string-escaping.test.ts +84 -0
  189. package/packages/pdf-lite/test/unit/tokeniser.node.test.ts +38 -0
  190. package/packages/pdf-lite/test/unit/tokeniser.test.ts +1213 -0
  191. package/packages/pdf-lite/test/unit/utils.test.ts +248 -0
  192. package/packages/pdf-lite/test/unit/xref-lookup.test.ts +72 -0
  193. package/packages/pdf-lite/tsconfig.json +4 -0
  194. package/packages/pdf-lite/tsconfig.prod.json +8 -0
  195. package/packages/pdf-lite/typedoc.json +14 -0
  196. package/packages/pdf-lite/vitest.config.ts +43 -0
  197. package/pnpm-workspace.yaml +2 -0
  198. package/renovate.json +34 -0
  199. package/scripts/build-examples.ts +30 -0
  200. package/scripts/bump-version.sh +56 -0
  201. package/scripts/gen-html-docs.sh +21 -0
  202. package/scripts/gen-md-docs.sh +15 -0
  203. package/scripts/prepare-release.sh +33 -0
  204. package/tsconfig.json +22 -0
  205. package/tsconfig.prod.json +12 -0
  206. package/typedoc.json +34 -0
@@ -0,0 +1,687 @@
1
+ import { assert } from '../utils/assert.js'
2
+ import { bytesToString } from '../utils/bytesToString.js'
3
+ import { IncrementalParser } from './incremental-parser.js'
4
+ import { PdfObject } from './objects/pdf-object.js'
5
+ import { PdfToken } from './tokens/token.js'
6
+ import { PdfBooleanToken } from './tokens/boolean-token.js'
7
+ import { PdfCommentToken } from './tokens/comment-token.js'
8
+ import { PdfEndArrayToken } from './tokens/end-array-token.js'
9
+ import { PdfEndDictionaryToken } from './tokens/end-dictionary-token.js'
10
+ import { PdfEndObjectToken } from './tokens/end-object-token.js'
11
+ import { PdfEndStreamToken } from './tokens/end-stream-token.js'
12
+ import { PdfHexadecimalToken } from './tokens/hexadecimal-token.js'
13
+ import { PdfNameToken } from './tokens/name-token.js'
14
+ import { PdfNullToken } from './tokens/null-token.js'
15
+ import { PdfNumberToken } from './tokens/number-token.js'
16
+ import { PdfObjectReferenceToken } from './tokens/object-reference-token.js'
17
+ import { PdfStartArrayToken } from './tokens/start-array-token.js'
18
+ import { PdfStartDictionaryToken } from './tokens/start-dictionary-token.js'
19
+ import { PdfStartObjectToken } from './tokens/start-object-token.js'
20
+ import { PdfStartStreamToken } from './tokens/start-stream-token.js'
21
+ import { PdfStartXRefToken } from './tokens/start-xref-token.js'
22
+ import { PdfStreamChunkToken } from './tokens/stream-chunk-token.js'
23
+ import { PdfStringToken } from './tokens/string-token.js'
24
+ import { PdfTrailerToken } from './tokens/trailer-token.js'
25
+ import { PdfWhitespaceToken } from './tokens/whitespace-token.js'
26
+ import { PdfXRefTableEntryToken } from './tokens/xref-table-entry-token.js'
27
+ import { PdfXRefTableSectionStartToken } from './tokens/xref-table-section-start-token.js'
28
+ import { PdfXRefTableStartToken } from './tokens/xref-table-start-token.js'
29
+ import { Parser } from './parser.js'
30
+ import { concatUint8Arrays } from '../utils/concatUint8Arrays.js'
31
+ import { stringToBytes } from '../utils/stringToBytes.js'
32
+ import { ByteArray } from '../types.js'
33
+
/**
 * Type alias for a parser that converts bytes to PDF tokens.
 *
 * Input items are individual byte values (`number`); output items are
 * `PdfToken` instances.
 */
export type PdfTokeniser = Parser<number, PdfToken>
38
+
/**
 * Byte values (ASCII codes) the tokeniser compares against, grouped by role.
 * Single-letter keys are the lowercase/uppercase letters used to spell PDF
 * keywords one byte at a time.
 */
const ByteMap = {
    // Delimiters
    LEFT_PARENTHESIS: 0x28, // (
    RIGHT_PARENTHESIS: 0x29, // )
    LEFT_ANGLE_BRACKET: 0x3c, // <
    RIGHT_ANGLE_BRACKET: 0x3e, // >
    LEFT_SQUARE_BRACKET: 0x5b, // [
    RIGHT_SQUARE_BRACKET: 0x5d, // ]
    SLASH: 0x2f, // /
    PERCENT: 0x25, // %

    // Whitespace (NEW_LINE and LINE_FEED are two names for the same byte)
    TAB: 0x09, // Tab
    NEW_LINE: 0x0a, // \n
    LINE_FEED: 0x0a, // \n
    SPACE: 0x20, // Space

    // Punctuation used in numbers and string escapes
    MINUS: 0x2d, // -
    DOT: 0x2e, // .
    BACKSLASH: 0x5c, // \

    // Letters used to match keywords
    R: 0x52, // R
    a: 0x61, // a
    b: 0x62, // b
    d: 0x64, // d
    e: 0x65, // e
    f: 0x66, // f
    i: 0x69, // i
    j: 0x6a, // j
    l: 0x6c, // l
    m: 0x6d, // m
    n: 0x6e, // n
    o: 0x6f, // o
    r: 0x72, // r
    s: 0x73, // s
    t: 0x74, // t
    u: 0x75, // u
    x: 0x78, // x
}
73
+
/**
 * Tokenizes a byte stream into PDF tokens.
 * Handles all PDF syntax including objects, streams, and xref tables.
 *
 * The tokeniser is stateful: after a `stream` keyword it emits raw stream
 * chunks until `endstream`, and after an `xref` keyword it interprets leading
 * numbers as xref section headers / entries until `trailer`.
 */
export class PdfByteStreamTokeniser extends IncrementalParser<
    number,
    PdfToken
> {
    /** True between a `stream` keyword and its matching `endstream`. */
    private inStream: boolean = false
    /** True between an `xref` keyword and the following `trailer`. */
    private inXrefTable: boolean = false
    /** Number of entries emitted so far in the current xref section. */
    private xrefEntryCount: number = 0
    /** First object number of the current xref section. */
    private lastSectionStartObjectNumber: number = 0
    /** Maximum number of bytes placed in a single stream chunk token. */
    private streamChunkSizeBytes: number

    /**
     * Creates a new byte stream tokenizer.
     *
     * @param options - Configuration options
     * @param options.streamChunkSizeBytes - Size of stream chunks (default: 1024)
     * @throws RangeError if `streamChunkSizeBytes` is not greater than zero
     */
    constructor(options?: { streamChunkSizeBytes?: number }) {
        super()
        const chunkSize = options?.streamChunkSizeBytes ?? 1024
        // A zero, negative or NaN chunk size would make nextStreamChunkToken
        // return empty chunks without ever consuming input.
        if (!(chunkSize > 0)) {
            throw new RangeError(
                `streamChunkSizeBytes must be greater than 0 (received: ${chunkSize})`,
            )
        }
        this.streamChunkSizeBytes = chunkSize
    }

    /**
     * Feeds a byte array into the tokenizer.
     *
     * @param bytes - The bytes to process
     */
    feedBytes(bytes: ByteArray) {
        for (const byte of bytes) {
            this.feed(byte)
        }
    }

    /**
     * Reads bytes up to (but not including) the next line break or the end
     * of the available input. Used for comment bodies.
     *
     * Delimiter characters such as `(`, `<` or `/` are part of the value:
     * a comment runs to the end of its line, so stopping at a delimiter
     * would cause the remainder of the comment to be tokenised as PDF syntax.
     */
    private readValue(): ByteArray {
        const valueBytes: number[] = []
        let byte = this.peek()
        while (byte !== null && !PdfByteStreamTokeniser.isNewLine(byte)) {
            valueBytes.push(byte)
            this.next() // Consume the byte
            byte = this.peek()
        }
        return new Uint8Array(valueBytes)
    }

    /** Reads a `%` comment; the token holds the bytes after the `%`. */
    private nextCommentToken(): PdfCommentToken {
        this.expect(ByteMap.PERCENT) // Consume the '%'
        const commentBytes = this.readValue()
        return new PdfCommentToken(commentBytes)
    }

    /** Consumes exactly one whitespace byte and wraps it in a token. */
    private nextWhitespaceToken(): PdfWhitespaceToken {
        const byte = this.next()
        return new PdfWhitespaceToken(byte)
    }

    /** Reads the `<<` dictionary opener. */
    private nextStartDictionaryToken(): PdfStartDictionaryToken {
        this.expect(ByteMap.LEFT_ANGLE_BRACKET)
        this.expect(ByteMap.LEFT_ANGLE_BRACKET)
        return new PdfStartDictionaryToken()
    }

    /**
     * Reads a `/Name`. The name ends at the first delimiter or whitespace
     * byte; the leading slash is not part of the token value.
     */
    private nextNameToken(): PdfNameToken {
        this.expect(ByteMap.SLASH)
        const nameBytes: number[] = []
        let byte = this.peek()
        while (
            byte !== null &&
            !PdfByteStreamTokeniser.isNameEnd(byte) &&
            !PdfByteStreamTokeniser.isWhitespace(byte)
        ) {
            nameBytes.push(this.next()!)
            byte = this.peek()
        }

        return new PdfNameToken(bytesToString(new Uint8Array(nameBytes)))
    }

    /** Reads the `>>` dictionary closer. */
    private nextDictionaryEndToken(): PdfEndDictionaryToken {
        this.expect(ByteMap.RIGHT_ANGLE_BRACKET)
        this.expect(ByteMap.RIGHT_ANGLE_BRACKET)
        return new PdfEndDictionaryToken()
    }

    /**
     * Reads a hexadecimal string `<...>`. The token holds the bytes between
     * the angle brackets with any whitespace removed, since whitespace inside
     * a hexadecimal string carries no meaning.
     */
    private nextHexadecimalToken(): PdfHexadecimalToken {
        this.expect(ByteMap.LEFT_ANGLE_BRACKET)

        const hexBytes: number[] = []
        let byte = this.peek()

        while (byte !== null && byte !== ByteMap.RIGHT_ANGLE_BRACKET) {
            const consumed = this.next()
            // Skip whitespace instead of treating it as the end of the string
            if (!PdfByteStreamTokeniser.isWhitespace(consumed)) {
                hexBytes.push(consumed)
            }
            byte = this.peek()
        }

        this.expect(ByteMap.RIGHT_ANGLE_BRACKET)

        return new PdfHexadecimalToken(new Uint8Array(hexBytes))
    }

    /**
     * Reads a run of number bytes (digits, `-` and `.`) into a number token.
     */
    private nextNumberToken(): PdfNumberToken {
        const numberBytes: number[] = []
        let byte = this.peek()

        while (PdfByteStreamTokeniser.isDigit(byte)) {
            numberBytes.push(this.next())
            byte = this.peek()
        }

        return new PdfNumberToken({
            value: new Uint8Array(numberBytes),
        })
    }

    /** Reads an indirect reference of the form `<obj> <gen> R`. */
    private nextObjectReferenceToken(): PdfObjectReferenceToken {
        const objectNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        const generationNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        this.expect(ByteMap.R)

        return new PdfObjectReferenceToken(
            objectNumberToken.value,
            generationNumberToken.value,
        )
    }

    /**
     * Reads an object header of the form `<obj> <gen> obj`, recording the
     * input offset at which the header started.
     */
    private nextStartObjectToken(): PdfStartObjectToken {
        const offset = this.inputOffset
        const objectNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        const generationNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        this.expect(ByteMap.o)
        this.expect(ByteMap.b)
        this.expect(ByteMap.j)

        return new PdfStartObjectToken(
            objectNumberToken.value,
            generationNumberToken.value,
            offset,
        )
    }

    /** Reads the `[` array opener. */
    private nextStartArrayToken(): PdfStartArrayToken {
        this.expect(ByteMap.LEFT_SQUARE_BRACKET)
        return new PdfStartArrayToken()
    }

    /** Reads the `]` array closer. */
    private nextEndArrayToken(): PdfEndArrayToken {
        this.expect(ByteMap.RIGHT_SQUARE_BRACKET)
        return new PdfEndArrayToken()
    }

    /**
     * Reads a literal string `( ... )`. Balanced unescaped parentheses are
     * kept as part of the string; backslash escapes are decoded, so the
     * token holds the unescaped bytes.
     *
     * @throws Error if the input ends before the closing parenthesis
     */
    private nextStringToken(): PdfStringToken {
        this.expect(ByteMap.LEFT_PARENTHESIS)

        const stringBytes: number[] = []
        let nesting = 1

        while (nesting > 0) {
            const byte = this.next()

            if (byte === null) {
                throw new Error('Unexpected end of input in string token')
            }

            if (byte === ByteMap.LEFT_PARENTHESIS) {
                nesting++
            } else if (byte === ByteMap.RIGHT_PARENTHESIS) {
                nesting--
                if (nesting === 0) {
                    // The outermost ')' terminates the string and is not stored
                    break
                }
            } else if (byte === ByteMap.BACKSLASH) {
                this.readStringEscape(stringBytes)
                continue
            }

            stringBytes.push(byte)
        }

        return new PdfStringToken(new Uint8Array(stringBytes))
    }

    /**
     * Decodes one escape sequence inside a literal string. The backslash has
     * already been consumed; the decoded byte (if any) is appended to `out`.
     *
     * @param out - Accumulator for the decoded string bytes
     * @throws Error if the input ends inside the escape sequence
     */
    private readStringEscape(out: number[]): void {
        const next = this.next()

        if (next === null) {
            throw new Error('Unexpected end of input in string token')
        }

        switch (next) {
            case ByteMap.n:
                out.push(0x0a) // \n
                return
            case ByteMap.r:
                out.push(0x0d) // \r
                return
            case ByteMap.t:
                out.push(0x09) // \t
                return
            case ByteMap.b:
                out.push(0x08) // \b
                return
            case ByteMap.f:
                out.push(0x0c) // \f
                return
            case ByteMap.LEFT_PARENTHESIS:
                out.push(ByteMap.LEFT_PARENTHESIS) // \(
                return
            case ByteMap.RIGHT_PARENTHESIS:
                out.push(ByteMap.RIGHT_PARENTHESIS) // \)
                return
            case ByteMap.BACKSLASH:
                out.push(ByteMap.BACKSLASH) // \\
                return
            case 0x0a:
                // Backslash + line break is a line continuation: emit nothing
                return
            case 0x0d:
                // Same, but a CRLF pair is a single line break, so also
                // swallow the LF that follows the CR
                if (this.peek() === ByteMap.LINE_FEED) {
                    this.next()
                }
                return
            default:
                break
        }

        if (!PdfByteStreamTokeniser.isOctet(next)) {
            // If it's not a valid escape sequence, just add the next byte
            out.push(next)
            return
        }

        // Octal escape: one to three digits
        let octal = next - 0x30
        for (let extraDigits = 0; extraDigits < 2; extraDigits++) {
            const candidate = this.peek()
            if (candidate === null) {
                throw new Error('Unexpected end of input in string token')
            }
            if (!PdfByteStreamTokeniser.isOctet(candidate)) {
                break
            }
            octal = octal * 8 + (this.next() - 0x30)
        }

        // Values above 0xff (e.g. \777) keep only their low-order byte
        out.push(octal & 0xff)
    }

    /** Reads the `endobj` keyword. */
    private nextEndObjectToken(): PdfEndObjectToken {
        this.expect(ByteMap.e)
        this.expect(ByteMap.n)
        this.expect(ByteMap.d)
        this.expect(ByteMap.o)
        this.expect(ByteMap.b)
        this.expect(ByteMap.j)

        return new PdfEndObjectToken()
    }

    /** Reads the `true` keyword. */
    private nextTrueToken(): PdfBooleanToken {
        this.expect(ByteMap.t)
        this.expect(ByteMap.r)
        this.expect(ByteMap.u)
        this.expect(ByteMap.e)

        return new PdfBooleanToken(true)
    }

    /** Reads the `false` keyword. */
    private nextFalseToken(): PdfBooleanToken {
        this.expect(ByteMap.f)
        this.expect(ByteMap.a)
        this.expect(ByteMap.l)
        this.expect(ByteMap.s)
        this.expect(ByteMap.e)

        return new PdfBooleanToken(false)
    }

    /** Reads the `null` keyword. */
    private nextNullToken(): PdfNullToken {
        this.expect(ByteMap.n)
        this.expect(ByteMap.u)
        this.expect(ByteMap.l)
        this.expect(ByteMap.l)

        return new PdfNullToken()
    }

    /**
     * Reads the `stream` keyword together with the whitespace that follows
     * it, then switches the tokeniser into stream mode.
     */
    private nextStartStreamToken(): PdfStartStreamToken {
        this.expect(ByteMap.s)
        this.expect(ByteMap.t)
        this.expect(ByteMap.r)
        this.expect(ByteMap.e)
        this.expect(ByteMap.a)
        this.expect(ByteMap.m)

        const whitespaceBytes: number[] = []
        while (PdfByteStreamTokeniser.isWhitespace(this.peek())) {
            whitespaceBytes.push(this.next())
        }

        this.inStream = true

        return new PdfStartStreamToken(
            concatUint8Arrays(
                stringToBytes('stream'),
                new Uint8Array(whitespaceBytes),
            ),
        )
    }

    /**
     * Reads the next piece of stream data: up to `streamChunkSizeBytes` raw
     * bytes, stopping early in front of an `endstream` keyword. When the
     * input is positioned exactly at `endstream`, the end-stream token is
     * returned instead of a chunk.
     *
     * @throws Error if the tokeniser is not inside a stream
     */
    private nextStreamChunkToken(): PdfStreamChunkToken | PdfEndStreamToken {
        if (!this.inStream) {
            throw new Error('Not currently in a stream')
        }

        const chunkBytes: number[] = []

        // Short-circuit evaluation keeps the lookahead as small as possible
        const isEnd = () => {
            return (
                this.peek() === ByteMap.e &&
                this.peek(1) === ByteMap.n &&
                this.peek(2) === ByteMap.d &&
                this.peek(3) === ByteMap.s &&
                this.peek(4) === ByteMap.t &&
                this.peek(5) === ByteMap.r &&
                this.peek(6) === ByteMap.e &&
                this.peek(7) === ByteMap.a &&
                this.peek(8) === ByteMap.m
            )
        }

        if (isEnd()) {
            return this.nextEndStreamToken()
        }

        while (!isEnd() && chunkBytes.length < this.streamChunkSizeBytes) {
            chunkBytes.push(this.next())
        }

        return new PdfStreamChunkToken(new Uint8Array(chunkBytes))
    }

    /** Reads the `endstream` keyword and leaves stream mode. */
    private nextEndStreamToken(): PdfEndStreamToken {
        this.expect(ByteMap.e)
        this.expect(ByteMap.n)
        this.expect(ByteMap.d)
        this.expect(ByteMap.s)
        this.expect(ByteMap.t)
        this.expect(ByteMap.r)
        this.expect(ByteMap.e)
        this.expect(ByteMap.a)
        this.expect(ByteMap.m)

        this.inStream = false

        return new PdfEndStreamToken()
    }

    /** Reads the `startxref` keyword. */
    private nextStartXRefToken(): PdfStartXRefToken {
        this.expect(ByteMap.s)
        this.expect(ByteMap.t)
        this.expect(ByteMap.a)
        this.expect(ByteMap.r)
        this.expect(ByteMap.t)
        this.expect(ByteMap.x)
        this.expect(ByteMap.r)
        this.expect(ByteMap.e)
        this.expect(ByteMap.f)

        return new PdfStartXRefToken()
    }

    /**
     * Reads the `trailer` keyword, recording the input offset at which it
     * started, and leaves xref-table mode.
     */
    private nextTrailerToken(): PdfTrailerToken {
        const offset = this.inputOffset

        this.expect(ByteMap.t)
        this.expect(ByteMap.r)
        this.expect(ByteMap.a)
        this.expect(ByteMap.i)
        this.expect(ByteMap.l)
        this.expect(ByteMap.e)
        this.expect(ByteMap.r)

        this.inXrefTable = false

        return new PdfTrailerToken(offset)
    }

    /**
     * Reads the `xref` keyword, recording the input offset at which it
     * started, and enters xref-table mode with fresh section counters.
     */
    private nextXRefTableStartToken(): PdfXRefTableStartToken {
        const offset = this.inputOffset

        this.expect(ByteMap.x)
        this.expect(ByteMap.r)
        this.expect(ByteMap.e)
        this.expect(ByteMap.f)

        this.inXrefTable = true
        this.xrefEntryCount = 0
        this.lastSectionStartObjectNumber = 0

        return new PdfXRefTableStartToken(offset)
    }

    /**
     * Reads an xref section header `<first object number> <entry count>` and
     * resets the per-section entry counter.
     */
    private nextXRefTableSectionStartToken(): PdfXRefTableSectionStartToken {
        const firstObjectNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        const entryCountToken = this.nextNumberToken()

        this.lastSectionStartObjectNumber = firstObjectNumberToken.value
        this.xrefEntryCount = 0

        return new PdfXRefTableSectionStartToken(
            firstObjectNumberToken,
            entryCountToken,
        )
    }

    /**
     * Reads an xref entry `<byte offset> <generation> <n|f>`. The entry's
     * object number is derived from the current section's first object
     * number plus the count of entries already read in that section.
     *
     * @throws Error if the input ends before the in-use flag
     */
    private nextXRefTableEntryToken(): PdfXRefTableEntryToken {
        const byteOffsetToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        const generationNumberToken = this.nextNumberToken()
        this.expect(ByteMap.SPACE)
        const inUseByte = this.next()

        if (inUseByte === null) {
            throw new Error('Unexpected end of input in xref entry token')
        }

        assert(inUseByte === ByteMap.n || inUseByte === ByteMap.f)

        const inUse = inUseByte === ByteMap.n

        return new PdfXRefTableEntryToken(
            byteOffsetToken,
            generationNumberToken,
            this.lastSectionStartObjectNumber + this.xrefEntryCount++, // Increment and use the current count
            inUse,
        )
    }

    /**
     * Reads the next token, dispatching on the tokeniser state and on the
     * first pending byte. Where several token kinds share a first byte the
     * candidates are tried in order via `oneOf`.
     *
     * @param root - Only used in the error message for unrecognised input
     * @throws Error if no token kind starts with the pending byte
     */
    private nextToken(root: boolean = false): PdfToken {
        const byte = this.peek()

        if (this.inStream) {
            return this.nextStreamChunkToken()
        } else if (byte === ByteMap.PERCENT) {
            return this.nextCommentToken()
        } else if (PdfByteStreamTokeniser.isWhitespace(byte)) {
            return this.nextWhitespaceToken()
        } else if (byte === ByteMap.LEFT_ANGLE_BRACKET) {
            return this.oneOf(
                this.nextStartDictionaryToken,
                this.nextHexadecimalToken,
            )
        } else if (byte === ByteMap.SLASH) {
            return this.nextNameToken()
        } else if (byte === ByteMap.RIGHT_ANGLE_BRACKET) {
            return this.nextDictionaryEndToken()
        } else if (PdfByteStreamTokeniser.isDigit(byte)) {
            if (this.inXrefTable) {
                return this.oneOf(
                    this.nextXRefTableEntryToken,
                    this.nextXRefTableSectionStartToken,
                )
            }

            return this.oneOf(
                this.nextStartObjectToken,
                this.nextObjectReferenceToken,
                this.nextNumberToken,
            )
        } else if (byte === ByteMap.LEFT_SQUARE_BRACKET) {
            return this.nextStartArrayToken()
        } else if (byte === ByteMap.RIGHT_SQUARE_BRACKET) {
            return this.nextEndArrayToken()
        } else if (byte === ByteMap.LEFT_PARENTHESIS) {
            return this.nextStringToken()
        } else if (byte === ByteMap.e) {
            return this.oneOf(this.nextEndObjectToken, this.nextEndStreamToken)
        } else if (byte === ByteMap.t) {
            return this.oneOf(this.nextTrueToken, this.nextTrailerToken)
        } else if (byte === ByteMap.f) {
            return this.nextFalseToken()
        } else if (byte === ByteMap.n) {
            return this.nextNullToken()
        } else if (byte === ByteMap.s) {
            return this.oneOf(
                this.nextStartStreamToken,
                this.nextStartXRefToken,
            )
        } else if (byte === ByteMap.x) {
            return this.nextXRefTableStartToken()
        } else {
            throw new Error(
                `Unrecognised token starting with byte: ${byte} (root: ${root}) (char: ${String.fromCharCode(byte ?? 0)})`,
            )
        }
    }

    /** Parser entry point: produces the next top-level token. */
    protected parse() {
        return this.nextToken(true)
    }

    /** Delegates to `PdfWhitespaceToken.isWhitespaceByte`. */
    private static isWhitespace(byte: number | null): boolean {
        return PdfWhitespaceToken.isWhitespaceByte(byte)
    }

    /** True for line feed, carriage return and form feed. */
    private static isNewLine(byte: number | null): boolean {
        return byte === ByteMap.NEW_LINE || byte === 0x0d || byte === 0x0c // \r or \f
    }

    /** True for bytes that may appear in a number: `0`-`9`, `-` and `.`. */
    private static isDigit(byte: number | null): boolean {
        return (
            byte !== null &&
            ((byte >= 0x30 && byte <= 0x39) ||
                byte === ByteMap.MINUS ||
                byte === ByteMap.DOT)
        ) // 0-9 or 0x2d (minus) or 0x2E (.)
    }

    /** True for the delimiter bytes that terminate a name: `/ < > ( ) [ ]`. */
    private static isNameEnd(byte: number | null): boolean {
        if (byte === null) {
            return false
        }
        return (
            byte === ByteMap.SLASH ||
            byte === ByteMap.LEFT_ANGLE_BRACKET ||
            byte === ByteMap.RIGHT_ANGLE_BRACKET ||
            byte === ByteMap.RIGHT_PARENTHESIS ||
            byte === ByteMap.LEFT_PARENTHESIS ||
            byte === ByteMap.LEFT_SQUARE_BRACKET ||
            byte === ByteMap.RIGHT_SQUARE_BRACKET
        )
    }

    /** True for the octal digit bytes `0`-`7`. */
    private static isOctet(byte: number | null): boolean {
        if (byte === null) {
            return false
        }
        return (
            byte >= 0x30 && byte <= 0x37 // 0-7
        )
    }
}
637
+
/**
 * Converts a PDF object to its token representation.
 *
 * @param object - The PDF object to tokenize
 * @returns A generator yielding the object's tokens
 */
export function* objectToTokens(object: PdfObject): Generator<PdfToken> {
    // Delegate with `yield*`: a plain `return` inside a generator function
    // would finish the generator immediately without yielding any token.
    yield* object.toTokens()
}
647
+
/**
 * Creates a function that converts a stream of PDF objects to tokens.
 *
 * @returns A generator function that yields, in order, every token of every
 * object in the iterable it is given
 */
export function pdfObjectStreamTokeniser() {
    return function* (objects: Iterable<PdfObject>): Generator<PdfToken> {
        for (const object of objects) {
            // Delegate straight to the object's own token generator so that
            // every token is actually yielded to the consumer.
            yield* object.toTokens()
        }
    }
}
660
+
/**
 * Parser that takes PDF objects as input and produces their PDF tokens.
 */
export class PdfObjectTokeniser extends Parser<PdfObject, PdfToken> {
    /** Objects that have been fed but not yet turned into tokens (FIFO). */
    private buffer: PdfObject[] = []

    /**
     * Queues PDF objects for tokenisation.
     *
     * @param input - PDF objects to tokenize
     */
    feed(...input: PdfObject[]): void {
        this.buffer.push(...input)
    }

    /**
     * Drains the queue front to back, yielding each object's tokens.
     *
     * @returns A generator yielding PDF tokens
     */
    *nextItems(): Generator<PdfToken> {
        while (this.buffer.length > 0) {
            // Remove the oldest object before emitting its tokens
            const current = this.buffer.shift() as PdfObject
            yield* current.toTokens()
        }
    }
}
@@ -0,0 +1,20 @@
1
+ import { ByteArray } from '../../types'
2
+ import { PdfToken } from './token'
3
+
/**
 * Token for the PDF boolean keywords `true` and `false`.
 */
export class PdfBooleanToken extends PdfToken {
    /** Shared token instance for `true`. */
    static TRUE = new PdfBooleanToken(true)
    /** Shared token instance for `false`. */
    static FALSE = new PdfBooleanToken(false)

    /** The boolean this token represents. */
    value: boolean

    /**
     * @param value - The boolean value; also determines the token's bytes
     */
    constructor(value: boolean) {
        super(PdfBooleanToken.toBytes(value))
        this.value = value
    }

    /**
     * Returns a fresh byte array spelling the keyword for `value` in ASCII.
     */
    private static toBytes(value: boolean): ByteArray {
        if (value) {
            return new Uint8Array([0x74, 0x72, 0x75, 0x65]) // "true"
        }
        return new Uint8Array([0x66, 0x61, 0x6c, 0x73, 0x65]) // "false"
    }
}