pdf-lite 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.commitlintrc.cjs +25 -0
- package/.github/ISSUE_TEMPLATE/bug_report.md +40 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
- package/.github/workflows/docs.yaml +93 -0
- package/.github/workflows/prepare-release.yaml +79 -0
- package/.github/workflows/release.yaml +80 -0
- package/.github/workflows/test.yaml +35 -0
- package/.husky/commit-msg +1 -0
- package/.husky/pre-commit +1 -0
- package/.prettierignore +4 -0
- package/.prettierrc +4 -0
- package/CONTRIBUTING.md +109 -0
- package/EXAMPLES.md +1515 -0
- package/LICENSE +21 -0
- package/README.md +285 -0
- package/examples/001-create-pdf.ts +112 -0
- package/examples/002-create-encrypted-pdf.ts +121 -0
- package/examples/003-sign-pdf.ts +347 -0
- package/examples/004-incremental-update.ts +206 -0
- package/examples/005-modify-acroform.ts +374 -0
- package/examples/006-tokeniser-example.ts +131 -0
- package/examples/007-decoder-example.ts +197 -0
- package/package.json +72 -0
- package/packages/pdf-lite/README.md +3 -0
- package/packages/pdf-lite/package.json +68 -0
- package/packages/pdf-lite/scripts/create-encryption-tests.sh +41 -0
- package/packages/pdf-lite/scripts/gen-signing-keys.sh +290 -0
- package/packages/pdf-lite/scripts/generate-all-signing-keys.sh +70 -0
- package/packages/pdf-lite/src/core/decoder.ts +454 -0
- package/packages/pdf-lite/src/core/generators.ts +128 -0
- package/packages/pdf-lite/src/core/incremental-parser.ts +221 -0
- package/packages/pdf-lite/src/core/index.ts +2 -0
- package/packages/pdf-lite/src/core/objects/pdf-array.ts +54 -0
- package/packages/pdf-lite/src/core/objects/pdf-boolean.ts +19 -0
- package/packages/pdf-lite/src/core/objects/pdf-comment.ts +50 -0
- package/packages/pdf-lite/src/core/objects/pdf-date.ts +74 -0
- package/packages/pdf-lite/src/core/objects/pdf-dictionary.ts +171 -0
- package/packages/pdf-lite/src/core/objects/pdf-hexadecimal.ts +54 -0
- package/packages/pdf-lite/src/core/objects/pdf-indirect-object.ts +137 -0
- package/packages/pdf-lite/src/core/objects/pdf-name.ts +19 -0
- package/packages/pdf-lite/src/core/objects/pdf-null.ts +15 -0
- package/packages/pdf-lite/src/core/objects/pdf-number.ts +98 -0
- package/packages/pdf-lite/src/core/objects/pdf-object-reference.ts +30 -0
- package/packages/pdf-lite/src/core/objects/pdf-object.ts +107 -0
- package/packages/pdf-lite/src/core/objects/pdf-start-xref.ts +39 -0
- package/packages/pdf-lite/src/core/objects/pdf-stream.ts +687 -0
- package/packages/pdf-lite/src/core/objects/pdf-string.ts +38 -0
- package/packages/pdf-lite/src/core/objects/pdf-trailer.ts +57 -0
- package/packages/pdf-lite/src/core/objects/pdf-xref-table.ts +264 -0
- package/packages/pdf-lite/src/core/parser.ts +22 -0
- package/packages/pdf-lite/src/core/ref.ts +102 -0
- package/packages/pdf-lite/src/core/serializer.ts +68 -0
- package/packages/pdf-lite/src/core/streams/object-stream.ts +20 -0
- package/packages/pdf-lite/src/core/tokeniser.ts +687 -0
- package/packages/pdf-lite/src/core/tokens/boolean-token.ts +20 -0
- package/packages/pdf-lite/src/core/tokens/byte-offset-token.ts +20 -0
- package/packages/pdf-lite/src/core/tokens/comment-token.ts +32 -0
- package/packages/pdf-lite/src/core/tokens/end-array-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/end-dictionary-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/end-object-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/end-stream-token.ts +11 -0
- package/packages/pdf-lite/src/core/tokens/hexadecimal-token.ts +22 -0
- package/packages/pdf-lite/src/core/tokens/name-token.ts +19 -0
- package/packages/pdf-lite/src/core/tokens/null-token.ts +9 -0
- package/packages/pdf-lite/src/core/tokens/number-token.ts +164 -0
- package/packages/pdf-lite/src/core/tokens/object-reference-token.ts +24 -0
- package/packages/pdf-lite/src/core/tokens/start-array-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/start-dictionary-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/start-object-token.ts +28 -0
- package/packages/pdf-lite/src/core/tokens/start-stream-token.ts +52 -0
- package/packages/pdf-lite/src/core/tokens/start-xref-token.ts +10 -0
- package/packages/pdf-lite/src/core/tokens/stream-chunk-token.ts +8 -0
- package/packages/pdf-lite/src/core/tokens/string-token.ts +17 -0
- package/packages/pdf-lite/src/core/tokens/token.ts +43 -0
- package/packages/pdf-lite/src/core/tokens/trailer-token.ts +12 -0
- package/packages/pdf-lite/src/core/tokens/whitespace-token.ts +43 -0
- package/packages/pdf-lite/src/core/tokens/xref-table-entry-token.ts +65 -0
- package/packages/pdf-lite/src/core/tokens/xref-table-section-start-token.ts +31 -0
- package/packages/pdf-lite/src/core/tokens/xref-table-start-token.ts +13 -0
- package/packages/pdf-lite/src/crypto/ciphers/aes128.ts +63 -0
- package/packages/pdf-lite/src/crypto/ciphers/aes256.ts +50 -0
- package/packages/pdf-lite/src/crypto/ciphers/rc4.ts +82 -0
- package/packages/pdf-lite/src/crypto/constants.ts +10 -0
- package/packages/pdf-lite/src/crypto/key-derivation/key-derivation-aes256.ts +213 -0
- package/packages/pdf-lite/src/crypto/key-derivation/key-derivation.ts +122 -0
- package/packages/pdf-lite/src/crypto/key-gen/key-gen-aes256.ts +79 -0
- package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-128.ts +190 -0
- package/packages/pdf-lite/src/crypto/key-gen/key-gen-rc4-40.ts +129 -0
- package/packages/pdf-lite/src/crypto/types.ts +6 -0
- package/packages/pdf-lite/src/crypto/utils.ts +81 -0
- package/packages/pdf-lite/src/filters/ascii85.ts +128 -0
- package/packages/pdf-lite/src/filters/asciihex.ts +55 -0
- package/packages/pdf-lite/src/filters/flate.ts +39 -0
- package/packages/pdf-lite/src/filters/lzw.ts +144 -0
- package/packages/pdf-lite/src/filters/pass-through.ts +37 -0
- package/packages/pdf-lite/src/filters/runlength.ts +92 -0
- package/packages/pdf-lite/src/filters/types.ts +21 -0
- package/packages/pdf-lite/src/index.ts +4 -0
- package/packages/pdf-lite/src/pdf/errors.ts +5 -0
- package/packages/pdf-lite/src/pdf/index.ts +4 -0
- package/packages/pdf-lite/src/pdf/pdf-document.ts +924 -0
- package/packages/pdf-lite/src/pdf/pdf-reader.ts +57 -0
- package/packages/pdf-lite/src/pdf/pdf-revision.ts +234 -0
- package/packages/pdf-lite/src/pdf/pdf-xref-lookup.ts +527 -0
- package/packages/pdf-lite/src/security/crypt-filters/aesv2.ts +58 -0
- package/packages/pdf-lite/src/security/crypt-filters/aesv3.ts +56 -0
- package/packages/pdf-lite/src/security/crypt-filters/base.ts +140 -0
- package/packages/pdf-lite/src/security/crypt-filters/identity.ts +40 -0
- package/packages/pdf-lite/src/security/crypt-filters/v2.ts +59 -0
- package/packages/pdf-lite/src/security/handlers/base.ts +625 -0
- package/packages/pdf-lite/src/security/handlers/pubSec.ts +413 -0
- package/packages/pdf-lite/src/security/handlers/utils.ts +304 -0
- package/packages/pdf-lite/src/security/handlers/v1.ts +225 -0
- package/packages/pdf-lite/src/security/handlers/v2.ts +128 -0
- package/packages/pdf-lite/src/security/handlers/v4.ts +379 -0
- package/packages/pdf-lite/src/security/handlers/v5.ts +298 -0
- package/packages/pdf-lite/src/security/types.ts +158 -0
- package/packages/pdf-lite/src/signing/document-security-store.ts +224 -0
- package/packages/pdf-lite/src/signing/index.ts +3 -0
- package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-detached.ts +154 -0
- package/packages/pdf-lite/src/signing/signatures/adbe-pkcs7-sha1.ts +161 -0
- package/packages/pdf-lite/src/signing/signatures/adbe-x509-rsa-sha1.ts +106 -0
- package/packages/pdf-lite/src/signing/signatures/base.ts +229 -0
- package/packages/pdf-lite/src/signing/signatures/etsi-cades-detached.ts +229 -0
- package/packages/pdf-lite/src/signing/signatures/etsi-rfc3161.ts +92 -0
- package/packages/pdf-lite/src/signing/signatures/index.ts +6 -0
- package/packages/pdf-lite/src/signing/signer.ts +120 -0
- package/packages/pdf-lite/src/signing/types.ts +86 -0
- package/packages/pdf-lite/src/signing/utils.ts +71 -0
- package/packages/pdf-lite/src/types.ts +44 -0
- package/packages/pdf-lite/src/utils/IterableReadableStream.ts +30 -0
- package/packages/pdf-lite/src/utils/algos.ts +446 -0
- package/packages/pdf-lite/src/utils/assert.ts +42 -0
- package/packages/pdf-lite/src/utils/bytesToHex.ts +18 -0
- package/packages/pdf-lite/src/utils/bytesToHexBytes.ts +27 -0
- package/packages/pdf-lite/src/utils/bytesToString.ts +17 -0
- package/packages/pdf-lite/src/utils/concatUint8Arrays.ts +26 -0
- package/packages/pdf-lite/src/utils/escapeString.ts +49 -0
- package/packages/pdf-lite/src/utils/hexBytesToBytes.ts +22 -0
- package/packages/pdf-lite/src/utils/hexBytesToString.ts +21 -0
- package/packages/pdf-lite/src/utils/hexToBytes.ts +18 -0
- package/packages/pdf-lite/src/utils/padBytes.ts +25 -0
- package/packages/pdf-lite/src/utils/predictors.ts +332 -0
- package/packages/pdf-lite/src/utils/replaceInBuffer.ts +56 -0
- package/packages/pdf-lite/src/utils/stringToBytes.ts +22 -0
- package/packages/pdf-lite/src/utils/stringToHexBytes.ts +23 -0
- package/packages/pdf-lite/src/utils/unescapeString.ts +123 -0
- package/packages/pdf-lite/test/acceptance/__snapshots__/versions.node.test.ts.snap +60766 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.3/basic.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-128.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-aes-256.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-128.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic-rc4-40.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.4/basic.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.5/basic.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.6/basic.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/1.7/basic.pdf +0 -0
- package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-128.pdf +43 -0
- package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-aes-256.pdf +43 -0
- package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-128.pdf +43 -0
- package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic-rc4-40.pdf +44 -0
- package/packages/pdf-lite/test/acceptance/fixtures/2.0/basic.pdf +79 -0
- package/packages/pdf-lite/test/acceptance/versions.node.test.ts +41 -0
- package/packages/pdf-lite/test/unit/__snapshots__/decoder.node.test.ts.snap +86947 -0
- package/packages/pdf-lite/test/unit/__snapshots__/tokeniser.node.test.ts.snap +131829 -0
- package/packages/pdf-lite/test/unit/ciphers.test.ts +61 -0
- package/packages/pdf-lite/test/unit/decoder.node.test.ts +21 -0
- package/packages/pdf-lite/test/unit/decoder.test.ts +567 -0
- package/packages/pdf-lite/test/unit/filters.test.ts +67 -0
- package/packages/pdf-lite/test/unit/fixtures/basic.pdf +0 -0
- package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-128.pdf +0 -0
- package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-aes-256.pdf +0 -0
- package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-128.pdf +0 -0
- package/packages/pdf-lite/test/unit/fixtures/encrypted_v1/basic-rc4-40.pdf +43 -0
- package/packages/pdf-lite/test/unit/fixtures/protectedAdobeLivecycle.pdf +0 -0
- package/packages/pdf-lite/test/unit/fixtures/rsa-2048/index.ts +187 -0
- package/packages/pdf-lite/test/unit/fixtures/template.pdf +0 -0
- package/packages/pdf-lite/test/unit/incremental-update.test.ts +0 -0
- package/packages/pdf-lite/test/unit/objects.test.ts +0 -0
- package/packages/pdf-lite/test/unit/pdf-document-signing.test.ts +0 -0
- package/packages/pdf-lite/test/unit/pdf-revision.test.ts +195 -0
- package/packages/pdf-lite/test/unit/pdf.browser.test.ts +0 -0
- package/packages/pdf-lite/test/unit/predictors.test.ts +226 -0
- package/packages/pdf-lite/test/unit/ref.test.ts +158 -0
- package/packages/pdf-lite/test/unit/security-handlers.test.ts +645 -0
- package/packages/pdf-lite/test/unit/serializer.test.ts +81 -0
- package/packages/pdf-lite/test/unit/signature-objects.test.ts +814 -0
- package/packages/pdf-lite/test/unit/string-escaping.test.ts +84 -0
- package/packages/pdf-lite/test/unit/tokeniser.node.test.ts +38 -0
- package/packages/pdf-lite/test/unit/tokeniser.test.ts +1213 -0
- package/packages/pdf-lite/test/unit/utils.test.ts +248 -0
- package/packages/pdf-lite/test/unit/xref-lookup.test.ts +72 -0
- package/packages/pdf-lite/tsconfig.json +4 -0
- package/packages/pdf-lite/tsconfig.prod.json +8 -0
- package/packages/pdf-lite/typedoc.json +14 -0
- package/packages/pdf-lite/vitest.config.ts +43 -0
- package/pnpm-workspace.yaml +2 -0
- package/renovate.json +34 -0
- package/scripts/build-examples.ts +30 -0
- package/scripts/bump-version.sh +56 -0
- package/scripts/gen-html-docs.sh +21 -0
- package/scripts/gen-md-docs.sh +15 -0
- package/scripts/prepare-release.sh +33 -0
- package/tsconfig.json +22 -0
- package/tsconfig.prod.json +12 -0
- package/typedoc.json +34 -0
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
import { PdfStartObjectToken } from './tokens/start-object-token.js'
|
|
2
|
+
import { PdfObject } from './objects/pdf-object.js'
|
|
3
|
+
import { PdfIndirectObject } from './objects/pdf-indirect-object.js'
|
|
4
|
+
import { PdfDictionary } from './objects/pdf-dictionary.js'
|
|
5
|
+
import { PdfEndObjectToken } from './tokens/end-object-token.js'
|
|
6
|
+
import { PdfStartDictionaryToken } from './tokens/start-dictionary-token.js'
|
|
7
|
+
import { PdfArray } from './objects/pdf-array.js'
|
|
8
|
+
import { PdfEndDictionaryToken } from './tokens/end-dictionary-token.js'
|
|
9
|
+
import { PdfNumber } from './objects/pdf-number.js'
|
|
10
|
+
import { PdfObjectReference } from './objects/pdf-object-reference.js'
|
|
11
|
+
import { PdfTrailer } from './objects/pdf-trailer.js'
|
|
12
|
+
import { PdfHexadecimal } from './objects/pdf-hexadecimal.js'
|
|
13
|
+
import { PdfStartArrayToken } from './tokens/start-array-token.js'
|
|
14
|
+
import { PdfEndArrayToken } from './tokens/end-array-token.js'
|
|
15
|
+
import { PdfNameToken } from './tokens/name-token.js'
|
|
16
|
+
import { PdfName } from './objects/pdf-name.js'
|
|
17
|
+
import { PdfBoolean } from './objects/pdf-boolean.js'
|
|
18
|
+
import { PdfNull } from './objects/pdf-null.js'
|
|
19
|
+
import { PdfString } from './objects/pdf-string.js'
|
|
20
|
+
import { PdfNumberToken } from './tokens/number-token.js'
|
|
21
|
+
import { PdfBooleanToken } from './tokens/boolean-token.js'
|
|
22
|
+
import { PdfHexadecimalToken } from './tokens/hexadecimal-token.js'
|
|
23
|
+
import { PdfNullToken } from './tokens/null-token.js'
|
|
24
|
+
import { PdfObjectReferenceToken } from './tokens/object-reference-token.js'
|
|
25
|
+
import { PdfStringToken } from './tokens/string-token.js'
|
|
26
|
+
import { PdfStartStreamToken } from './tokens/start-stream-token.js'
|
|
27
|
+
import { PdfStream } from './objects/pdf-stream.js'
|
|
28
|
+
import { PdfEndStreamToken } from './tokens/end-stream-token.js'
|
|
29
|
+
import { PdfStreamChunkToken } from './tokens/stream-chunk-token.js'
|
|
30
|
+
import { PdfXRefTableStartToken } from './tokens/xref-table-start-token.js'
|
|
31
|
+
import {
|
|
32
|
+
PdfXRefTable,
|
|
33
|
+
PdfXRefTableEntry,
|
|
34
|
+
PdfXRefTableSectionHeader,
|
|
35
|
+
} from './objects/pdf-xref-table.js'
|
|
36
|
+
import { PdfXRefTableSectionStartToken } from './tokens/xref-table-section-start-token.js'
|
|
37
|
+
import { PdfXRefTableEntryToken } from './tokens/xref-table-entry-token.js'
|
|
38
|
+
import { PdfTrailerToken } from './tokens/trailer-token.js'
|
|
39
|
+
import { PdfCommentToken } from './tokens/comment-token.js'
|
|
40
|
+
import { PdfComment } from './objects/pdf-comment.js'
|
|
41
|
+
import { PdfStartXRefToken } from './tokens/start-xref-token.js'
|
|
42
|
+
import { PdfStartXRef } from './objects/pdf-start-xref.js'
|
|
43
|
+
import { PdfWhitespaceToken } from './tokens/whitespace-token.js'
|
|
44
|
+
import { PdfToken } from './tokens/token.js'
|
|
45
|
+
import { IncrementalParser } from './incremental-parser.js'
|
|
46
|
+
import { concatUint8Arrays } from '../utils/concatUint8Arrays.js'
|
|
47
|
+
import { ByteArray } from '../types.js'
|
|
48
|
+
import { Ref } from './ref.js'
|
|
49
|
+
|
|
50
|
+
const DEFAULT_MAX_BUFFER_SIZE_BYTES = 10 * 1024 * 1024 // 10 MB

/**
 * Decodes PDF tokens into PDF objects.
 * Handles parsing of all PDF object types including dictionaries, arrays, streams, and xref tables.
 *
 * Whitespace and comment tokens found around an object are not thrown away:
 * they are stored on the resulting object as `preTokens` / `postTokens`
 * (and `innerTokens` for dictionaries and arrays). Whitespace is only
 * discarded when `ignoreWhitespace` is enabled.
 */
export class PdfDecoder extends IncrementalParser<PdfToken, PdfObject> {
    private ignoreWhitespace: boolean = false
    private maxBufferSizeBytes: number = DEFAULT_MAX_BUFFER_SIZE_BYTES

    /**
     * Creates a new PDF decoder.
     *
     * @param options - Configuration options
     * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
     * @param options.maxBufferSizeBytes - Maximum buffer size before compaction (default: 10MB)
     */
    constructor(options?: {
        ignoreWhitespace?: boolean
        maxBufferSizeBytes?: number
    }) {
        super()
        this.ignoreWhitespace = options?.ignoreWhitespace ?? false
        this.maxBufferSizeBytes =
            options?.maxBufferSizeBytes ?? DEFAULT_MAX_BUFFER_SIZE_BYTES
    }

    /**
     * Reads a name token and wraps it in a `PdfName`, attaching the extra
     * tokens found directly before and after it.
     */
    private nextName(): PdfName {
        const preTokens = this.nextExtraTokens()
        const token = this.expect(PdfNameToken)
        const postTokens = this.nextExtraTokens()

        const name = new PdfName(token.name)
        name.preTokens = preTokens
        name.postTokens = postTokens

        return name
    }

    /**
     * Reads a full indirect object: start-object token, a single content
     * value, and the end-object token.
     *
     * The object number, generation number and byte offset are taken from
     * the start-object token.
     */
    private nextIndirectObject(): PdfIndirectObject {
        const preTokens = this.nextExtraTokens()
        const startToken = this.expect(PdfStartObjectToken)

        // Extras between the object header and its content belong to the content.
        const contentPreTokens = this.nextExtraTokens()
        const content = this.nextValue()
        content.preTokens = contentPreTokens
        this.expect(PdfEndObjectToken)

        const postTokens = this.nextExtraTokens()

        const obj = new PdfIndirectObject({
            objectNumber: startToken.objectNumber,
            generationNumber: startToken.generationNumber,
            content,
            offset: startToken.byteOffset,
        })

        obj.preTokens = preTokens
        obj.postTokens = postTokens

        return obj
    }

    /**
     * Reads the next value in a nested (non-root) position.
     */
    private nextValue(): PdfObject {
        return this.nextObject()
    }

    /**
     * Reads a dictionary as alternating name/value pairs until the
     * end-dictionary token.
     *
     * @returns The dictionary, or a `PdfStream` using that dictionary as its
     * header when a start-stream token follows it.
     */
    private nextDictionary(): PdfDictionary | PdfStream {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartDictionaryToken)
        const dictionary = new PdfDictionary()
        dictionary.innerTokens = this.nextExtraTokens()

        while (true) {
            const next = this.peek()

            // A missing token ends the dictionary just like an explicit end token.
            if (!next || next instanceof PdfEndDictionaryToken) {
                this.next() // consume end token
                break
            }

            const name = this.nextName()
            const value = this.nextValue()
            dictionary.set(name, value)
        }

        const postTokens = this.nextExtraTokens()

        // Populating the dictionary above must not leave it flagged as modified.
        dictionary.setModified(false)
        dictionary.preTokens = preTokens
        dictionary.postTokens = postTokens

        if (this.peek() instanceof PdfStartStreamToken) {
            return this.nextStream(dictionary)
        }

        return dictionary
    }

    /**
     * Reads an array: values are collected until the end-array token.
     */
    private nextArray(): PdfArray {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartArrayToken)
        const array = new PdfArray()
        array.innerTokens = this.nextExtraTokens()

        while (true) {
            const next = this.peek()

            // A missing token ends the array just like an explicit end token.
            if (!next || next instanceof PdfEndArrayToken) {
                this.next() // consume end token
                break
            }

            array.push(this.nextValue())
        }

        const postTokens = this.nextExtraTokens()

        array.preTokens = preTokens
        array.postTokens = postTokens

        return array
    }

    /**
     * Reads a stream body that follows an already-decoded header dictionary.
     *
     * All consecutive stream-chunk tokens are concatenated into the stream's
     * `original` bytes.
     *
     * @param header - The dictionary that precedes the stream keyword
     */
    private nextStream(header: PdfDictionary): PdfStream {
        const preTokens = this.nextExtraTokens()
        const startStreamToken = this.expect(PdfStartStreamToken)

        const chunks: ByteArray[] = []
        // Whitespace after the stream keyword is carried by the start token itself.
        const preStreamTokens = startStreamToken.getTrailingWhitespaceTokens()

        while (this.peek() instanceof PdfStreamChunkToken) {
            const chunk = this.expect(PdfStreamChunkToken)

            chunks.push(chunk.toBytes())
        }
        const postStreamDataTokens = this.nextExtraTokens()

        this.expect(PdfEndStreamToken)
        const postTokens = this.nextExtraTokens()

        const stream = new PdfStream({
            header,
            original: concatUint8Arrays(...chunks),
        })

        stream.preStreamDataTokens = preStreamTokens
        stream.postStreamDataTokens = postStreamDataTokens
        stream.preTokens = preTokens
        stream.postTokens = postTokens

        return stream
    }

    /**
     * Reads a comment token and wraps its text in a `PdfComment`.
     */
    private nextComment(): PdfComment {
        const token = this.expect(PdfCommentToken)

        return new PdfComment(token.comment)
    }

    /**
     * Reads a single primitive value: number, boolean, hexadecimal string,
     * null, object reference or literal string.
     *
     * @throws Error when no token is available or the token is not one of
     * the primitive token types.
     */
    private nextPrimitive(): PdfObject {
        const preTokens = this.nextExtraTokens()
        const token = this.next() // consume the primitive token
        if (!token) {
            throw new Error('Expected primitive token but got EOF')
        }

        const postTokens = this.nextExtraTokens()

        let out: PdfObject
        if (token instanceof PdfNumberToken) {
            out = new PdfNumber({
                value: token.value,
                padTo: token.padTo,
                decimalPlaces: token.decimalPlaces,
            })
        } else if (token instanceof PdfBooleanToken) {
            out = new PdfBoolean(token.value)
        } else if (token instanceof PdfHexadecimalToken) {
            out = new PdfHexadecimal(token.raw, 'hex')
        } else if (token instanceof PdfNullToken) {
            out = new PdfNull()
        } else if (token instanceof PdfObjectReferenceToken) {
            out = new PdfObjectReference(
                token.objectNumber,
                token.generationNumber,
            )
        } else if (token instanceof PdfStringToken) {
            out = new PdfString(token.value)
        } else {
            throw new Error(`Unknown primitive token type: ${token.type}`)
        }

        out.preTokens = preTokens
        out.postTokens = postTokens

        return out
    }

    /**
     * Consumes the run of whitespace and comment tokens at the current
     * position and returns the ones that were kept.
     *
     * With `ignoreWhitespace` enabled, whitespace tokens are consumed but not
     * returned.
     *
     * @param root - When true, a comment token stops the run without being
     * consumed, so that `nextObject` can decode it as its own object.
     */
    private nextExtraTokens(root: boolean = false): PdfToken[] {
        const tokens: PdfToken[] = []
        while (true) {
            if (this.atEof()) {
                break
            }

            const token = this.peek()
            if (!token) {
                break
            }

            if (this.ignoreWhitespace && token instanceof PdfWhitespaceToken) {
                this.next() // consume whitespace
                continue
            }

            if (root && token instanceof PdfCommentToken) {
                break
            }

            if (
                !(
                    token instanceof PdfWhitespaceToken ||
                    token instanceof PdfCommentToken
                )
            ) {
                break
            }

            tokens.push(this.next()!)
        }

        return tokens
    }

    /**
     * Reads a classic cross-reference table: the start token followed by any
     * number of section headers and entries.
     *
     * @throws Error when the start token carries no byte offset.
     */
    private nextXRefTable(): PdfXRefTable {
        const preTokens = this.nextExtraTokens()
        const xrefToken = this.expect(PdfXRefTableStartToken)

        if (xrefToken.byteOffset === undefined) {
            throw new Error('XRef table token missing byte offset')
        }

        const xrefTable = new PdfXRefTable({
            offset: xrefToken.byteOffset,
        })

        // Extras consumed while probing for one more section/entry that turned
        // out not to exist. They are handed to the table's postTokens below so
        // they are not silently dropped when the loop ends.
        let unclaimedTokens: PdfToken[] = []

        while (true) {
            const itemPreTokens = this.nextExtraTokens()
            const next = this.peek()

            if (!next) {
                unclaimedTokens = itemPreTokens
                break
            }

            if (next instanceof PdfXRefTableSectionStartToken) {
                const sectionToken = this.expect(PdfXRefTableSectionStartToken)
                const postTokens = this.nextExtraTokens()

                const section = new PdfXRefTableSectionHeader({
                    startObjectNumber: sectionToken.start.value,
                    entryCount: sectionToken.count.value,
                })
                // Freshly decoded values must not be reported as modified.
                section.startObjectNumber.setModified(false)
                section.entryCount.setModified(false)
                section.setModified(false)

                section.preTokens = itemPreTokens
                section.postTokens = postTokens

                xrefTable.sections.push(section)
            } else if (next instanceof PdfXRefTableEntryToken) {
                const entryToken = this.expect(PdfXRefTableEntryToken)
                const postTokens = this.nextExtraTokens()

                const entry = new PdfXRefTableEntry({
                    objectNumber: entryToken.objectNumber.value,
                    byteOffset: entryToken.offset.ref,
                    generationNumber: entryToken.generationNumber.value,
                    inUse: entryToken.inUse,
                })

                // Freshly decoded values must not be reported as modified.
                entry.objectNumber.setModified(false)
                entry.generationNumber.setModified(false)
                entry.byteOffset.setModified(false)
                entry.setModified(false)

                entry.preTokens = itemPreTokens
                entry.postTokens = postTokens

                xrefTable.entries.push(entry)
            } else {
                unclaimedTokens = itemPreTokens
                break
            }
        }

        xrefTable.preTokens = preTokens
        xrefTable.postTokens = [...unclaimedTokens, ...this.nextExtraTokens()]
        return xrefTable
    }

    /**
     * Reads a trailer: the trailer token followed by its dictionary.
     *
     * @throws Error when the trailer token carries no byte offset.
     */
    private nextTrailer(): PdfTrailer {
        const preTokens = this.nextExtraTokens()
        const trailerToken = this.expect(PdfTrailerToken)
        const dictionary = this.nextDictionary()
        const postTokens = this.nextExtraTokens()

        const trailer = new PdfTrailer(dictionary as PdfDictionary<any>)
        if (trailerToken.byteOffset === undefined) {
            throw new Error('Trailer token missing byte offset')
        }

        // Record where the trailer was found without flagging it as modified.
        trailer.offset.update(trailerToken.byteOffset)
        trailer.offset.isModified = false

        trailer.preTokens = preTokens
        trailer.postTokens = postTokens

        return trailer
    }

    /**
     * Reads a startxref section: the startxref token followed by a number
     * token holding the offset.
     */
    private nextStartXRef(): PdfStartXRef {
        const preTokens = this.nextExtraTokens()
        this.expect(PdfStartXRefToken)

        const preOffsetTokens = this.nextExtraTokens()
        const offsetToken = this.expect(PdfNumberToken)

        const offset = new PdfNumber(offsetToken.value)
        offset.setModified(false)

        const startXref = new PdfStartXRef(offset)
        startXref.preTokens = preTokens
        offset.preTokens = preOffsetTokens
        return startXref
    }

    /**
     * Decodes the next object of any kind by dispatching on the type of the
     * upcoming token.
     *
     * Extra tokens collected here are merged around those already attached
     * by the specific reader, and the result is marked as unmodified.
     *
     * @param root - True when decoding at the top level, where comments are
     * returned as standalone objects instead of being attached to a neighbour.
     * @throws Error when no token is available or the token type is not handled.
     */
    private nextObject(root: boolean = false): PdfObject {
        const preTokens = this.nextExtraTokens(root)
        const token = this.peek()

        if (!token) {
            // This should never happen as atEof should be checked before calling nextObject
            throw new Error('Expected token but did not find one')
        }

        let out: PdfObject
        if (token instanceof PdfStartDictionaryToken) {
            out = this.nextDictionary()
        } else if (token instanceof PdfStartObjectToken) {
            out = this.nextIndirectObject()
        } else if (token instanceof PdfStartArrayToken) {
            out = this.nextArray()
        } else if (token instanceof PdfNameToken) {
            out = this.nextName()
        } else if (PdfDecoder.isPrimitive(token)) {
            out = this.nextPrimitive()
        } else if (token instanceof PdfCommentToken) {
            out = this.nextComment()
        } else if (token instanceof PdfXRefTableStartToken) {
            out = this.nextXRefTable()
        } else if (token instanceof PdfTrailerToken) {
            out = this.nextTrailer()
        } else if (token instanceof PdfStartXRefToken) {
            out = this.nextStartXRef()
        } else {
            throw new Error(`Unknown token type: ${token.type}`)
        }
        const postTokens = this.nextExtraTokens(root)
        out.preTokens = [...preTokens, ...(out.preTokens ?? [])]
        out.postTokens = [...(out.postTokens ?? []), ...postTokens]
        out.setModified(false)

        return out
    }

    /**
     * Tells whether a token is one of the types handled by `nextPrimitive`.
     */
    private static isPrimitive(token: PdfToken): boolean {
        return (
            token instanceof PdfNumberToken ||
            token instanceof PdfBooleanToken ||
            token instanceof PdfHexadecimalToken ||
            token instanceof PdfNullToken ||
            token instanceof PdfObjectReferenceToken ||
            token instanceof PdfStringToken
        )
    }

    /**
     * Sums the `byteLength` of every token currently held in the buffer.
     */
    protected bufferSize(): number {
        return this.buffer.reduce((acc, obj) => acc + obj.byteLength, 0)
    }

    /**
     * Allows compaction only when `bufferIndex` is above 50 and the buffered
     * tokens together exceed `maxBufferSizeBytes`.
     */
    protected canCompact(): boolean {
        return (
            this.bufferIndex > 50 && this.bufferSize() > this.maxBufferSizeBytes
        )
    }

    /**
     * Decodes one top-level object from the buffered tokens.
     */
    protected parse(): PdfObject {
        return this.nextObject(true)
    }
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import { ByteArray } from '../types'
|
|
2
|
+
import { stringToBytes } from '../utils/stringToBytes'
|
|
3
|
+
import { PdfDecoder } from './decoder'
|
|
4
|
+
import { PdfObject } from './objects/pdf-object'
|
|
5
|
+
import { PdfByteStreamTokeniser } from './tokeniser'
|
|
6
|
+
|
|
7
|
+
/**
 * Turns a sequence of byte chunks into PDF objects.
 *
 * Each chunk is handed to a tokeniser; every token it produces is fed to a
 * decoder, and any objects the decoder can complete are yielded right away.
 * Once the input is exhausted both stages are flagged as finished and
 * whatever remains is flushed.
 *
 * @param bytes - Iterable of byte arrays to process
 * @returns A generator yielding parsed PDF objects
 */
export function* bytesToPdfObjects(
    bytes: Iterable<ByteArray>,
): Generator<PdfObject> {
    const byteTokeniser = new PdfByteStreamTokeniser()
    const objectDecoder = new PdfDecoder()

    for (const piece of bytes) {
        byteTokeniser.feedBytes(piece)

        for (const tok of byteTokeniser.nextItems()) {
            objectDecoder.feed(tok)

            // Emit objects as soon as the decoder has enough tokens for them.
            for (const decoded of objectDecoder.nextItems()) {
                yield decoded
            }
        }
    }

    // No more input: mark both stages as finished before the final flush.
    byteTokeniser.eof = true
    objectDecoder.eof = true

    for (const tok of byteTokeniser.nextItems()) {
        objectDecoder.feed(tok)
    }

    for (const decoded of objectDecoder.nextItems()) {
        yield decoded
    }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Parses PDF content held in a string.
 *
 * The string is converted to bytes with `stringToBytes` and decoded as a
 * single chunk by `bytesToPdfObjects`.
 *
 * @param str - The string to parse as PDF content
 * @returns A generator yielding parsed PDF objects
 */
export function* stringToPdfObjects(str: string): Generator<PdfObject> {
    yield* bytesToPdfObjects([stringToBytes(str)])
}
|
|
54
|
+
|
|
55
|
+
/**
 * Decodes an iterable of byte arrays into PDF objects.
 * Allows configuring the underlying decoder.
 *
 * For each chunk, all tokens produced by the tokeniser are fed to the decoder
 * before the objects it can complete are yielded. After the last chunk both
 * stages are flagged as finished and the remaining objects are flushed.
 *
 * @param input - Iterable of byte arrays to decode
 * @param options - Configuration options forwarded to `PdfDecoder`
 * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
 * @param options.maxBufferSizeBytes - Maximum decoder buffer size before compaction
 * @returns A generator yielding parsed PDF objects
 */
export function* pdfDecoder(
    input: Iterable<ByteArray>,
    options?: { ignoreWhitespace?: boolean; maxBufferSizeBytes?: number },
): Generator<PdfObject> {
    const decoder = new PdfDecoder(options)
    const tokeniser = new PdfByteStreamTokeniser()

    for (const chunk of input) {
        // Feed the whole chunk at once, as the other generators in this file do,
        // instead of pushing it through one byte at a time.
        tokeniser.feedBytes(chunk)

        for (const token of tokeniser.nextItems()) {
            decoder.feed(token)
        }

        for (const obj of decoder.nextItems()) {
            yield obj
        }
    }

    // No more input: mark both stages as finished before the final flush.
    tokeniser.eof = true
    decoder.eof = true

    for (const token of tokeniser.nextItems()) {
        decoder.feed(token)
    }

    for (const obj of decoder.nextItems()) {
        yield obj
    }
}
|
|
92
|
+
|
|
93
|
+
/**
 * Asynchronously decodes byte arrays into PDF objects.
 * Supports both async and sync iterables for streaming PDF parsing.
 *
 * Each chunk is handed to a tokeniser; every token it produces is fed to a
 * decoder, and any objects the decoder can complete are yielded right away.
 * After the last chunk both stages are flagged as finished and the remaining
 * objects are flushed.
 *
 * @param input - Async or sync iterable of byte arrays
 * @param options - Optional configuration forwarded to `PdfDecoder`
 * @param options.ignoreWhitespace - If true, whitespace tokens are ignored
 * @param options.maxBufferSizeBytes - Maximum decoder buffer size before compaction
 * @returns An async generator yielding parsed PDF objects
 */
export async function* pdfDecoderAsync(
    input: AsyncIterable<ByteArray> | Iterable<ByteArray>,
    options?: { ignoreWhitespace?: boolean; maxBufferSizeBytes?: number },
): AsyncGenerator<PdfObject> {
    const tokeniser = new PdfByteStreamTokeniser()
    // With `options` omitted this matches the previous `new PdfDecoder()` call.
    const decoder = new PdfDecoder(options)

    for await (const chunk of input) {
        tokeniser.feedBytes(chunk)

        for (const token of tokeniser.nextItems()) {
            decoder.feed(token)

            // Emit objects as soon as the decoder has enough tokens for them.
            for (const obj of decoder.nextItems()) {
                yield obj
            }
        }
    }

    // No more input: mark both stages as finished before the final flush.
    tokeniser.eof = true
    decoder.eof = true

    for (const token of tokeniser.nextItems()) {
        decoder.feed(token)
    }

    for (const obj of decoder.nextItems()) {
        yield obj
    }
}
|