@cj-tech-master/excelts 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/README.md +14 -1
  2. package/README_zh.md +6 -0
  3. package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
  4. package/dist/browser/modules/archive/zip/stream.js +53 -0
  5. package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
  6. package/dist/browser/modules/pdf/core/crypto.js +637 -0
  7. package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
  8. package/dist/browser/modules/pdf/core/encryption.js +88 -261
  9. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
  10. package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
  11. package/dist/browser/modules/pdf/index.d.ts +23 -2
  12. package/dist/browser/modules/pdf/index.js +21 -3
  13. package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  14. package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
  15. package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
  16. package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
  17. package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
  18. package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
  19. package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
  20. package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
  21. package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
  22. package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
  23. package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
  24. package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
  25. package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
  26. package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
  27. package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  28. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
  29. package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
  30. package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
  31. package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
  32. package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
  33. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
  34. package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
  35. package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  36. package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
  37. package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
  38. package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
  39. package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
  40. package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
  41. package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  42. package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
  43. package/dist/cjs/modules/archive/zip/stream.js +53 -0
  44. package/dist/cjs/modules/pdf/core/crypto.js +649 -0
  45. package/dist/cjs/modules/pdf/core/encryption.js +88 -263
  46. package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
  47. package/dist/cjs/modules/pdf/index.js +23 -4
  48. package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
  49. package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
  50. package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
  51. package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
  52. package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
  53. package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
  54. package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
  55. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
  56. package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
  57. package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
  58. package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
  59. package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
  60. package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
  61. package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
  62. package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
  63. package/dist/esm/modules/archive/zip/stream.js +53 -0
  64. package/dist/esm/modules/pdf/core/crypto.js +637 -0
  65. package/dist/esm/modules/pdf/core/encryption.js +88 -261
  66. package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
  67. package/dist/esm/modules/pdf/index.js +21 -3
  68. package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
  69. package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
  70. package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
  71. package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
  72. package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
  73. package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
  74. package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
  75. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
  76. package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
  77. package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
  78. package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
  79. package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
  80. package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
  81. package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
  82. package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
  83. package/dist/iife/excelts.iife.js +703 -267
  84. package/dist/iife/excelts.iife.js.map +1 -1
  85. package/dist/iife/excelts.iife.min.js +35 -35
  86. package/dist/types/modules/archive/zip/stream.d.ts +4 -0
  87. package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
  88. package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
  89. package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
  90. package/dist/types/modules/pdf/index.d.ts +23 -2
  91. package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  92. package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
  93. package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
  94. package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
  95. package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
  96. package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
  97. package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
  98. package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  99. package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
  100. package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
  101. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
  102. package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  103. package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
  104. package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
  105. package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  106. package/package.json +1 -1
@@ -0,0 +1,443 @@
1
+ /**
2
+ * PDF decryption for reading encrypted PDFs.
3
+ *
4
+ * Supports:
5
+ * - Standard Security Handler (V1/V2/V4/V5, R2/R3/R4/R5)
6
+ * - RC4 encryption (40-bit and 128-bit)
7
+ * - AES-128 encryption (PDF 1.6+)
8
+ * - AES-256 encryption (PDF 2.0, V=5, R=5)
9
+ *
10
+ * @see PDF Reference 1.7, §3.5 - Encryption
11
+ * @see PDF 2.0 (ISO 32000-2), §7.6 - Encryption
12
+ */
13
+ import { rc4, md5, sha256, aesCbcDecrypt, aesCbcDecryptRaw, concatArrays } from "../core/crypto.js";
14
+ import { dictGetNumber, dictGetName, dictGetBytes, dictGetArray, dictGetBool } from "./pdf-parser.js";
15
+ import { PdfStructureError } from "../errors.js";
16
+ // =============================================================================
17
+ // Constants
18
+ // =============================================================================
19
/**
 * The fixed 32-byte padding string from PDF spec §3.5.2. It is appended to
 * (or substituted for) passwords so that every password is exactly 32 bytes.
 */
const PASSWORD_PADDING = Uint8Array.of(
    0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
    0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
    0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
    0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a
);
/** Single TextEncoder shared by every password-encoding helper in this module. */
const textEncoder = new TextEncoder();
26
+ // =============================================================================
27
+ // Public API
28
+ // =============================================================================
29
/**
 * Initialize decryption for a PDF document.
 *
 * Reads the trailer's /Encrypt dictionary and tries to authenticate
 * `password` as the user password, then as the owner password, and finally
 * falls back to the empty user password. On success `doc.decryptFn` is set
 * to the matching per-object decryptor (AES-128 or RC4); V=5 documents are
 * delegated to initDecryptionV5().
 *
 * @param doc - The PDF document
 * @param password - User or owner password (empty string for no password)
 * @returns true if the document is not encrypted or decryption was set up,
 *          false if no candidate password authenticated.
 * @throws PdfStructureError if the filter is not "Standard" or /O or /U is missing.
 */
export function initDecryption(doc, password = "") {
    const encryptDict = doc.derefDict(doc.trailer.get("Encrypt"));
    if (!encryptDict) {
        return true; // Not encrypted
    }
    const filter = dictGetName(encryptDict, "Filter");
    if (filter !== "Standard") {
        throw new PdfStructureError(`Unsupported encryption filter: ${filter}`);
    }
    const v = dictGetNumber(encryptDict, "V") ?? 0;
    const r = dictGetNumber(encryptDict, "R") ?? 0;
    const permissions = dictGetNumber(encryptDict, "P") ?? 0;
    const oValue = dictGetBytes(encryptDict, "O");
    const uValue = dictGetBytes(encryptDict, "U");
    if (!oValue || !uValue) {
        throw new PdfStructureError("Missing /O or /U values in Encrypt dictionary");
    }
    // First element of the trailer /ID array; empty when absent or not a byte string.
    const idArray = dictGetArray(doc.trailer, "ID");
    const fileId = idArray && idArray.length > 0 && idArray[0] instanceof Uint8Array
        ? idArray[0]
        : new Uint8Array(0);
    // EncryptMetadata flag (default true per spec)
    const encryptMetadata = readEncryptMetadata(encryptDict);
    // V=5 (AES-256, PDF 2.0) has its own key derivation.
    if (v === 5) {
        return initDecryptionV5(doc, encryptDict, password, r, oValue, uValue, permissions, fileId);
    }
    const useAes = v === 4 && isAesCryptFilter(encryptDict);
    const keyLength = resolveKeyLengthBits(encryptDict, v) / 8; // bits → bytes
    const authenticate = (candidate) => tryUserPassword(candidate, oValue, permissions, fileId, r, keyLength, uValue, encryptMetadata);
    // 1) as user password
    let encryptionKey = authenticate(password);
    if (!encryptionKey) {
        // 2) as owner password: recover the user password from /O first
        encryptionKey = authenticate(deriveUserPasswordFromOwner(password, oValue, r, keyLength));
    }
    if (!encryptionKey && password !== "") {
        // 3) fall back to the empty user password
        encryptionKey = authenticate("");
    }
    if (!encryptionKey) {
        return false; // Password incorrect
    }
    // Install the per-object decryptor.
    const finalKey = encryptionKey;
    if (useAes) {
        doc.decryptFn = (data, objNum, gen) => decryptAes128(data, objNum, gen, finalKey);
    }
    else {
        doc.decryptFn = (data, objNum, gen) => decryptRc4PerObject(data, objNum, gen, finalKey);
    }
    return true;
}
/**
 * Determine the file encryption key length in bits for V1-V4 documents.
 *
 * An explicit top-level /Length always wins (unchanged behaviour). When it is
 * absent the 40-bit default only applies below V=4; for V=4 the length comes
 * from the StdCF crypt filter, defaulting to 128 bits.
 */
function resolveKeyLengthBits(encryptDict, v) {
    const declared = dictGetNumber(encryptDict, "Length");
    if (declared !== undefined && declared !== null) {
        return declared;
    }
    if (v !== 4) {
        return 40;
    }
    const cf = encryptDict.get("CF");
    const stdCF = cf instanceof Map ? cf.get("StdCF") : undefined;
    const cfLength = stdCF instanceof Map ? stdCF.get("Length") : undefined;
    if (typeof cfLength === "number" && cfLength > 0) {
        // Some producers store the crypt-filter length in bytes (e.g. 16)
        // rather than bits (128); values below 40 are treated as bytes.
        return cfLength < 40 ? cfLength * 8 : cfLength;
    }
    return 128;
}
94
/**
 * Report whether the document's trailer carries an "Encrypt" entry.
 */
export function isEncrypted(doc) {
    const { trailer } = doc;
    return trailer.has("Encrypt");
}
100
+ // =============================================================================
101
+ // V5 (AES-256) Decryption
102
+ // =============================================================================
103
/**
 * Initialize decryption for V=5 (AES-256, PDF 2.0).
 * Supports R=5 using SHA-256 based key derivation (Algorithm 2.A).
 *
 * The supplied password is tried as user password, then as owner password;
 * if it is non-empty and fails, the empty password is tried the same way.
 * On success `doc.decryptFn` is set to an AES-256 decryptor that uses the
 * file encryption key directly.
 *
 * @returns true when a password authenticated, false otherwise.
 * @throws PdfStructureError for unsupported revisions, missing /OE or /UE,
 *         or /O, /U, /OE, /UE entries that are too short.
 */
function initDecryptionV5(doc, encryptDict, password, revision, oValue, uValue, _permissions, _fileId) {
    if (revision === 6) {
        throw new PdfStructureError("R=6 (PDF 2.0 extension) requires SHA-384/SHA-512 which is not yet supported");
    }
    if (revision !== 5) {
        throw new PdfStructureError(`Unsupported revision ${revision} for V=5 encryption`);
    }
    const oeValue = dictGetBytes(encryptDict, "OE");
    const ueValue = dictGetBytes(encryptDict, "UE");
    if (!oeValue || !ueValue) {
        throw new PdfStructureError("Missing /OE or /UE values in V=5 Encrypt dictionary");
    }
    // O and U layout: 32 bytes hash + 8 bytes validation salt + 8 bytes key salt
    if (oValue.length < 48 || uValue.length < 48) {
        throw new PdfStructureError("Invalid /O or /U length for V=5 encryption");
    }
    // OE and UE each hold the 32-byte wrapped file key; anything shorter
    // would hand AES a partial block.
    if (oeValue.length < 32 || ueValue.length < 32) {
        throw new PdfStructureError("Invalid /OE or /UE length for V=5 encryption");
    }
    // Candidate passwords in the order they are attempted.
    const candidates = [truncatePassword(password)];
    if (password !== "") {
        candidates.push(new Uint8Array(0)); // empty-password fallback
    }
    let encryptionKey = null;
    for (const candidate of candidates) {
        // Algorithm 2.A step a: user check first, then owner check
        encryptionKey = tryUserPasswordV5(candidate, uValue, ueValue)
            || tryOwnerPasswordV5(candidate, oValue, oeValue, uValue);
        if (encryptionKey) {
            break;
        }
    }
    if (!encryptionKey) {
        return false;
    }
    // V=5 always uses AES-256 with the file encryption key directly (no per-object key derivation)
    const finalKey = encryptionKey;
    doc.decryptFn = (data, _objNum, _gen) => decryptAes256Direct(data, finalKey);
    return true;
}
149
/**
 * UTF-8 encode a password and cap it at 127 bytes, as PDF 2.0 requires.
 */
function truncatePassword(password) {
    const MAX_PASSWORD_BYTES = 127;
    const encoded = textEncoder.encode(password);
    if (encoded.length <= MAX_PASSWORD_BYTES) {
        return encoded;
    }
    return encoded.subarray(0, MAX_PASSWORD_BYTES);
}
156
/**
 * Attempt user-password authentication for V=5/R=5.
 *
 * /U is laid out as hash(32) + validation salt(8) + key salt(8). The
 * password is accepted when SHA-256(password + validation salt) matches the
 * stored hash; the file key is then obtained by decrypting the first 32
 * bytes of /UE with SHA-256(password + key salt) under a zero IV.
 *
 * @returns the decrypted file encryption key, or null if validation fails.
 */
function tryUserPasswordV5(passwordBytes, uValue, ueValue) {
    const storedHash = uValue.subarray(0, 32);
    const saltedDigest = (salt) => sha256(concatArrays(passwordBytes, salt));
    // Validation salt lives at bytes 32..39.
    if (!arraysEqual(saltedDigest(uValue.subarray(32, 40)), storedHash)) {
        return null;
    }
    // Key salt lives at bytes 40..47.
    const intermediateKey = saltedDigest(uValue.subarray(40, 48));
    return aesCbcDecryptRaw(ueValue.subarray(0, 32), intermediateKey, new Uint8Array(16));
}
179
/**
 * Attempt owner-password authentication for V=5/R=5.
 *
 * /O is laid out as hash(32) + validation salt(8) + key salt(8). Both hashes
 * additionally cover the first 48 bytes of /U. The password is accepted when
 * SHA-256(password + validation salt + U[0..47]) matches the stored hash;
 * the file key is then obtained by decrypting the first 32 bytes of /OE with
 * SHA-256(password + key salt + U[0..47]) under a zero IV.
 *
 * @returns the decrypted file encryption key, or null if validation fails.
 */
function tryOwnerPasswordV5(passwordBytes, oValue, oeValue, uValue) {
    const userEntry = uValue.subarray(0, 48);
    const storedHash = oValue.subarray(0, 32);
    const saltedDigest = (salt) => sha256(concatArrays(passwordBytes, salt, userEntry));
    // Validation salt lives at bytes 32..39.
    if (!arraysEqual(saltedDigest(oValue.subarray(32, 40)), storedHash)) {
        return null;
    }
    // Key salt lives at bytes 40..47.
    const intermediateKey = saltedDigest(oValue.subarray(40, 48));
    return aesCbcDecryptRaw(oeValue.subarray(0, 32), intermediateKey, new Uint8Array(16));
}
203
/**
 * AES-256-CBC decryption for V=5, using the file encryption key as-is
 * (no per-object key). The leading 16 bytes of `data` are the IV.
 *
 * Input that is shorter than one IV, has no ciphertext after the IV, or
 * whose ciphertext is not a whole number of 16-byte blocks is returned
 * unchanged.
 */
function decryptAes256Direct(data, encryptionKey) {
    const BLOCK_SIZE = 16;
    const payloadLength = data.length - BLOCK_SIZE;
    // Need the IV plus at least one complete block.
    if (payloadLength <= 0 || payloadLength % BLOCK_SIZE !== 0) {
        return data;
    }
    return aesCbcDecrypt(data.subarray(BLOCK_SIZE), encryptionKey, data.subarray(0, BLOCK_SIZE));
}
218
+ // =============================================================================
219
+ // Password Verification (V1-V4)
220
+ // =============================================================================
221
/**
 * Check a candidate user password against the /U entry.
 *
 * The candidate key is derived with computeEncryptionKeyForReading() and
 * verified by recomputing /U:
 *  - R2: RC4(key, padding) must equal the first 32 bytes of /U.
 *  - R3+: RC4(key, MD5(padding + fileId)), followed by 19 further RC4 passes
 *    with the key XOR-ed by the pass number, must match the first 16 bytes.
 *
 * @returns the encryption key on success, null otherwise (including for
 *          revisions below 2).
 */
function tryUserPassword(password, oValue, permissions, fileId, revision, keyLength, uValue, encryptMetadata) {
    const candidateKey = computeEncryptionKeyForReading(password, oValue, permissions, fileId, revision, keyLength, encryptMetadata);
    if (revision === 2) {
        const expectedU = rc4(candidateKey, PASSWORD_PADDING);
        return arraysEqual(expectedU, uValue.subarray(0, 32)) ? candidateKey : null;
    }
    if (!(revision >= 3)) {
        return null;
    }
    // Seed: MD5 over the padding string followed by the file identifier.
    const seedInput = new Uint8Array(32 + fileId.length);
    seedInput.set(PASSWORD_PADDING, 0);
    seedInput.set(fileId, 32);
    let digest = rc4(candidateKey, md5(seedInput));
    for (let round = 1; round < 20; round++) {
        const roundKey = new Uint8Array(candidateKey.length);
        candidateKey.forEach((byte, position) => {
            roundKey[position] = byte ^ round;
        });
        digest = rc4(roundKey, digest);
    }
    // Only the first 16 bytes of /U are significant for R3+.
    return arraysEqual(digest.subarray(0, 16), uValue.subarray(0, 16)) ? candidateKey : null;
}
256
/**
 * Derive the file encryption key (Algorithm 2, PDF spec §3.5.2).
 *
 * MD5 input layout: padded password (32) | first 32 bytes of /O | /P as a
 * 32-bit little-endian integer | file identifier | 0xFFFFFFFF (only when
 * revision >= 4 and metadata is not encrypted). For revision >= 3 the first
 * `keyLength` bytes of the digest are re-hashed 50 more times.
 *
 * @returns the first `keyLength` bytes of the final digest.
 */
function computeEncryptionKeyForReading(password, oValue, permissions, fileId, revision, keyLength, encryptMetadata) {
    const paddedPassword = padPassword(password);
    const appendMetadataMarker = revision >= 4 && !encryptMetadata;
    const PERMISSIONS_OFFSET = 64;
    const idOffset = PERMISSIONS_OFFSET + 4;
    const material = new Uint8Array(idOffset + fileId.length + (appendMetadataMarker ? 4 : 0));
    material.set(paddedPassword, 0);
    material.set(oValue.subarray(0, 32), 32);
    // /P as 4 little-endian bytes
    new DataView(material.buffer).setInt32(PERMISSIONS_OFFSET, permissions, true);
    material.set(fileId, idOffset);
    if (appendMetadataMarker) {
        // Trailing 0xFFFFFFFF marks "metadata left unencrypted".
        material.fill(0xff, idOffset + fileId.length);
    }
    let digest = md5(material);
    if (revision >= 3) {
        let remaining = 50;
        while (remaining-- > 0) {
            digest = md5(digest.subarray(0, keyLength));
        }
    }
    return digest.subarray(0, keyLength);
}
294
/**
 * Recover the user password from a candidate owner password
 * (Algorithm 7 from PDF spec §3.5.2).
 *
 * The RC4 key is derived from the padded owner password, the first 32 bytes
 * of /O are decrypted with it (one pass for R2, twenty passes with the key
 * XOR-ed by 19..0 for R3+), the standard padding is stripped, and the
 * remaining bytes are returned as a string for tryUserPassword().
 *
 * @param ownerPassword - Candidate owner password
 * @param oValue - /O entry of the Encrypt dictionary
 * @param revision - Security handler revision (/R)
 * @param keyLength - Encryption key length in bytes
 * @returns The recovered user password; arbitrary text when the owner
 *          password is wrong (the caller's /U check then rejects it).
 */
function deriveUserPasswordFromOwner(ownerPassword, oValue, revision, keyLength) {
    let hash = md5(padPassword(ownerPassword));
    if (revision >= 3) {
        // Algorithm 3 step (c) re-hashes the WHOLE 16-byte digest each round.
        // Only Algorithm 2 feeds back the first n bytes; truncating here
        // yields a wrong key whenever keyLength < 16.
        for (let i = 0; i < 50; i++) {
            hash = md5(hash);
        }
    }
    const key = hash.subarray(0, keyLength);
    let result = new Uint8Array(oValue.subarray(0, 32));
    if (revision === 2) {
        result = rc4(key, result);
    }
    else if (revision >= 3) {
        for (let i = 19; i >= 0; i--) {
            const modKey = new Uint8Array(key.length);
            for (let j = 0; j < key.length; j++) {
                modKey[j] = key[j] ^ i;
            }
            result = rc4(modKey, result);
        }
    }
    // Locate where the standard padding starts: the tail of `result` from
    // that index must equal the head of PASSWORD_PADDING.
    let passwordLength = 0;
    while (passwordLength < 32) {
        const tail = 32 - passwordLength;
        if (result[passwordLength] === PASSWORD_PADDING[0] &&
            arraysEqual(result.subarray(passwordLength, passwordLength + tail), PASSWORD_PADDING.subarray(0, tail))) {
            break;
        }
        passwordLength++;
    }
    // The caller UTF-8 encodes the returned string again (padPassword), so
    // bytes >= 0x80 must be decoded as UTF-8 to survive the round trip; a
    // byte-per-char mapping would re-encode each of them as two bytes.
    const userBytes = result.subarray(0, passwordLength);
    if (userBytes.some(byte => byte > 0x7f)) {
        try {
            return new TextDecoder("utf-8", { fatal: true, ignoreBOM: true }).decode(userBytes);
        }
        catch {
            // Not valid UTF-8: fall through to the byte-per-char mapping.
        }
    }
    let pwd = "";
    for (let i = 0; i < passwordLength; i++) {
        pwd += String.fromCharCode(result[i]);
    }
    return pwd;
}
330
+ // =============================================================================
331
+ // AES-128 Decryption
332
+ // =============================================================================
333
/**
 * RC4-decrypt one object's data with its per-object key.
 *
 * Per-object key = MD5(encryptionKey + objNum (3 bytes LE) + genNum (2 bytes LE)),
 * cut to min(n + 5, 16) bytes where n is the encryption key length.
 */
function decryptRc4PerObject(data, objectNumber, generation, encryptionKey) {
    const baseLength = encryptionKey.length;
    const objectSuffix = [
        objectNumber & 0xff,
        (objectNumber >> 8) & 0xff,
        (objectNumber >> 16) & 0xff,
        generation & 0xff,
        (generation >> 8) & 0xff
    ];
    const material = new Uint8Array(baseLength + objectSuffix.length);
    material.set(encryptionKey, 0);
    material.set(objectSuffix, baseLength);
    const perObjectKey = md5(material).subarray(0, Math.min(baseLength + 5, 16));
    return rc4(perObjectKey, data);
}
349
/**
 * AES-128-CBC decrypt one object's data with its per-object key.
 *
 * Per-object key = MD5(encryptionKey + objNum (3 bytes LE) + genNum (2 bytes LE) + "sAlT"),
 * cut to min(n + 5, 16) bytes. The leading 16 bytes of `data` are the IV.
 * Input shorter than 16 bytes, or whose ciphertext is empty or not a whole
 * number of 16-byte blocks, is returned unchanged.
 */
function decryptAes128(data, objectNumber, generation, encryptionKey) {
    if (data.length < 16) {
        return data;
    }
    const material = Uint8Array.from([
        ...encryptionKey,
        objectNumber & 0xff,
        (objectNumber >> 8) & 0xff,
        (objectNumber >> 16) & 0xff,
        generation & 0xff,
        (generation >> 8) & 0xff,
        0x73, 0x41, 0x6c, 0x54 // AES salt: "sAlT"
    ]);
    const aesKey = md5(material).subarray(0, Math.min(encryptionKey.length + 5, 16));
    const iv = data.subarray(0, 16);
    const ciphertext = data.subarray(16);
    const hasWholeBlocks = ciphertext.length > 0 && ciphertext.length % 16 === 0;
    return hasWholeBlocks ? aesCbcDecrypt(ciphertext, aesKey, iv) : data;
}
381
+ // =============================================================================
382
+ // Helpers
383
+ // =============================================================================
384
/**
 * Produce the 32-byte padded form of a password: the UTF-8 bytes of the
 * password (at most 32 of them) followed by as many leading bytes of
 * PASSWORD_PADDING as are needed to reach 32.
 */
function padPassword(password) {
    const encoded = textEncoder.encode(password);
    const joined = [...encoded.subarray(0, 32), ...PASSWORD_PADDING];
    return Uint8Array.from(joined.slice(0, 32));
}
392
/**
 * Element-wise strict equality of two array-likes; false when lengths differ.
 */
function arraysEqual(a, b) {
    const size = a.length;
    if (size !== b.length) {
        return false;
    }
    let index = 0;
    while (index < size && a[index] === b[index]) {
        index++;
    }
    // Reaching the end means no mismatch was found.
    return index === size;
}
403
/**
 * Resolve the EncryptMetadata flag of an encrypt dictionary.
 *
 * Lookup order: the top-level EncryptMetadata entry, then
 * CF → StdCF → EncryptMetadata (only when it is a boolean). Falls back to
 * true, the spec default, when neither is present.
 */
function readEncryptMetadata(encryptDict) {
    const direct = dictGetBool(encryptDict, "EncryptMetadata");
    if (direct !== undefined) {
        return direct;
    }
    const cryptFilters = encryptDict.get("CF");
    const standardFilter = cryptFilters instanceof Map ? cryptFilters.get("StdCF") : undefined;
    const nested = standardFilter instanceof Map ? standardFilter.get("EncryptMetadata") : undefined;
    // Default per spec
    return typeof nested === "boolean" ? nested : true;
}
428
+ /**
429
+ * Check if V4 encryption uses AES (vs RC4).
430
+ */
431
+ function isAesCryptFilter(encryptDict) {
432
+ const cf = encryptDict.get("CF");
433
+ if (!cf || !(cf instanceof Map)) {
434
+ return false;
435
+ }
436
+ // Check StdCF filter
437
+ const stdCF = cf.get("StdCF");
438
+ if (!stdCF || !(stdCF instanceof Map)) {
439
+ return false;
440
+ }
441
+ const cfm = stdCF.get("CFM");
442
+ return cfm === "AESV2";
443
+ }
@@ -0,0 +1,191 @@
1
+ /**
2
+ * PDF document parser.
3
+ *
4
+ * Handles the high-level PDF file structure:
5
+ * - Locating startxref
6
+ * - Parsing cross-reference tables (traditional and stream-based)
7
+ * - Reading trailer dictionaries
8
+ * - Resolving indirect object references
9
+ * - Handling incremental updates
10
+ *
11
+ * @see PDF Reference 1.7, §3.4 - File Structure
12
+ */
13
+ import type { PdfObject, PdfDictValue, PdfRef, PdfStream } from "./pdf-parser.js";
14
+ /** Result of resolving an object, paired with the object/generation numbers needed for decryption */
15
+ interface ResolvedObject {
16
+ /** The resolved PDF object (nullable, matching the return type of resolve()) */
17
+ obj: PdfObject | null;
18
+ /** The object number the value was resolved from */
19
+ objNum: number;
20
+ /** The generation number the value was resolved from */
21
+ gen: number;
22
+ }
23
+ /**
24
+ * Parsed PDF document with lazy object resolution.
25
+ *
26
+ * Reads the cross-reference table and trailer on construction,
27
+ * then resolves individual objects on demand with caching.
28
+ */
29
+ export declare class PdfDocument {
30
+ private tokenizer;
31
+ private xref;
32
+ private cache;
33
+ readonly trailer: PdfDictValue;
34
+ /** Decryption callback (data, objNum, gen) → decrypted bytes; assigned externally (e.g. by initDecryption). Typed nullable — presumably null while no decryption is set up (confirm in the implementation). */
35
+ decryptFn: ((data: Uint8Array, objNum: number, gen: number) => Uint8Array) | null;
36
+ constructor(data: Uint8Array);
37
+ /** Get the underlying raw data */
38
+ get data(): Uint8Array;
39
+ private parseFileStructure;
40
+ /**
41
+ * Find the startxref offset by scanning backward from EOF.
42
+ */
43
+ private findStartxref;
44
+ /**
45
+ * Parse the xref chain starting at the given offset.
46
+ * Follows /Prev links for incremental updates.
47
+ * Returns the merged trailer dictionary.
48
+ */
49
+ private parseXrefChain;
50
+ /**
51
+ * Parse a traditional xref table and its trailer.
52
+ */
53
+ private parseTraditionalXref;
54
+ /**
55
+ * Parse a cross-reference stream (PDF 1.5+).
56
+ */
57
+ private parseXrefStream;
58
+ /**
59
+ * Reconstruct the xref table by scanning the entire file for `N N obj` patterns.
60
+ * This is a fallback for corrupted or broken PDFs where the normal xref parsing fails.
61
+ *
62
+ * @returns A synthetic trailer dictionary
63
+ */
64
+ private reconstructXref;
65
+ /**
66
+ * Merge trailer entries from an older trailer into the current one.
67
+ * Only adds keys that don't already exist.
68
+ */
69
+ private mergeTrailer;
70
+ /**
71
+ * Resolve a PDF object by its object number and generation.
72
+ * Returns null if the object doesn't exist.
73
+ */
74
+ resolve(objNum: number, gen?: number): PdfObject | null;
75
+ /**
76
+ * Resolve a PDF object and return it along with its object/generation numbers.
77
+ * Useful for tracking which object a value came from (for decryption).
78
+ *
79
+ * @param objNum - The object number to resolve
80
+ * @param gen - The generation number (default 0)
81
+ * @returns The resolved object with its objNum and gen for decryption context
82
+ */
83
+ resolveWithObjNum(objNum: number, gen?: number): ResolvedObject;
84
+ /**
85
+ * Dereference a PdfRef to its actual object value.
86
+ * If the input is not a PdfRef, returns it as-is.
87
+ */
88
+ deref(obj: PdfObject | null | undefined): PdfObject | null;
89
+ /**
90
+ * Dereference a PdfRef and return it as a dictionary (typed nullable — presumably null when the target is absent or not a dictionary; confirm in the implementation).
91
+ */
92
+ derefDict(obj: PdfObject | null | undefined): PdfDictValue | null;
93
+ /**
94
+ * Dereference a PdfRef and get the stream only (the return type carries no objNum/gen).
95
+ * Use derefStreamWithObjNum when objNum/gen are needed for per-object decryption.
96
+ */
97
+ derefStream(obj: PdfObject | null | undefined): PdfStream | null;
98
+ /**
99
+ * Dereference a PdfRef and get the stream with its object number and generation.
100
+ * Returns null if the object is not a stream.
101
+ * The objNum/gen are needed for correct per-object decryption (V1-V4).
102
+ */
103
+ derefStreamWithObjNum(obj: PdfObject | null | undefined): {
104
+ stream: PdfStream;
105
+ objNum: number;
106
+ gen: number;
107
+ } | null;
108
+ /**
109
+ * Get decoded stream data from a stream object.
110
+ * Applies filter chain decoding and decryption.
111
+ *
112
+ * When objNum/gen are not provided (default 0), decryption may not
113
+ * produce correct results. Use {@link resolveWithObjNum} to obtain
114
+ * the correct objNum/gen for the stream's containing object.
115
+ */
116
+ getStreamData(stream: PdfStream, objNum?: number, gen?: number): Uint8Array;
117
+ /**
118
+ * Decrypt a string value (bytes) if encryption is active.
119
+ */
120
+ decryptString(bytes: Uint8Array, objNum: number, gen: number): Uint8Array;
121
+ /**
122
+ * Decode a PDF string to a JS string, with optional decryption.
123
+ */
124
+ decodeString(bytes: Uint8Array, objNum?: number, gen?: number): string;
125
+ /**
126
+ * Recursively decrypt all string values (Uint8Array) within a parsed PDF object.
127
+ * PDF spec requires all strings in an encrypted document to be decrypted using
128
+ * the per-object key derived from the containing object's objNum/gen.
129
+ * Streams are NOT decrypted here — they are decrypted in getStreamData().
130
+ */
131
+ private decryptObjectStrings;
132
+ /**
133
+ * Get the catalog dictionary (the root of the document structure).
134
+ */
135
+ getCatalog(): PdfDictValue;
136
+ /**
137
+ * Get the pages array from the page tree.
138
+ * Returns an array of page dictionaries in order.
139
+ */
140
+ getPages(): PdfDictValue[];
141
+ /**
142
+ * Get pages with their object numbers (needed for correct decryption of
143
+ * inline streams within page objects).
144
+ */
145
+ getPagesWithObjInfo(): Array<{
146
+ dict: PdfDictValue;
147
+ objNum: number;
148
+ gen: number;
149
+ }>;
150
+ /**
151
+ * Recursively collect page dictionaries from the page tree.
152
+ * Uses a visited set to prevent infinite recursion on cyclic page trees.
153
+ */
154
+ private collectPages;
155
+ /**
156
+ * Get the object number for a given object reference.
157
+ * Useful for tracking which object a value came from (for decryption).
158
+ */
159
+ getObjNumForRef(ref: PdfRef): number;
160
+ /**
161
+ * Parse an object definition at the given byte offset.
162
+ */
163
+ private parseObjectAt;
164
+ /**
165
+ * Parse a compressed object from an object stream.
166
+ * @param objStmNum - The object number of the object stream
167
+ * @param index - The index of the object within the stream
168
+ */
169
+ private parseCompressedObject;
170
+ /**
171
+ * Parse all objects from an object stream.
172
+ * @returns Map of object number → object value
173
+ */
174
+ private parseObjectStream;
175
+ /**
176
+ * Resolve a page's bounding box (MediaBox/CropBox) with indirect ref resolution
177
+ * and parent inheritance. Returns `{ width, height }` or null if no box found.
178
+ *
179
+ * This is a shared helper so callers don't duplicate box resolution logic.
180
+ */
181
+ resolvePageBox(pageDict: PdfDictValue, visited?: Set<PdfDictValue>): {
182
+ width: number;
183
+ height: number;
184
+ } | null;
185
+ /**
186
+ * Resolve a page's Resources dictionary, inheriting from parent pages if needed.
187
+ * Protected against cyclic parent chains.
188
+ */
189
+ resolvePageResources(pageDict: PdfDictValue, visited?: Set<PdfDictValue>): PdfDictValue;
190
+ }
191
+ export {};