@cj-tech-master/excelts 8.0.0 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +14 -1
  2. package/README_zh.md +6 -0
  3. package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
  4. package/dist/browser/modules/archive/zip/stream.js +53 -0
  5. package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
  6. package/dist/browser/modules/pdf/core/crypto.js +637 -0
  7. package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
  8. package/dist/browser/modules/pdf/core/encryption.js +88 -261
  9. package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
  10. package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
  11. package/dist/browser/modules/pdf/index.d.ts +23 -2
  12. package/dist/browser/modules/pdf/index.js +21 -3
  13. package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  14. package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
  15. package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
  16. package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
  17. package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
  18. package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
  19. package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
  20. package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
  21. package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
  22. package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
  23. package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
  24. package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
  25. package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
  26. package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
  27. package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  28. package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
  29. package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
  30. package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
  31. package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
  32. package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
  33. package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
  34. package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
  35. package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  36. package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
  37. package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
  38. package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
  39. package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
  40. package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
  41. package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  42. package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
  43. package/dist/cjs/modules/archive/zip/stream.js +53 -0
  44. package/dist/cjs/modules/pdf/core/crypto.js +649 -0
  45. package/dist/cjs/modules/pdf/core/encryption.js +88 -263
  46. package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
  47. package/dist/cjs/modules/pdf/index.js +23 -4
  48. package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
  49. package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
  50. package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
  51. package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
  52. package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
  53. package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
  54. package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
  55. package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
  56. package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
  57. package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
  58. package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
  59. package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
  60. package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
  61. package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
  62. package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
  63. package/dist/esm/modules/archive/zip/stream.js +53 -0
  64. package/dist/esm/modules/pdf/core/crypto.js +637 -0
  65. package/dist/esm/modules/pdf/core/encryption.js +88 -261
  66. package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
  67. package/dist/esm/modules/pdf/index.js +21 -3
  68. package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
  69. package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
  70. package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
  71. package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
  72. package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
  73. package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
  74. package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
  75. package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
  76. package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
  77. package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
  78. package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
  79. package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
  80. package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
  81. package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
  82. package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
  83. package/dist/iife/excelts.iife.js +703 -267
  84. package/dist/iife/excelts.iife.js.map +1 -1
  85. package/dist/iife/excelts.iife.min.js +35 -35
  86. package/dist/types/modules/archive/zip/stream.d.ts +4 -0
  87. package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
  88. package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
  89. package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
  90. package/dist/types/modules/pdf/index.d.ts +23 -2
  91. package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
  92. package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
  93. package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
  94. package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
  95. package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
  96. package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
  97. package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
  98. package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
  99. package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
  100. package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
  101. package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
  102. package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
  103. package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
  104. package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
  105. package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
  106. package/package.json +1 -1
@@ -1,310 +1,137 @@
1
1
  /**
2
- * PDF encryption support (Standard Security Handler, Revision 3).
2
+ * PDF encryption support (Standard Security Handler, V=5, R=5).
3
3
  *
4
- * Implements RC4-128 encryption compatible with PDF 1.4.
4
+ * Implements AES-256 encryption compatible with PDF 2.0 (ISO 32000-2:2020).
5
5
  * Supports:
6
6
  * - User password (required to open the document)
7
7
  * - Owner password (grants full access)
8
8
  * - Permission flags (print, copy, modify, etc.)
9
9
  *
10
- * @see PDF Reference 1.7, §3.5 - Encryption
10
+ * The file encryption key (FEK) is a random 256-bit key.
11
+ * All streams and strings are encrypted using AES-256-CBC with a random
12
+ * 16-byte IV prepended to each encrypted value.
13
+ *
14
+ * @see ISO 32000-2:2020, §7.6 — Encryption
11
15
  */
12
- // =============================================================================
13
- // Constants
14
- // =============================================================================
15
- /** PDF password padding string (32 bytes) per PDF spec §3.5.2 */
16
- const PASSWORD_PADDING = new Uint8Array([
17
- 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
18
- 0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a
19
- ]);
16
+ import { sha256, aesCbcEncrypt, aesCbcEncryptRaw, aesEcbEncrypt, randomBytes, concatArrays } from "./crypto.js";
20
17
  // =============================================================================
21
18
  // Public API
22
19
  // =============================================================================
23
20
  /**
24
- * Initialize encryption state from the given options.
21
+ * Initialize encryption state for AES-256 (V=5, R=5).
25
22
  */
26
23
  export function initEncryption(options) {
27
- const userPwd = options.userPassword ?? "";
28
- const ownerPwd = options.ownerPassword;
24
+ const userPwd = truncatePassword(options.userPassword ?? "");
25
+ const ownerPwd = truncatePassword(options.ownerPassword);
29
26
  const perms = computePermissions(options.permissions);
30
- const fileId = generateFileId();
31
- // Step 1: Compute O value
32
- const oValue = computeOValue(ownerPwd, userPwd);
33
- // Step 2: Compute encryption key
34
- const encryptionKey = computeEncryptionKey(userPwd, oValue, perms, fileId);
27
+ // Step 1: Generate random 32-byte file encryption key
28
+ const encryptionKey = randomBytes(32);
29
+ // Step 2: Generate random salts
30
+ const uValidationSalt = randomBytes(8);
31
+ const uKeySalt = randomBytes(8);
32
+ const oValidationSalt = randomBytes(8);
33
+ const oKeySalt = randomBytes(8);
35
34
  // Step 3: Compute U value
36
- const uValue = computeUValue(encryptionKey, fileId);
37
- return { encryptionKey, oValue, uValue, permissions: perms, fileId };
38
- }
39
- /**
40
- * Encrypt a string or stream for a specific PDF object.
41
- * Per-object encryption key = MD5(encryptionKey + objectNumber + generation).
42
- */
43
- export function encryptData(data, objectNumber, generation, encryptionKey) {
44
- // Compute per-object key: MD5(encryptionKey + objNum(3LE) + genNum(2LE))
45
- const keyInput = new Uint8Array(encryptionKey.length + 5);
46
- keyInput.set(encryptionKey);
47
- keyInput[encryptionKey.length] = objectNumber & 0xff;
48
- keyInput[encryptionKey.length + 1] = (objectNumber >> 8) & 0xff;
49
- keyInput[encryptionKey.length + 2] = (objectNumber >> 16) & 0xff;
50
- keyInput[encryptionKey.length + 3] = generation & 0xff;
51
- keyInput[encryptionKey.length + 4] = (generation >> 8) & 0xff;
52
- const objKey = md5(keyInput);
53
- // Use min(n+5, 16) bytes of the hash as the RC4 key
54
- const keyLen = Math.min(encryptionKey.length + 5, 16);
55
- const rc4Key = objKey.subarray(0, keyLen);
56
- return rc4(rc4Key, data);
35
+ // U hash = SHA-256(userPassword + uValidationSalt)
36
+ const uHash = sha256(concatArrays(userPwd, uValidationSalt));
37
+ const uValue = concatArrays(uHash, uValidationSalt, uKeySalt);
38
+ // Step 4: Compute UE value
39
+ // UE = AES-256-CBC-encrypt(encryptionKey, SHA-256(userPassword + uKeySalt), zeroIV)
40
+ // Actually: the key for encrypting UE is SHA-256(password + key_salt),
41
+ // and we encrypt the file encryption key with it.
42
+ const ueKey = sha256(concatArrays(userPwd, uKeySalt));
43
+ const zeroIv = new Uint8Array(16);
44
+ const ueValue = aesCbcEncryptRaw(encryptionKey, ueKey, zeroIv);
45
+ // Step 5: Compute O value
46
+ // O hash = SHA-256(ownerPassword + oValidationSalt + U(0..47))
47
+ const oHash = sha256(concatArrays(ownerPwd, oValidationSalt, uValue));
48
+ const oValue = concatArrays(oHash, oValidationSalt, oKeySalt);
49
+ // Step 6: Compute OE value
50
+ // OE = AES-256-CBC-encrypt(encryptionKey, SHA-256(ownerPassword + oKeySalt + U(0..47)), zeroIV)
51
+ const oeKey = sha256(concatArrays(ownerPwd, oKeySalt, uValue));
52
+ const oeValue = aesCbcEncryptRaw(encryptionKey, oeKey, zeroIv);
53
+ // Step 7: Compute Perms value
54
+ // 16-byte block: P(4 LE bytes) + 0xFF(4 bytes) + 'T' or 'F' (encryptMetadata) + 'a' 'd' 'b' + 0(3 bytes)
55
+ const permsBlock = new Uint8Array(16);
56
+ const permsView = new DataView(permsBlock.buffer);
57
+ permsView.setInt32(0, perms, true); // P value in little-endian
58
+ permsBlock[4] = 0xff;
59
+ permsBlock[5] = 0xff;
60
+ permsBlock[6] = 0xff;
61
+ permsBlock[7] = 0xff;
62
+ permsBlock[8] = 0x54; // 'T' — EncryptMetadata = true
63
+ permsBlock[9] = 0x61; // 'a'
64
+ permsBlock[10] = 0x64; // 'd'
65
+ permsBlock[11] = 0x62; // 'b'
66
+ // bytes 12-15 are zero
67
+ const permsValue = aesEcbEncrypt(permsBlock, encryptionKey);
68
+ // File ID (random 16 bytes, used in trailer)
69
+ const fileId = randomBytes(16);
70
+ return {
71
+ encryptionKey,
72
+ oValue,
73
+ uValue,
74
+ oeValue,
75
+ ueValue,
76
+ permsValue,
77
+ permissions: perms,
78
+ fileId
79
+ };
57
80
  }
58
- // =============================================================================
59
- // RC4 Cipher
60
- // =============================================================================
61
81
  /**
62
- * RC4 stream cipher implementation.
82
+ * Encrypt data for a PDF object using AES-256-CBC.
83
+ *
84
+ * For V=5/R=5, the file encryption key is used directly (no per-object key derivation).
85
+ * A random 16-byte IV is prepended to the ciphertext.
63
86
  */
64
- export function rc4(key, data) {
65
- // Key Scheduling Algorithm (KSA)
66
- const s = new Uint8Array(256);
67
- for (let i = 0; i < 256; i++) {
68
- s[i] = i;
69
- }
70
- let j = 0;
71
- for (let i = 0; i < 256; i++) {
72
- j = (j + s[i] + key[i % key.length]) & 0xff;
73
- [s[i], s[j]] = [s[j], s[i]];
74
- }
75
- // Pseudo-Random Generation Algorithm (PRGA)
76
- const result = new Uint8Array(data.length);
77
- let ii = 0;
78
- let jj = 0;
79
- for (let k = 0; k < data.length; k++) {
80
- ii = (ii + 1) & 0xff;
81
- jj = (jj + s[ii]) & 0xff;
82
- [s[ii], s[jj]] = [s[jj], s[ii]];
83
- result[k] = data[k] ^ s[(s[ii] + s[jj]) & 0xff];
84
- }
87
+ export function encryptData(data, _objectNumber, _generation, encryptionKey) {
88
+ const iv = randomBytes(16);
89
+ const ciphertext = aesCbcEncrypt(data, encryptionKey, iv);
90
+ // Prepend IV to ciphertext per PDF spec
91
+ const result = new Uint8Array(16 + ciphertext.length);
92
+ result.set(iv);
93
+ result.set(ciphertext, 16);
85
94
  return result;
86
95
  }
87
96
  // =============================================================================
88
- // MD5 Hash
89
- // =============================================================================
90
- /**
91
- * MD5 hash implementation (RFC 1321).
92
- * Returns 16-byte digest.
93
- */
94
- export function md5(input) {
95
- // Pre-processing: padding
96
- const msgLen = input.length;
97
- const bitLen = msgLen * 8;
98
- // Pad to 64-byte boundary (56 bytes mod 64, then 8 bytes length)
99
- const padLen = ((56 - ((msgLen + 1) % 64) + 64) % 64) + 1;
100
- const padded = new Uint8Array(msgLen + padLen + 8);
101
- padded.set(input);
102
- padded[msgLen] = 0x80;
103
- // Append length in bits as 64-bit little-endian
104
- const view = new DataView(padded.buffer);
105
- view.setUint32(padded.length - 8, bitLen >>> 0, true);
106
- view.setUint32(padded.length - 4, 0, true); // high 32 bits (always 0 for our sizes)
107
- // Initialize hash values
108
- let a0 = 0x67452301;
109
- let b0 = 0xefcdab89;
110
- let c0 = 0x98badcfe;
111
- let d0 = 0x10325476;
112
- // Process each 64-byte block
113
- for (let i = 0; i < padded.length; i += 64) {
114
- const M = new Uint32Array(16);
115
- for (let j = 0; j < 16; j++) {
116
- M[j] = view.getUint32(i + j * 4, true);
117
- }
118
- let A = a0;
119
- let B = b0;
120
- let C = c0;
121
- let D = d0;
122
- for (let j = 0; j < 64; j++) {
123
- let F;
124
- let g;
125
- if (j < 16) {
126
- F = (B & C) | (~B & D);
127
- g = j;
128
- }
129
- else if (j < 32) {
130
- F = (D & B) | (~D & C);
131
- g = (5 * j + 1) % 16;
132
- }
133
- else if (j < 48) {
134
- F = B ^ C ^ D;
135
- g = (3 * j + 5) % 16;
136
- }
137
- else {
138
- F = C ^ (B | ~D);
139
- g = (7 * j) % 16;
140
- }
141
- F = (F + A + K[j] + M[g]) >>> 0;
142
- A = D;
143
- D = C;
144
- C = B;
145
- B = (B + rotl(F, S[j])) >>> 0;
146
- }
147
- a0 = (a0 + A) >>> 0;
148
- b0 = (b0 + B) >>> 0;
149
- c0 = (c0 + C) >>> 0;
150
- d0 = (d0 + D) >>> 0;
151
- }
152
- // Produce the 128-bit digest
153
- const digest = new Uint8Array(16);
154
- const dv = new DataView(digest.buffer);
155
- dv.setUint32(0, a0, true);
156
- dv.setUint32(4, b0, true);
157
- dv.setUint32(8, c0, true);
158
- dv.setUint32(12, d0, true);
159
- return digest;
160
- }
161
- function rotl(x, n) {
162
- return ((x << n) | (x >>> (32 - n))) >>> 0;
163
- }
164
- // MD5 per-round shift amounts
165
- const S = [
166
- 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14,
167
- 20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6,
168
- 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
169
- ];
170
- // MD5 per-round constants (floor(2^32 × abs(sin(i+1))))
171
- const K = new Uint32Array([
172
- 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
173
- 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
174
- 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
175
- 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
176
- 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
177
- 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
178
- 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
179
- 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
180
- ]);
181
- // =============================================================================
182
- // PDF Password / Key Computation
97
+ // Internal Helpers
183
98
  // =============================================================================
184
99
  /**
185
- * Pad or truncate a password to 32 bytes using the PDF password padding.
100
+ * Truncate password to 127 bytes (UTF-8) per PDF 2.0 spec.
186
101
  */
187
- function padPassword(password) {
188
- const result = new Uint8Array(32);
102
+ function truncatePassword(password) {
189
103
  const bytes = new TextEncoder().encode(password);
190
- const len = Math.min(bytes.length, 32);
191
- result.set(bytes.subarray(0, len));
192
- result.set(PASSWORD_PADDING.subarray(0, 32 - len), len);
193
- return result;
194
- }
195
- /**
196
- * Compute the O (owner) value.
197
- * Algorithm 3 from PDF spec §3.5.2.
198
- */
199
- function computeOValue(ownerPassword, userPassword) {
200
- // Step 1: MD5 hash of padded owner password
201
- let hash = md5(padPassword(ownerPassword));
202
- // Step 2: For revision 3, hash 50 more times
203
- for (let i = 0; i < 50; i++) {
204
- hash = md5(hash);
205
- }
206
- // Use first 16 bytes as RC4 key (128-bit / key length = 16)
207
- const rc4Key = hash.subarray(0, 16);
208
- // Step 3: RC4-encrypt the padded user password
209
- let result = rc4(rc4Key, padPassword(userPassword));
210
- // Step 4: For revision 3, iterate 1-19 with modified key
211
- for (let i = 1; i <= 19; i++) {
212
- const modKey = new Uint8Array(16);
213
- for (let j = 0; j < 16; j++) {
214
- modKey[j] = rc4Key[j] ^ i;
215
- }
216
- result = rc4(modKey, result);
217
- }
218
- return result;
219
- }
220
- /**
221
- * Compute the encryption key.
222
- * Algorithm 2 from PDF spec §3.5.2.
223
- */
224
- function computeEncryptionKey(userPassword, oValue, permissions, fileId) {
225
- // Concatenate: padded password + O value + P value (4 LE bytes) + file ID
226
- const paddedPwd = padPassword(userPassword);
227
- const input = new Uint8Array(32 + 32 + 4 + fileId.length);
228
- input.set(paddedPwd);
229
- input.set(oValue, 32);
230
- const pView = new DataView(input.buffer, input.byteOffset);
231
- pView.setInt32(64, permissions, true);
232
- input.set(fileId, 68);
233
- let hash = md5(input);
234
- // For revision 3, hash 50 more times
235
- for (let i = 0; i < 50; i++) {
236
- hash = md5(hash.subarray(0, 16));
237
- }
238
- return hash.subarray(0, 16); // 128-bit key
239
- }
240
- /**
241
- * Compute the U (user) value.
242
- * Algorithm 5 from PDF spec §3.5.2 (revision 3).
243
- */
244
- function computeUValue(encryptionKey, fileId) {
245
- // Step 1: MD5 hash of padding + file ID
246
- const hashInput = new Uint8Array(32 + fileId.length);
247
- hashInput.set(PASSWORD_PADDING);
248
- hashInput.set(fileId, 32);
249
- const hash = md5(hashInput);
250
- // Step 2: RC4-encrypt with the encryption key
251
- let result = rc4(encryptionKey, hash);
252
- // Step 3: Iterate 1-19 with modified key
253
- for (let i = 1; i <= 19; i++) {
254
- const modKey = new Uint8Array(16);
255
- for (let j = 0; j < 16; j++) {
256
- modKey[j] = encryptionKey[j] ^ i;
257
- }
258
- result = rc4(modKey, result);
259
- }
260
- // Pad to 32 bytes with arbitrary padding
261
- const uValue = new Uint8Array(32);
262
- uValue.set(result);
263
- return uValue;
104
+ return bytes.length > 127 ? bytes.subarray(0, 127) : bytes;
264
105
  }
265
106
  /**
266
107
  * Compute the permissions integer (P value) from permission flags.
267
108
  */
268
109
  function computePermissions(perms) {
269
- // Start with all bits set that are "reserved" and must be 1
270
- // Bits 1-2, 7-8 must be 0; bits 13-32 must be 1 (per spec)
271
- let p = 0xfffff000 | 0b11000000; // bits 7-8 = reserved 1, high bits = 1
110
+ // Start with all reserved bits set to 1
111
+ let p = 0xfffff000 | 0b11000000;
272
112
  if (perms?.print) {
273
- p |= 1 << 2; // bit 3
113
+ p |= 1 << 2;
274
114
  }
275
115
  if (perms?.modify) {
276
- p |= 1 << 3; // bit 4
116
+ p |= 1 << 3;
277
117
  }
278
118
  if (perms?.copy) {
279
- p |= 1 << 4; // bit 5
119
+ p |= 1 << 4;
280
120
  }
281
121
  if (perms?.annotate) {
282
- p |= 1 << 5; // bit 6
122
+ p |= 1 << 5;
283
123
  }
284
124
  if (perms?.fillForms) {
285
- p |= 1 << 8; // bit 9
125
+ p |= 1 << 8;
286
126
  }
287
127
  if (perms?.accessibility) {
288
- p |= 1 << 9; // bit 10
128
+ p |= 1 << 9;
289
129
  }
290
130
  if (perms?.assemble) {
291
- p |= 1 << 10; // bit 11
131
+ p |= 1 << 10;
292
132
  }
293
133
  if (perms?.printHighQuality) {
294
- p |= 1 << 11; // bit 12
134
+ p |= 1 << 11;
295
135
  }
296
- // Convert to signed 32-bit
297
136
  return p | 0;
298
137
  }
299
- /**
300
- * Generate a random file identifier (16 bytes).
301
- */
302
- function generateFileId() {
303
- // Use MD5 of current timestamp + random for determinism in tests
304
- const seed = new Uint8Array(16);
305
- const now = Date.now();
306
- const view = new DataView(seed.buffer);
307
- view.setFloat64(0, now, true);
308
- view.setFloat64(8, Math.random() * 1e15, true);
309
- return md5(seed);
310
- }
@@ -1,14 +1,16 @@
1
1
  /**
2
2
  * PDF file writer.
3
3
  *
4
- * Assembles a complete PDF document from indirect objects.
4
+ * Assembles a complete PDF 2.0 document from indirect objects.
5
5
  * Handles the four sections of a PDF file:
6
- * 1. Header (%PDF-1.4)
6
+ * 1. Header (%PDF-2.0)
7
7
  * 2. Body (indirect objects)
8
8
  * 3. Cross-reference table
9
9
  * 4. Trailer (with document catalog reference)
10
10
  *
11
- * @see PDF Reference 1.7, Chapter 3.4 - File Structure
11
+ * Encryption uses AES-256 (V=5, R=5) per ISO 32000-2:2020.
12
+ *
13
+ * @see ISO 32000-2:2020, Chapter 7.5 — File Structure
12
14
  */
13
15
  import { PdfDict, pdfRef, pdfString, pdfHexString, pdfDate, pdfNumber } from "./pdf-object.js";
14
16
  import { PdfStructureError } from "../errors.js";
@@ -19,7 +21,7 @@ import { encryptData } from "./encryption.js";
19
21
  // PDF Writer
20
22
  // =============================================================================
21
23
  /**
22
- * Constructs a valid PDF 1.4 file from a set of indirect objects.
24
+ * Constructs a valid PDF 2.0 file from a set of indirect objects.
23
25
  *
24
26
  * Usage:
25
27
  * 1. Allocate object numbers with allocObject()
@@ -168,7 +170,7 @@ export class PdfWriter {
168
170
  let byteOffset = 0;
169
171
  // --- Header ---
170
172
  // Include a comment with high bytes to signal binary content per PDF spec §3.4.1
171
- const headerStr = "%PDF-1.4\n";
173
+ const headerStr = "%PDF-2.0\n";
172
174
  const headerStrBytes = encoder.encode(headerStr);
173
175
  chunks.push(headerStrBytes);
174
176
  byteOffset += headerStrBytes.length;
@@ -220,16 +222,23 @@ export class PdfWriter {
220
222
  chunks.push(objFooter);
221
223
  byteOffset += objFooter.length;
222
224
  }
223
- // --- Encrypt dictionary (must be added before xref) ---
225
+ // --- Encrypt dictionary (V=5, R=5, AES-256) ---
224
226
  if (this.encryption) {
225
227
  const encDict = new PdfDict()
226
228
  .set("Filter", "/Standard")
227
- .set("V", "2")
228
- .set("R", "3")
229
- .set("Length", "128")
229
+ .set("V", "5")
230
+ .set("R", "5")
231
+ .set("Length", "256")
230
232
  .set("P", String(this.encryption.permissions))
231
233
  .set("O", pdfHexString(this.encryption.oValue))
232
- .set("U", pdfHexString(this.encryption.uValue));
234
+ .set("U", pdfHexString(this.encryption.uValue))
235
+ .set("OE", pdfHexString(this.encryption.oeValue))
236
+ .set("UE", pdfHexString(this.encryption.ueValue))
237
+ .set("Perms", pdfHexString(this.encryption.permsValue))
238
+ .set("EncryptMetadata", "true")
239
+ .set("CF", "<< /StdCF << /Type /CryptFilter /CFM /AESV3 /AuthEvent /DocOpen /Length 32 >> >>")
240
+ .set("StmF", "/StdCF")
241
+ .set("StrF", "/StdCF");
233
242
  const encContent = encDict.toString();
234
243
  const encObj = {
235
244
  objectNumber: encryptObjNum,
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * PDF module for excelts.
3
3
  *
4
- * A full-featured, zero-dependency PDF engine.
4
+ * A full-featured, zero-dependency PDF engine for both writing and reading.
5
5
  *
6
- * @example Standalone:
6
+ * @example Standalone PDF generation:
7
7
  * ```typescript
8
8
  * import { pdf } from "excelts/pdf";
9
9
  *
@@ -25,15 +25,33 @@
25
25
  * const bytes = excelToPdf(workbook);
26
26
  * ```
27
27
  *
28
+ * @example Read PDF — extract text, images, and metadata:
29
+ * ```typescript
30
+ * import { readPdf } from "excelts/pdf";
31
+ *
32
+ * const result = readPdf(pdfBytes);
33
+ * console.log(result.text); // All text
34
+ * console.log(result.pages[0].text); // Page 1 text
35
+ * console.log(result.pages[0].images); // Page 1 images
36
+ * console.log(result.pages[0].annotations); // Page 1 annotations
37
+ * console.log(result.metadata.title); // Document title
38
+ * console.log(result.formFields); // Form fields
39
+ * ```
40
+ *
28
41
  * @module pdf
29
42
  */
30
43
  // =============================================================================
31
- // Public API
44
+ // Public API — Writing
32
45
  // =============================================================================
33
46
  /** Standalone PDF generation — accepts plain arrays, sheet objects, or workbooks. */
34
47
  export { pdf } from "./pdf.js";
35
48
  /** Excel-to-PDF conversion — accepts an Excel Workbook instance. */
36
49
  export { excelToPdf } from "./excel-bridge.js";
50
+ // =============================================================================
51
+ // Public API — Reading
52
+ // =============================================================================
53
+ /** Read a PDF file and extract text, images, and metadata. */
54
+ export { readPdf } from "./reader/pdf-reader.js";
37
55
  export { PageSizes } from "./types.js";
38
56
  // =============================================================================
39
57
  // Errors
@@ -0,0 +1,155 @@
1
+ /**
2
+ * PDF annotation extractor.
3
+ *
4
+ * Extracts annotations from a PDF page's `/Annots` array.
5
+ * Supports all standard annotation subtypes defined in PDF Reference 1.7, §12.5.
6
+ *
7
+ * Common annotation types:
8
+ * - **Link** — Hyperlinks (URI, GoTo, GoToR)
9
+ * - **Text** — Sticky notes / comments
10
+ * - **FreeText** — Inline text annotations
11
+ * - **Highlight / Underline / StrikeOut / Squiggly** — Text markup
12
+ * - **Stamp** — Rubber stamp annotations
13
+ * - **Popup** — Associated popup windows
14
+ * - **Widget** — Form field widgets (handled separately by form-extractor)
15
+ *
16
+ * @see PDF Reference 1.7, §12.5 - Annotations
17
+ */
18
+ import { isPdfArray, dictGetName, dictGetNumber, decodePdfStringBytes } from "./pdf-parser.js";
19
+ import { getDictStringValue } from "./reader-utils.js";
20
+ // =============================================================================
21
+ // Public API
22
+ // =============================================================================
23
+ /**
24
+ * Extract annotations from a PDF page.
25
+ *
26
+ * Skips Widget annotations (form fields) — those are handled by the form extractor.
27
+ *
28
+ * @param pageDict - The page dictionary
29
+ * @param doc - The PDF document for resolving references
30
+ * @returns Array of extracted annotations
31
+ */
32
+ export function extractAnnotationsFromPage(pageDict, doc) {
33
+ const annotsObj = pageDict.get("Annots");
34
+ if (!annotsObj) {
35
+ return [];
36
+ }
37
+ // Resolve the Annots array (may be an indirect reference)
38
+ const annotsResolved = doc.deref(annotsObj);
39
+ if (!isPdfArray(annotsResolved)) {
40
+ return [];
41
+ }
42
+ const annotations = [];
43
+ for (const annotRef of annotsResolved) {
44
+ try {
45
+ const annotDict = doc.derefDict(annotRef);
46
+ if (!annotDict) {
47
+ continue;
48
+ }
49
+ const subtype = dictGetName(annotDict, "Subtype") ?? "";
50
+ // Skip Widget annotations — handled by form-extractor
51
+ if (subtype === "Widget") {
52
+ continue;
53
+ }
54
+ // Skip Popup annotations — they are auxiliary
55
+ if (subtype === "Popup") {
56
+ continue;
57
+ }
58
+ const annotation = parseAnnotation(annotDict, subtype, doc);
59
+ if (annotation) {
60
+ annotations.push(annotation);
61
+ }
62
+ }
63
+ catch {
64
+ // Skip malformed annotations
65
+ }
66
+ }
67
+ return annotations;
68
+ }
69
+ // =============================================================================
70
+ // Parsing
71
+ // =============================================================================
72
+ function parseAnnotation(dict, subtype, doc) {
73
+ const rect = parseRect(dict.get("Rect"), doc);
74
+ if (!rect) {
75
+ return null;
76
+ }
77
+ const contents = getDictStringValue(dict, "Contents", doc);
78
+ const author = getDictStringValue(dict, "T", doc);
79
+ const subject = getDictStringValue(dict, "Subj", doc);
80
+ const modifiedDate = getDictStringValue(dict, "M", doc);
81
+ const flags = dictGetNumber(dict, "F") ?? 0;
82
+ const color = parseColorArray(dict.get("C"), doc);
83
+ // Extract link-specific fields
84
+ let uri = "";
85
+ let destination = "";
86
+ if (subtype === "Link") {
87
+ const actionObj = doc.derefDict(dict.get("A"));
88
+ if (actionObj) {
89
+ const actionType = dictGetName(actionObj, "S");
90
+ if (actionType === "URI") {
91
+ uri = getDictStringValue(actionObj, "URI", doc);
92
+ }
93
+ else if (actionType === "GoTo") {
94
+ const dest = actionObj.get("D");
95
+ if (typeof dest === "string") {
96
+ destination = dest;
97
+ }
98
+ }
99
+ else if (actionType === "GoToR") {
100
+ uri = getDictStringValue(actionObj, "F", doc);
101
+ }
102
+ }
103
+ // Check /Dest directly (older PDFs use this instead of /A)
104
+ if (!uri && !destination) {
105
+ const destObj = dict.get("Dest");
106
+ if (destObj) {
107
+ const resolved = doc.deref(destObj);
108
+ if (typeof resolved === "string") {
109
+ destination = resolved;
110
+ }
111
+ else if (resolved instanceof Uint8Array) {
112
+ destination = decodePdfStringBytes(resolved);
113
+ }
114
+ }
115
+ }
116
+ }
117
+ return {
118
+ subtype,
119
+ rect,
120
+ contents,
121
+ author,
122
+ subject,
123
+ modifiedDate,
124
+ uri,
125
+ destination,
126
+ flags,
127
+ color
128
+ };
129
+ }
130
+ function parseRect(obj, doc) {
131
+ if (!obj) {
132
+ return null;
133
+ }
134
+ const resolved = doc.deref(obj);
135
+ if (!isPdfArray(resolved) || resolved.length < 4) {
136
+ return null;
137
+ }
138
+ const nums = resolved.map(v => (typeof v === "number" ? v : 0));
139
+ return {
140
+ x1: nums[0],
141
+ y1: nums[1],
142
+ x2: nums[2],
143
+ y2: nums[3]
144
+ };
145
+ }
146
+ function parseColorArray(obj, doc) {
147
+ if (!obj) {
148
+ return [];
149
+ }
150
+ const resolved = doc.deref(obj);
151
+ if (!isPdfArray(resolved)) {
152
+ return [];
153
+ }
154
+ return resolved.map(v => (typeof v === "number" ? v : 0));
155
+ }