@cj-tech-master/excelts 8.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/README_zh.md +6 -0
- package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
- package/dist/browser/modules/archive/zip/stream.js +53 -0
- package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/browser/modules/pdf/core/crypto.js +637 -0
- package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/browser/modules/pdf/core/encryption.js +88 -261
- package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/browser/modules/pdf/index.d.ts +23 -2
- package/dist/browser/modules/pdf/index.js +21 -3
- package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/cjs/modules/archive/zip/stream.js +53 -0
- package/dist/cjs/modules/pdf/core/crypto.js +649 -0
- package/dist/cjs/modules/pdf/core/encryption.js +88 -263
- package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/cjs/modules/pdf/index.js +23 -4
- package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
- package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
- package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
- package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
- package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
- package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
- package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
- package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
- package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
- package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
- package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
- package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
- package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
- package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
- package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
- package/dist/esm/modules/archive/zip/stream.js +53 -0
- package/dist/esm/modules/pdf/core/crypto.js +637 -0
- package/dist/esm/modules/pdf/core/encryption.js +88 -261
- package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/esm/modules/pdf/index.js +21 -3
- package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/iife/excelts.iife.js +703 -267
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +35 -35
- package/dist/types/modules/archive/zip/stream.d.ts +4 -0
- package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/types/modules/pdf/index.d.ts +23 -2
- package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/package.json +1 -1
|
@@ -1,310 +1,137 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* PDF encryption support (Standard Security Handler,
|
|
2
|
+
* PDF encryption support (Standard Security Handler, V=5, R=5).
|
|
3
3
|
*
|
|
4
|
-
* Implements
|
|
4
|
+
* Implements AES-256 encryption compatible with PDF 2.0 (ISO 32000-2:2020).
|
|
5
5
|
* Supports:
|
|
6
6
|
* - User password (required to open the document)
|
|
7
7
|
* - Owner password (grants full access)
|
|
8
8
|
* - Permission flags (print, copy, modify, etc.)
|
|
9
9
|
*
|
|
10
|
-
*
|
|
10
|
+
* The file encryption key (FEK) is a random 256-bit key.
|
|
11
|
+
* All streams and strings are encrypted using AES-256-CBC with a random
|
|
12
|
+
* 16-byte IV prepended to each encrypted value.
|
|
13
|
+
*
|
|
14
|
+
* @see ISO 32000-2:2020, §7.6 — Encryption
|
|
11
15
|
*/
|
|
12
|
-
|
|
13
|
-
// Constants
|
|
14
|
-
// =============================================================================
|
|
15
|
-
/** PDF password padding string (32 bytes) per PDF spec §3.5.2 */
|
|
16
|
-
const PASSWORD_PADDING = new Uint8Array([
|
|
17
|
-
0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41, 0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
|
|
18
|
-
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80, 0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a
|
|
19
|
-
]);
|
|
16
|
+
import { sha256, aesCbcEncrypt, aesCbcEncryptRaw, aesEcbEncrypt, randomBytes, concatArrays } from "./crypto.js";
|
|
20
17
|
// =============================================================================
|
|
21
18
|
// Public API
|
|
22
19
|
// =============================================================================
|
|
23
20
|
/**
 * Build the encryption state for an AES-256 protected document (V=5, R=5).
 *
 * A fresh random 32-byte file encryption key is generated, then wrapped twice:
 * once under a key derived from the user password (UE) and once under a key
 * derived from the owner password (OE). The password validation records (U, O)
 * and the encrypted permissions block (Perms) are produced alongside.
 *
 * NOTE(review): ISO 32000-2 lists R=5 as deprecated in favour of R=6 —
 * confirm that R=5 is the intended revision.
 *
 * @param options - Encryption options; reads `userPassword` (defaults to ""),
 *   `ownerPassword` and `permissions`.
 * @returns Encryption state: `encryptionKey`, `oValue`, `uValue`, `oeValue`,
 *   `ueValue`, `permsValue`, `permissions` (the P integer) and `fileId`.
 */
export function initEncryption(options) {
    const userBytes = truncatePassword(options.userPassword ?? "");
    const ownerBytes = truncatePassword(options.ownerPassword);
    const permissions = computePermissions(options.permissions);

    // Random material is drawn in a fixed order: file key first, then the four salts.
    const encryptionKey = randomBytes(32);
    const userValidationSalt = randomBytes(8);
    const userKeySalt = randomBytes(8);
    const ownerValidationSalt = randomBytes(8);
    const ownerKeySalt = randomBytes(8);

    // U (48 bytes) = SHA-256(user password + validation salt) + validation salt + key salt
    const uValue = concatArrays(sha256(concatArrays(userBytes, userValidationSalt)), userValidationSalt, userKeySalt);

    // UE = file key encrypted under SHA-256(user password + key salt) with an all-zero IV.
    // The same zero IV instance is reused for OE below.
    const zeroIv = new Uint8Array(16);
    const userWrapKey = sha256(concatArrays(userBytes, userKeySalt));
    const ueValue = aesCbcEncryptRaw(encryptionKey, userWrapKey, zeroIv);

    // O (48 bytes) = SHA-256(owner password + validation salt + U) + validation salt + key salt
    const oValue = concatArrays(sha256(concatArrays(ownerBytes, ownerValidationSalt, uValue)), ownerValidationSalt, ownerKeySalt);

    // OE = file key encrypted under SHA-256(owner password + key salt + U) with the zero IV.
    const ownerWrapKey = sha256(concatArrays(ownerBytes, ownerKeySalt, uValue));
    const oeValue = aesCbcEncryptRaw(encryptionKey, ownerWrapKey, zeroIv);

    // Perms: one 16-byte block, AES-ECB encrypted with the file key.
    //   bytes 0-3   P value, little-endian
    //   bytes 4-7   0xFF
    //   byte  8     'T' (EncryptMetadata = true)
    //   bytes 9-11  'a' 'd' 'b'
    //   bytes 12-15 left as zero
    const permsBlock = new Uint8Array(16);
    new DataView(permsBlock.buffer).setInt32(0, permissions, true);
    permsBlock.fill(0xff, 4, 8);
    permsBlock.set([0x54, 0x61, 0x64, 0x62], 8);
    const permsValue = aesEcbEncrypt(permsBlock, encryptionKey);

    // Random 16-byte file identifier, used in the trailer.
    const fileId = randomBytes(16);

    return {
        encryptionKey,
        oValue,
        uValue,
        oeValue,
        ueValue,
        permsValue,
        permissions,
        fileId
    };
}
|
|
58
|
-
// =============================================================================
|
|
59
|
-
// RC4 Cipher
|
|
60
|
-
// =============================================================================
|
|
61
81
|
/**
 * AES-256-CBC encrypt the bytes of one PDF object (stream or string).
 *
 * The file encryption key is applied directly: the object and generation
 * numbers are accepted but not used, because no per-object key is derived
 * for V=5/R=5. A fresh random 16-byte IV is generated for every call and
 * placed in front of the ciphertext.
 *
 * @param data - Plain bytes to encrypt
 * @param _objectNumber - Unused
 * @param _generation - Unused
 * @param encryptionKey - File encryption key
 * @returns New byte array laid out as `IV (16 bytes) + ciphertext`
 */
export function encryptData(data, _objectNumber, _generation, encryptionKey) {
    const IV_LENGTH = 16;
    const iv = randomBytes(IV_LENGTH);
    const ciphertext = aesCbcEncrypt(data, encryptionKey, iv);
    // Output layout: [ IV | ciphertext ]
    const output = new Uint8Array(IV_LENGTH + ciphertext.length);
    output.set(iv, 0);
    output.set(ciphertext, IV_LENGTH);
    return output;
}
|
|
87
96
|
// =============================================================================
|
|
88
|
-
//
|
|
89
|
-
// =============================================================================
|
|
90
|
-
/**
|
|
91
|
-
* MD5 hash implementation (RFC 1321).
|
|
92
|
-
* Returns 16-byte digest.
|
|
93
|
-
*/
|
|
94
|
-
export function md5(input) {
|
|
95
|
-
// Pre-processing: padding
|
|
96
|
-
const msgLen = input.length;
|
|
97
|
-
const bitLen = msgLen * 8;
|
|
98
|
-
// Pad to 64-byte boundary (56 bytes mod 64, then 8 bytes length)
|
|
99
|
-
const padLen = ((56 - ((msgLen + 1) % 64) + 64) % 64) + 1;
|
|
100
|
-
const padded = new Uint8Array(msgLen + padLen + 8);
|
|
101
|
-
padded.set(input);
|
|
102
|
-
padded[msgLen] = 0x80;
|
|
103
|
-
// Append length in bits as 64-bit little-endian
|
|
104
|
-
const view = new DataView(padded.buffer);
|
|
105
|
-
view.setUint32(padded.length - 8, bitLen >>> 0, true);
|
|
106
|
-
view.setUint32(padded.length - 4, 0, true); // high 32 bits (always 0 for our sizes)
|
|
107
|
-
// Initialize hash values
|
|
108
|
-
let a0 = 0x67452301;
|
|
109
|
-
let b0 = 0xefcdab89;
|
|
110
|
-
let c0 = 0x98badcfe;
|
|
111
|
-
let d0 = 0x10325476;
|
|
112
|
-
// Process each 64-byte block
|
|
113
|
-
for (let i = 0; i < padded.length; i += 64) {
|
|
114
|
-
const M = new Uint32Array(16);
|
|
115
|
-
for (let j = 0; j < 16; j++) {
|
|
116
|
-
M[j] = view.getUint32(i + j * 4, true);
|
|
117
|
-
}
|
|
118
|
-
let A = a0;
|
|
119
|
-
let B = b0;
|
|
120
|
-
let C = c0;
|
|
121
|
-
let D = d0;
|
|
122
|
-
for (let j = 0; j < 64; j++) {
|
|
123
|
-
let F;
|
|
124
|
-
let g;
|
|
125
|
-
if (j < 16) {
|
|
126
|
-
F = (B & C) | (~B & D);
|
|
127
|
-
g = j;
|
|
128
|
-
}
|
|
129
|
-
else if (j < 32) {
|
|
130
|
-
F = (D & B) | (~D & C);
|
|
131
|
-
g = (5 * j + 1) % 16;
|
|
132
|
-
}
|
|
133
|
-
else if (j < 48) {
|
|
134
|
-
F = B ^ C ^ D;
|
|
135
|
-
g = (3 * j + 5) % 16;
|
|
136
|
-
}
|
|
137
|
-
else {
|
|
138
|
-
F = C ^ (B | ~D);
|
|
139
|
-
g = (7 * j) % 16;
|
|
140
|
-
}
|
|
141
|
-
F = (F + A + K[j] + M[g]) >>> 0;
|
|
142
|
-
A = D;
|
|
143
|
-
D = C;
|
|
144
|
-
C = B;
|
|
145
|
-
B = (B + rotl(F, S[j])) >>> 0;
|
|
146
|
-
}
|
|
147
|
-
a0 = (a0 + A) >>> 0;
|
|
148
|
-
b0 = (b0 + B) >>> 0;
|
|
149
|
-
c0 = (c0 + C) >>> 0;
|
|
150
|
-
d0 = (d0 + D) >>> 0;
|
|
151
|
-
}
|
|
152
|
-
// Produce the 128-bit digest
|
|
153
|
-
const digest = new Uint8Array(16);
|
|
154
|
-
const dv = new DataView(digest.buffer);
|
|
155
|
-
dv.setUint32(0, a0, true);
|
|
156
|
-
dv.setUint32(4, b0, true);
|
|
157
|
-
dv.setUint32(8, c0, true);
|
|
158
|
-
dv.setUint32(12, d0, true);
|
|
159
|
-
return digest;
|
|
160
|
-
}
|
|
161
|
-
function rotl(x, n) {
|
|
162
|
-
return ((x << n) | (x >>> (32 - n))) >>> 0;
|
|
163
|
-
}
|
|
164
|
-
// MD5 per-round shift amounts
|
|
165
|
-
const S = [
|
|
166
|
-
7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14,
|
|
167
|
-
20, 5, 9, 14, 20, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 6, 10, 15, 21, 6,
|
|
168
|
-
10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21
|
|
169
|
-
];
|
|
170
|
-
// MD5 per-round constants (floor(2^32 × abs(sin(i+1))))
|
|
171
|
-
const K = new Uint32Array([
|
|
172
|
-
0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
|
|
173
|
-
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
|
|
174
|
-
0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
|
|
175
|
-
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
|
|
176
|
-
0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
|
|
177
|
-
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
|
|
178
|
-
0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
|
|
179
|
-
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
|
|
180
|
-
]);
|
|
181
|
-
// =============================================================================
|
|
182
|
-
// PDF Password / Key Computation
|
|
97
|
+
// Internal Helpers
|
|
183
98
|
// =============================================================================
|
|
184
99
|
/**
|
|
185
|
-
*
|
|
100
|
+
* Truncate password to 127 bytes (UTF-8) per PDF 2.0 spec.
|
|
186
101
|
*/
|
|
187
|
-
function
|
|
188
|
-
const result = new Uint8Array(32);
|
|
102
|
+
function truncatePassword(password) {
|
|
189
103
|
const bytes = new TextEncoder().encode(password);
|
|
190
|
-
|
|
191
|
-
result.set(bytes.subarray(0, len));
|
|
192
|
-
result.set(PASSWORD_PADDING.subarray(0, 32 - len), len);
|
|
193
|
-
return result;
|
|
194
|
-
}
|
|
195
|
-
/**
|
|
196
|
-
* Compute the O (owner) value.
|
|
197
|
-
* Algorithm 3 from PDF spec §3.5.2.
|
|
198
|
-
*/
|
|
199
|
-
function computeOValue(ownerPassword, userPassword) {
|
|
200
|
-
// Step 1: MD5 hash of padded owner password
|
|
201
|
-
let hash = md5(padPassword(ownerPassword));
|
|
202
|
-
// Step 2: For revision 3, hash 50 more times
|
|
203
|
-
for (let i = 0; i < 50; i++) {
|
|
204
|
-
hash = md5(hash);
|
|
205
|
-
}
|
|
206
|
-
// Use first 16 bytes as RC4 key (128-bit / key length = 16)
|
|
207
|
-
const rc4Key = hash.subarray(0, 16);
|
|
208
|
-
// Step 3: RC4-encrypt the padded user password
|
|
209
|
-
let result = rc4(rc4Key, padPassword(userPassword));
|
|
210
|
-
// Step 4: For revision 3, iterate 1-19 with modified key
|
|
211
|
-
for (let i = 1; i <= 19; i++) {
|
|
212
|
-
const modKey = new Uint8Array(16);
|
|
213
|
-
for (let j = 0; j < 16; j++) {
|
|
214
|
-
modKey[j] = rc4Key[j] ^ i;
|
|
215
|
-
}
|
|
216
|
-
result = rc4(modKey, result);
|
|
217
|
-
}
|
|
218
|
-
return result;
|
|
219
|
-
}
|
|
220
|
-
/**
|
|
221
|
-
* Compute the encryption key.
|
|
222
|
-
* Algorithm 2 from PDF spec §3.5.2.
|
|
223
|
-
*/
|
|
224
|
-
function computeEncryptionKey(userPassword, oValue, permissions, fileId) {
|
|
225
|
-
// Concatenate: padded password + O value + P value (4 LE bytes) + file ID
|
|
226
|
-
const paddedPwd = padPassword(userPassword);
|
|
227
|
-
const input = new Uint8Array(32 + 32 + 4 + fileId.length);
|
|
228
|
-
input.set(paddedPwd);
|
|
229
|
-
input.set(oValue, 32);
|
|
230
|
-
const pView = new DataView(input.buffer, input.byteOffset);
|
|
231
|
-
pView.setInt32(64, permissions, true);
|
|
232
|
-
input.set(fileId, 68);
|
|
233
|
-
let hash = md5(input);
|
|
234
|
-
// For revision 3, hash 50 more times
|
|
235
|
-
for (let i = 0; i < 50; i++) {
|
|
236
|
-
hash = md5(hash.subarray(0, 16));
|
|
237
|
-
}
|
|
238
|
-
return hash.subarray(0, 16); // 128-bit key
|
|
239
|
-
}
|
|
240
|
-
/**
|
|
241
|
-
* Compute the U (user) value.
|
|
242
|
-
* Algorithm 5 from PDF spec §3.5.2 (revision 3).
|
|
243
|
-
*/
|
|
244
|
-
function computeUValue(encryptionKey, fileId) {
|
|
245
|
-
// Step 1: MD5 hash of padding + file ID
|
|
246
|
-
const hashInput = new Uint8Array(32 + fileId.length);
|
|
247
|
-
hashInput.set(PASSWORD_PADDING);
|
|
248
|
-
hashInput.set(fileId, 32);
|
|
249
|
-
const hash = md5(hashInput);
|
|
250
|
-
// Step 2: RC4-encrypt with the encryption key
|
|
251
|
-
let result = rc4(encryptionKey, hash);
|
|
252
|
-
// Step 3: Iterate 1-19 with modified key
|
|
253
|
-
for (let i = 1; i <= 19; i++) {
|
|
254
|
-
const modKey = new Uint8Array(16);
|
|
255
|
-
for (let j = 0; j < 16; j++) {
|
|
256
|
-
modKey[j] = encryptionKey[j] ^ i;
|
|
257
|
-
}
|
|
258
|
-
result = rc4(modKey, result);
|
|
259
|
-
}
|
|
260
|
-
// Pad to 32 bytes with arbitrary padding
|
|
261
|
-
const uValue = new Uint8Array(32);
|
|
262
|
-
uValue.set(result);
|
|
263
|
-
return uValue;
|
|
104
|
+
return bytes.length > 127 ? bytes.subarray(0, 127) : bytes;
|
|
264
105
|
}
|
|
265
106
|
/**
 * Build the permissions integer (P value) from a set of permission flags.
 *
 * The result starts from a base mask in which the upper 20 bits and the two
 * bits of 0b11000000 are already set; each truthy flag then switches on its
 * own bit. The value is returned as a signed 32-bit integer.
 *
 * @param perms - Optional flags object (`print`, `modify`, `copy`, `annotate`,
 *   `fillForms`, `accessibility`, `assemble`, `printHighQuality`)
 * @returns Signed 32-bit P value
 */
function computePermissions(perms) {
    // Flag name -> zero-based bit index, in the order the flags are checked.
    const flagBits = [
        ["print", 2],
        ["modify", 3],
        ["copy", 4],
        ["annotate", 5],
        ["fillForms", 8],
        ["accessibility", 9],
        ["assemble", 10],
        ["printHighQuality", 11]
    ];
    let value = 0xfffff000 | 0b11000000;
    for (const [flag, bit] of flagBits) {
        if (perms?.[flag]) {
            value |= 1 << bit;
        }
    }
    // Coerce to a signed 32-bit integer.
    return value | 0;
}
|
|
299
|
-
/**
|
|
300
|
-
* Generate a random file identifier (16 bytes).
|
|
301
|
-
*/
|
|
302
|
-
function generateFileId() {
|
|
303
|
-
// Use MD5 of current timestamp + random for determinism in tests
|
|
304
|
-
const seed = new Uint8Array(16);
|
|
305
|
-
const now = Date.now();
|
|
306
|
-
const view = new DataView(seed.buffer);
|
|
307
|
-
view.setFloat64(0, now, true);
|
|
308
|
-
view.setFloat64(8, Math.random() * 1e15, true);
|
|
309
|
-
return md5(seed);
|
|
310
|
-
}
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PDF file writer.
|
|
3
3
|
*
|
|
4
|
-
* Assembles a complete PDF document from indirect objects.
|
|
4
|
+
* Assembles a complete PDF 2.0 document from indirect objects.
|
|
5
5
|
* Handles the four sections of a PDF file:
|
|
6
|
-
* 1. Header (%PDF-
|
|
6
|
+
* 1. Header (%PDF-2.0)
|
|
7
7
|
* 2. Body (indirect objects)
|
|
8
8
|
* 3. Cross-reference table
|
|
9
9
|
* 4. Trailer (with document catalog reference)
|
|
10
10
|
*
|
|
11
|
-
*
|
|
11
|
+
* Encryption uses AES-256 (V=5, R=5) per ISO 32000-2:2020.
|
|
12
|
+
*
|
|
13
|
+
* @see ISO 32000-2:2020, Chapter 7.5 — File Structure
|
|
12
14
|
*/
|
|
13
15
|
import { PdfDict, pdfRef, pdfString, pdfHexString, pdfDate, pdfNumber } from "./pdf-object.js";
|
|
14
16
|
import { PdfStructureError } from "../errors.js";
|
|
@@ -19,7 +21,7 @@ import { encryptData } from "./encryption.js";
|
|
|
19
21
|
// PDF Writer
|
|
20
22
|
// =============================================================================
|
|
21
23
|
/**
|
|
22
|
-
* Constructs a valid PDF
|
|
24
|
+
* Constructs a valid PDF 2.0 file from a set of indirect objects.
|
|
23
25
|
*
|
|
24
26
|
* Usage:
|
|
25
27
|
* 1. Allocate object numbers with allocObject()
|
|
@@ -168,7 +170,7 @@ export class PdfWriter {
|
|
|
168
170
|
let byteOffset = 0;
|
|
169
171
|
// --- Header ---
|
|
170
172
|
// Include a comment with high bytes to signal binary content per PDF spec §3.4.1
|
|
171
|
-
const headerStr = "%PDF-
|
|
173
|
+
const headerStr = "%PDF-2.0\n";
|
|
172
174
|
const headerStrBytes = encoder.encode(headerStr);
|
|
173
175
|
chunks.push(headerStrBytes);
|
|
174
176
|
byteOffset += headerStrBytes.length;
|
|
@@ -220,16 +222,23 @@ export class PdfWriter {
|
|
|
220
222
|
chunks.push(objFooter);
|
|
221
223
|
byteOffset += objFooter.length;
|
|
222
224
|
}
|
|
223
|
-
// --- Encrypt dictionary (
|
|
225
|
+
// --- Encrypt dictionary (V=5, R=5, AES-256) ---
|
|
224
226
|
if (this.encryption) {
|
|
225
227
|
const encDict = new PdfDict()
|
|
226
228
|
.set("Filter", "/Standard")
|
|
227
|
-
.set("V", "
|
|
228
|
-
.set("R", "
|
|
229
|
-
.set("Length", "
|
|
229
|
+
.set("V", "5")
|
|
230
|
+
.set("R", "5")
|
|
231
|
+
.set("Length", "256")
|
|
230
232
|
.set("P", String(this.encryption.permissions))
|
|
231
233
|
.set("O", pdfHexString(this.encryption.oValue))
|
|
232
|
-
.set("U", pdfHexString(this.encryption.uValue))
|
|
234
|
+
.set("U", pdfHexString(this.encryption.uValue))
|
|
235
|
+
.set("OE", pdfHexString(this.encryption.oeValue))
|
|
236
|
+
.set("UE", pdfHexString(this.encryption.ueValue))
|
|
237
|
+
.set("Perms", pdfHexString(this.encryption.permsValue))
|
|
238
|
+
.set("EncryptMetadata", "true")
|
|
239
|
+
.set("CF", "<< /StdCF << /Type /CryptFilter /CFM /AESV3 /AuthEvent /DocOpen /Length 32 >> >>")
|
|
240
|
+
.set("StmF", "/StdCF")
|
|
241
|
+
.set("StrF", "/StdCF");
|
|
233
242
|
const encContent = encDict.toString();
|
|
234
243
|
const encObj = {
|
|
235
244
|
objectNumber: encryptObjNum,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PDF module for excelts.
|
|
3
3
|
*
|
|
4
|
-
* A full-featured, zero-dependency PDF engine.
|
|
4
|
+
* A full-featured, zero-dependency PDF engine for both writing and reading.
|
|
5
5
|
*
|
|
6
|
-
* @example Standalone:
|
|
6
|
+
* @example Standalone PDF generation:
|
|
7
7
|
* ```typescript
|
|
8
8
|
* import { pdf } from "excelts/pdf";
|
|
9
9
|
*
|
|
@@ -25,15 +25,33 @@
|
|
|
25
25
|
* const bytes = excelToPdf(workbook);
|
|
26
26
|
* ```
|
|
27
27
|
*
|
|
28
|
+
* @example Read PDF — extract text, images, and metadata:
|
|
29
|
+
* ```typescript
|
|
30
|
+
* import { readPdf } from "excelts/pdf";
|
|
31
|
+
*
|
|
32
|
+
* const result = readPdf(pdfBytes);
|
|
33
|
+
* console.log(result.text); // All text
|
|
34
|
+
* console.log(result.pages[0].text); // Page 1 text
|
|
35
|
+
* console.log(result.pages[0].images); // Page 1 images
|
|
36
|
+
* console.log(result.pages[0].annotations); // Page 1 annotations
|
|
37
|
+
* console.log(result.metadata.title); // Document title
|
|
38
|
+
* console.log(result.formFields); // Form fields
|
|
39
|
+
* ```
|
|
40
|
+
*
|
|
28
41
|
* @module pdf
|
|
29
42
|
*/
|
|
30
43
|
// =============================================================================
|
|
31
|
-
// Public API
|
|
44
|
+
// Public API — Writing
|
|
32
45
|
// =============================================================================
|
|
33
46
|
/** Standalone PDF generation — accepts plain arrays, sheet objects, or workbooks. */
|
|
34
47
|
export { pdf } from "./pdf.js";
|
|
35
48
|
/** Excel-to-PDF conversion — accepts an Excel Workbook instance. */
|
|
36
49
|
export { excelToPdf } from "./excel-bridge.js";
|
|
50
|
+
// =============================================================================
|
|
51
|
+
// Public API — Reading
|
|
52
|
+
// =============================================================================
|
|
53
|
+
/** Read a PDF file and extract text, images, and metadata. */
|
|
54
|
+
export { readPdf } from "./reader/pdf-reader.js";
|
|
37
55
|
export { PageSizes } from "./types.js";
|
|
38
56
|
// =============================================================================
|
|
39
57
|
// Errors
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF annotation extractor.
|
|
3
|
+
*
|
|
4
|
+
* Extracts annotations from a PDF page's `/Annots` array.
|
|
5
|
+
* Reads the standard annotation subtypes defined in ISO 32000-1:2008 (PDF 1.7), §12.5; Widget and Popup annotations are skipped.
|
|
6
|
+
*
|
|
7
|
+
* Common annotation types:
|
|
8
|
+
* - **Link** — Hyperlinks (URI, GoTo, GoToR)
|
|
9
|
+
* - **Text** — Sticky notes / comments
|
|
10
|
+
* - **FreeText** — Inline text annotations
|
|
11
|
+
* - **Highlight / Underline / StrikeOut / Squiggly** — Text markup
|
|
12
|
+
* - **Stamp** — Rubber stamp annotations
|
|
13
|
+
* - **Popup** — Associated popup windows (skipped by this extractor as auxiliary)
|
|
14
|
+
* - **Widget** — Form field widgets (handled separately by form-extractor)
|
|
15
|
+
*
|
|
16
|
+
* @see ISO 32000-1:2008 (PDF 1.7), §12.5 — Annotations
|
|
17
|
+
*/
|
|
18
|
+
import { isPdfArray, dictGetName, dictGetNumber, decodePdfStringBytes } from "./pdf-parser.js";
|
|
19
|
+
import { getDictStringValue } from "./reader-utils.js";
|
|
20
|
+
// =============================================================================
|
|
21
|
+
// Public API
|
|
22
|
+
// =============================================================================
|
|
23
|
+
/**
 * Collect the annotations attached to a PDF page.
 *
 * Widget annotations (form fields, handled by the form extractor) and Popup
 * annotations (auxiliary) are left out. An entry that throws while being
 * resolved or parsed is dropped without failing the whole page.
 *
 * @param pageDict - The page dictionary
 * @param doc - The PDF document, used to resolve indirect references
 * @returns Array of extracted annotations; empty when the page has no usable `/Annots` array
 */
export function extractAnnotationsFromPage(pageDict, doc) {
    const rawAnnots = pageDict.get("Annots");
    if (!rawAnnots) {
        return [];
    }
    // `/Annots` may be an indirect reference, so resolve it before the array check.
    const entries = doc.deref(rawAnnots);
    if (!isPdfArray(entries)) {
        return [];
    }
    const skippedSubtypes = new Set(["Widget", "Popup"]);
    const collected = [];
    for (const entry of entries) {
        try {
            const annotDict = doc.derefDict(entry);
            if (!annotDict) {
                continue;
            }
            const subtype = dictGetName(annotDict, "Subtype") ?? "";
            if (skippedSubtypes.has(subtype)) {
                continue;
            }
            const parsed = parseAnnotation(annotDict, subtype, doc);
            if (parsed) {
                collected.push(parsed);
            }
        }
        catch {
            // Best effort: a malformed annotation is skipped, the rest are kept.
        }
    }
    return collected;
}
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// Parsing
|
|
71
|
+
// =============================================================================
|
|
72
|
+
/**
 * Convert one annotation dictionary into a plain annotation record.
 *
 * For Link annotations the target is read from the `/A` action dictionary:
 * - `URI` actions fill `uri` from the action's `/URI` entry,
 * - `GoTo` actions fill `destination` from `/D`,
 * - `GoToR` actions fill `uri` from the action's `/F` entry.
 * When that yields nothing, the annotation's own `/Dest` entry is used.
 * `/D` and `/Dest` are resolved the same way (see resolveNamedDestination).
 *
 * @param dict - The annotation dictionary
 * @param subtype - The annotation subtype name, copied into the result
 * @param doc - The PDF document, used to resolve indirect references
 * @returns The annotation record, or null when `/Rect` is missing or unusable
 */
function parseAnnotation(dict, subtype, doc) {
    const rect = parseRect(dict.get("Rect"), doc);
    if (!rect) {
        return null;
    }
    const contents = getDictStringValue(dict, "Contents", doc);
    const author = getDictStringValue(dict, "T", doc);
    const subject = getDictStringValue(dict, "Subj", doc);
    const modifiedDate = getDictStringValue(dict, "M", doc);
    const flags = dictGetNumber(dict, "F") ?? 0;
    const color = parseColorArray(dict.get("C"), doc);
    // Link-specific fields
    let uri = "";
    let destination = "";
    if (subtype === "Link") {
        const actionObj = doc.derefDict(dict.get("A"));
        if (actionObj) {
            const actionType = dictGetName(actionObj, "S");
            if (actionType === "URI") {
                uri = getDictStringValue(actionObj, "URI", doc);
            }
            else if (actionType === "GoTo") {
                // Resolve /D exactly like /Dest below, so indirect references and
                // byte-string destinations are not silently dropped.
                destination = resolveNamedDestination(actionObj.get("D"), doc);
            }
            else if (actionType === "GoToR") {
                uri = getDictStringValue(actionObj, "F", doc);
            }
        }
        // Check /Dest directly (older PDFs use this instead of /A)
        if (!uri && !destination) {
            destination = resolveNamedDestination(dict.get("Dest"), doc);
        }
    }
    return {
        subtype,
        rect,
        contents,
        author,
        subject,
        modifiedDate,
        uri,
        destination,
        flags,
        color
    };
}
/**
 * Resolve a destination entry to its textual form.
 *
 * @param obj - Raw `/D` or `/Dest` value (may be missing or an indirect reference)
 * @param doc - The PDF document, used to resolve indirect references
 * @returns The resolved value when it is a JS string, the decoded text when it
 *   is a byte string (`Uint8Array`), and "" for anything else (including
 *   explicit destination arrays, which are not reported)
 */
function resolveNamedDestination(obj, doc) {
    if (!obj) {
        return "";
    }
    const resolved = doc.deref(obj);
    if (typeof resolved === "string") {
        return resolved;
    }
    if (resolved instanceof Uint8Array) {
        return decodePdfStringBytes(resolved);
    }
    return "";
}
|
|
130
|
+
/**
 * Read a rectangle entry into `{ x1, y1, x2, y2 }`.
 *
 * The entry is resolved through the document first. Only the first four
 * elements are used, and any element that is not a number is read as 0.
 *
 * @param obj - Raw rectangle value (may be missing or an indirect reference)
 * @param doc - The PDF document, used to resolve indirect references
 * @returns The rectangle, or null when the value is missing, is not an array,
 *   or has fewer than four elements
 */
function parseRect(obj, doc) {
    if (!obj) {
        return null;
    }
    const entries = doc.deref(obj);
    if (!isPdfArray(entries) || entries.length < 4) {
        return null;
    }
    const [x1, y1, x2, y2] = entries.map((entry) => (typeof entry === "number" ? entry : 0));
    return { x1, y1, x2, y2 };
}
|
|
146
|
+
/**
 * Read a colour entry into an array of numeric components.
 *
 * The entry is resolved through the document first. Any component that is not
 * a number is read as 0.
 *
 * @param obj - Raw colour value (may be missing or an indirect reference)
 * @param doc - The PDF document, used to resolve indirect references
 * @returns The components, or an empty array when the value is missing or not an array
 */
function parseColorArray(obj, doc) {
    if (obj) {
        const components = doc.deref(obj);
        if (isPdfArray(components)) {
            return components.map((component) => (typeof component === "number" ? component : 0));
        }
    }
    return [];
}
|