@cj-tech-master/excelts 8.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/README_zh.md +6 -0
- package/dist/browser/modules/archive/zip/stream.d.ts +4 -0
- package/dist/browser/modules/archive/zip/stream.js +53 -0
- package/dist/browser/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/browser/modules/pdf/core/crypto.js +637 -0
- package/dist/browser/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/browser/modules/pdf/core/encryption.js +88 -261
- package/dist/browser/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/browser/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/browser/modules/pdf/index.d.ts +23 -2
- package/dist/browser/modules/pdf/index.js +21 -3
- package/dist/browser/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/browser/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/browser/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/browser/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/browser/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/browser/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/browser/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/browser/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/browser/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/browser/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/browser/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/browser/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/browser/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/browser/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/browser/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/browser/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/browser/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/browser/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/browser/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/browser/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/browser/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/dist/browser/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/cjs/modules/archive/zip/stream.js +53 -0
- package/dist/cjs/modules/pdf/core/crypto.js +649 -0
- package/dist/cjs/modules/pdf/core/encryption.js +88 -263
- package/dist/cjs/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/cjs/modules/pdf/index.js +23 -4
- package/dist/cjs/modules/pdf/reader/annotation-extractor.js +158 -0
- package/dist/cjs/modules/pdf/reader/cmap-parser.js +326 -0
- package/dist/cjs/modules/pdf/reader/content-interpreter.js +718 -0
- package/dist/cjs/modules/pdf/reader/font-decoder.js +1518 -0
- package/dist/cjs/modules/pdf/reader/form-extractor.js +358 -0
- package/dist/cjs/modules/pdf/reader/image-extractor.js +223 -0
- package/dist/cjs/modules/pdf/reader/metadata-reader.js +278 -0
- package/dist/cjs/modules/pdf/reader/pdf-decrypt.js +447 -0
- package/dist/cjs/modules/pdf/reader/pdf-document.js +822 -0
- package/dist/cjs/modules/pdf/reader/pdf-parser.js +301 -0
- package/dist/cjs/modules/pdf/reader/pdf-reader.js +203 -0
- package/dist/cjs/modules/pdf/reader/pdf-tokenizer.js +517 -0
- package/dist/cjs/modules/pdf/reader/reader-utils.js +30 -0
- package/dist/cjs/modules/pdf/reader/stream-filters.js +459 -0
- package/dist/cjs/modules/pdf/reader/text-reconstruction.js +467 -0
- package/dist/esm/modules/archive/zip/stream.js +53 -0
- package/dist/esm/modules/pdf/core/crypto.js +637 -0
- package/dist/esm/modules/pdf/core/encryption.js +88 -261
- package/dist/esm/modules/pdf/core/pdf-writer.js +19 -10
- package/dist/esm/modules/pdf/index.js +21 -3
- package/dist/esm/modules/pdf/reader/annotation-extractor.js +155 -0
- package/dist/esm/modules/pdf/reader/cmap-parser.js +321 -0
- package/dist/esm/modules/pdf/reader/content-interpreter.js +715 -0
- package/dist/esm/modules/pdf/reader/font-decoder.js +1513 -0
- package/dist/esm/modules/pdf/reader/form-extractor.js +355 -0
- package/dist/esm/modules/pdf/reader/image-extractor.js +220 -0
- package/dist/esm/modules/pdf/reader/metadata-reader.js +275 -0
- package/dist/esm/modules/pdf/reader/pdf-decrypt.js +443 -0
- package/dist/esm/modules/pdf/reader/pdf-document.js +818 -0
- package/dist/esm/modules/pdf/reader/pdf-parser.js +285 -0
- package/dist/esm/modules/pdf/reader/pdf-reader.js +200 -0
- package/dist/esm/modules/pdf/reader/pdf-tokenizer.js +543 -0
- package/dist/esm/modules/pdf/reader/reader-utils.js +27 -0
- package/dist/esm/modules/pdf/reader/stream-filters.js +456 -0
- package/dist/esm/modules/pdf/reader/text-reconstruction.js +463 -0
- package/dist/iife/excelts.iife.js +703 -267
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +35 -35
- package/dist/types/modules/archive/zip/stream.d.ts +4 -0
- package/dist/types/modules/pdf/core/crypto.d.ts +65 -0
- package/dist/types/modules/pdf/core/encryption.d.ts +23 -20
- package/dist/types/modules/pdf/core/pdf-writer.d.ts +6 -4
- package/dist/types/modules/pdf/index.d.ts +23 -2
- package/dist/types/modules/pdf/reader/annotation-extractor.d.ts +63 -0
- package/dist/types/modules/pdf/reader/cmap-parser.d.ts +70 -0
- package/dist/types/modules/pdf/reader/content-interpreter.d.ts +57 -0
- package/dist/types/modules/pdf/reader/font-decoder.d.ts +58 -0
- package/dist/types/modules/pdf/reader/form-extractor.d.ts +48 -0
- package/dist/types/modules/pdf/reader/image-extractor.d.ts +55 -0
- package/dist/types/modules/pdf/reader/metadata-reader.d.ts +56 -0
- package/dist/types/modules/pdf/reader/pdf-decrypt.d.ts +26 -0
- package/dist/types/modules/pdf/reader/pdf-document.d.ts +191 -0
- package/dist/types/modules/pdf/reader/pdf-parser.d.ts +65 -0
- package/dist/types/modules/pdf/reader/pdf-reader.d.ts +143 -0
- package/dist/types/modules/pdf/reader/pdf-tokenizer.d.ts +101 -0
- package/dist/types/modules/pdf/reader/reader-utils.d.ts +15 -0
- package/dist/types/modules/pdf/reader/stream-filters.d.ts +20 -0
- package/dist/types/modules/pdf/reader/text-reconstruction.d.ts +44 -0
- package/package.json +1 -1
|
@@ -0,0 +1,459 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PDF stream filter decoder chain.
|
|
4
|
+
*
|
|
5
|
+
* Decodes PDF stream data by applying the appropriate filter(s)
|
|
6
|
+
* specified in the stream dictionary's /Filter entry.
|
|
7
|
+
*
|
|
8
|
+
* Supported filters:
|
|
9
|
+
* - /FlateDecode (zlib/deflate compression)
|
|
10
|
+
* - /ASCII85Decode (ASCII base-85 encoding)
|
|
11
|
+
* - /ASCIIHexDecode (ASCII hexadecimal encoding)
|
|
12
|
+
* - /LZWDecode (LZW compression)
|
|
13
|
+
* - /RunLengthDecode (run-length encoding)
|
|
14
|
+
*
|
|
15
|
+
* @see PDF Reference 1.7, §3.3 - Filters
|
|
16
|
+
*/
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
+
exports.decodeStreamFilters = decodeStreamFilters;
|
|
19
|
+
const pdf_parser_1 = require("./pdf-parser");
|
|
20
|
+
const compress_1 = require("../../archive/compression/compress.js");
|
|
21
|
+
const deflate_fallback_1 = require("../../archive/compression/deflate-fallback.js");
|
|
22
|
+
// =============================================================================
|
|
23
|
+
// Public API
|
|
24
|
+
// =============================================================================
|
|
25
|
+
/**
 * Run raw stream bytes through the decoder(s) named in the stream dictionary.
 *
 * The dictionary's "Filter" entry may be a single filter name (string) or an
 * array of names that are applied left to right. Decoder parameters come from
 * "DecodeParms", falling back to its "DP" alias; for a filter array the
 * parameters are matched to filters by index.
 *
 * @param data - Encoded stream bytes (presumably a Uint8Array — confirm against callers).
 * @param dict - Stream dictionary; only its `get(key)` method is used here.
 * @returns The decoded bytes, or `data` itself when "Filter" is absent or is
 *          neither a string nor a PDF array.
 */
function decodeStreamFilters(data, dict) {
    const filter = dict.get("Filter");
    if (filter == null) {
        // No filter declared: the stream is stored verbatim.
        return data;
    }
    const decodeParms = dict.get("DecodeParms") ?? dict.get("DP");
    // Anything that is not a PDF dictionary counts as "no parameters".
    const asParmDict = (candidate) => ((0, pdf_parser_1.isPdfDict)(candidate) ? candidate : undefined);
    if (typeof filter === "string") {
        return applyFilter(data, filter, asParmDict(decodeParms));
    }
    if (!(0, pdf_parser_1.isPdfArray)(filter)) {
        return data;
    }
    // Filter chain: each stage consumes the previous stage's output.
    const parmsList = (0, pdf_parser_1.isPdfArray)(decodeParms) ? decodeParms : [];
    let decoded = data;
    for (let stage = 0; stage < filter.length; stage++) {
        decoded = applyFilter(decoded, filter[stage], asParmDict(parmsList[stage]));
    }
    return decoded;
}
|
|
53
|
+
// =============================================================================
|
|
54
|
+
// Filter Application
|
|
55
|
+
// =============================================================================
|
|
56
|
+
/**
 * Apply one named PDF stream filter to `data`.
 *
 * Both the full filter names and their abbreviated forms are recognised.
 * Filters this module does not decode are passed through untouched:
 * DCTDecode/DCT, JPXDecode, CCITTFaxDecode/CCF, JBIG2Decode, Crypt
 * (left to the decryption layer) and any unrecognised name.
 *
 * @param data - Bytes to decode.
 * @param filterName - Filter name taken from the stream dictionary.
 * @param parms - Optional decode-parameter dictionary; forwarded to the
 *                Flate and LZW decoders only.
 * @returns Decoded bytes, or `data` itself for pass-through filters.
 */
function applyFilter(data, filterName, parms) {
    if (filterName === "FlateDecode" || filterName === "Fl") {
        return decodeFlateDecode(data, parms);
    }
    if (filterName === "ASCII85Decode" || filterName === "A85") {
        return decodeAscii85(data);
    }
    if (filterName === "ASCIIHexDecode" || filterName === "AHx") {
        return decodeAsciiHex(data);
    }
    if (filterName === "LZWDecode" || filterName === "LZW") {
        return decodeLzw(data, parms);
    }
    if (filterName === "RunLengthDecode" || filterName === "RL") {
        return decodeRunLength(data);
    }
    // Image codecs (JPEG, JPEG 2000, CCITT fax, JBIG2), Crypt and unknown
    // filters are all returned as-is.
    return data;
}
|
|
95
|
+
// =============================================================================
|
|
96
|
+
// FlateDecode
|
|
97
|
+
// =============================================================================
|
|
98
|
+
/**
 * Decode a FlateDecode stream and undo its predictor, if one is declared.
 *
 * Decompression is best-effort: the zlib-wrapped form is tried first, then
 * raw deflate; if both throw, the input is returned unchanged.
 *
 * @param data - Compressed bytes; an empty input is returned as-is.
 * @param parms - Optional decode-parameter dictionary; a "Predictor" value
 *                greater than 1 triggers predictor reversal.
 * @returns The decompressed (and de-predicted) bytes, or `data` when it is
 *          empty or cannot be inflated.
 */
function decodeFlateDecode(data, parms) {
    if (data.length === 0) {
        return data;
    }
    // Ordered by preference: zlib container first, headerless deflate second.
    const attempts = [
        () => (0, compress_1.unzlibSync)(data),
        () => (0, deflate_fallback_1.inflateRaw)(data)
    ];
    let inflated;
    let inflatedOk = false;
    for (const attempt of attempts) {
        try {
            inflated = attempt();
            inflatedOk = true;
            break;
        }
        catch {
            // Deliberately ignored: fall through to the next strategy.
        }
    }
    if (!inflatedOk) {
        // Last resort: hand back the bytes untouched.
        return data;
    }
    const predictor = parms ? ((0, pdf_parser_1.dictGetNumber)(parms, "Predictor") ?? 1) : 1;
    return predictor > 1 ? undoPredictor(inflated, parms) : inflated;
}
|
|
126
|
+
/**
 * Reverse the predictor transform declared in a decode-parameter dictionary.
 *
 * Reads "Predictor", "Columns", "Colors" and "BitsPerComponent" (defaults
 * 1, 1, 1 and 8) and dispatches on the predictor value:
 * 2 selects the TIFF predictor, 10 through 15 select the PNG predictors,
 * and every other value leaves the data untouched.
 *
 * @param data - Decompressed bytes that still carry prediction.
 * @param parms - Decode-parameter dictionary.
 * @returns The reconstructed bytes, or `data` itself when no supported
 *          predictor applies.
 * @see PDF Reference 1.7, Table 3.8
 */
function undoPredictor(data, parms) {
    const numberOr = (key, fallback) => (0, pdf_parser_1.dictGetNumber)(parms, key) ?? fallback;
    const predictor = numberOr("Predictor", 1);
    const columns = numberOr("Columns", 1);
    const colors = numberOr("Colors", 1);
    const bitsPerComponent = numberOr("BitsPerComponent", 8);
    if (predictor === 2) {
        return undoTiffPredictor(data, columns, colors, bitsPerComponent);
    }
    const isPngPredictor = predictor >= 10 && predictor <= 15;
    if (isPngPredictor) {
        return undoPngPredictor(data, columns, colors, bitsPerComponent);
    }
    // Predictor 1 (none) and unsupported values: nothing to undo.
    return data;
}
|
|
149
|
+
/**
 * Undo TIFF Predictor 2 (horizontal differencing).
 *
 * Within each row the first pixel is stored verbatim and every later sample
 * is stored as the difference from the same component of the pixel to its
 * left. 8-bit components are summed per byte; 16-bit components are summed
 * as big-endian 16-bit values so a carry propagates between the two bytes.
 *
 * NOTE(review): component depths below 8 bits are still processed byte-wise,
 * which does not undo differencing on packed samples — confirm whether such
 * streams need to be supported.
 *
 * @param data - Predicted bytes, laid out as consecutive rows.
 * @param columns - Samples per row ("Columns").
 * @param colors - Components per sample ("Colors").
 * @param bitsPerComponent - Bits per component ("BitsPerComponent").
 * @returns A new array of the same length with differencing removed. Bytes
 *          of an incomplete trailing row are copied through unchanged. When
 *          the parameters do not describe a positive row size, `data` itself
 *          is returned.
 */
function undoTiffPredictor(data, columns, colors, bitsPerComponent) {
    const bytesPerPixel = Math.ceil((colors * bitsPerComponent) / 8);
    const rowBytes = Math.ceil((columns * colors * bitsPerComponent) / 8);
    // A zero, negative or NaN row size would make the row count Infinity (or
    // nonsensical) and the row loop would never terminate.
    if (!(rowBytes > 0) || !(bytesPerPixel > 0)) {
        return data;
    }
    const rows = Math.floor(data.length / rowBytes);
    // Work on a copy: the first pixel of each row and any trailing partial row
    // are then already in place, and the input is never mutated.
    const result = new Uint8Array(data);
    const wideSamples = bitsPerComponent === 16;
    for (let row = 0; row < rows; row++) {
        const rowStart = row * rowBytes;
        const rowEnd = rowStart + rowBytes;
        if (wideSamples) {
            // Each component is two bytes, most significant byte first.
            for (let pos = rowStart + bytesPerPixel; pos + 1 < rowEnd; pos += 2) {
                const left = (result[pos - bytesPerPixel] << 8) | result[pos - bytesPerPixel + 1];
                const sum = ((result[pos] << 8) | result[pos + 1]) + left;
                result[pos] = (sum >> 8) & 0xff;
                result[pos + 1] = sum & 0xff;
            }
        }
        else {
            // result[pos] still holds the raw delta; the byte one pixel to the
            // left has already been reconstructed.
            for (let pos = rowStart + bytesPerPixel; pos < rowEnd; pos++) {
                result[pos] = (result[pos] + result[pos - bytesPerPixel]) & 0xff;
            }
        }
    }
    return result;
}
|
|
170
|
+
/**
 * Reverse PNG-style row filtering.
 *
 * The input is a sequence of rows, each made of one filter-type byte
 * followed by the filtered row bytes. Filter types 0-4 are None, Sub, Up,
 * Average and Paeth; any other type byte is treated like None. Bytes left
 * over after the last complete row are dropped.
 *
 * @param data - Filtered bytes including the per-row filter-type bytes.
 * @param columns - Samples per row.
 * @param colors - Components per sample.
 * @param bitsPerComponent - Bits per component.
 * @returns A new Uint8Array holding the reconstructed rows without the
 *          filter-type bytes.
 */
function undoPngPredictor(data, columns, colors, bitsPerComponent) {
    // Distance in bytes to the "left" neighbour; never less than one byte.
    const pixelStride = Math.max(1, Math.ceil((colors * bitsPerComponent) / 8));
    const rowBytes = Math.ceil((columns * colors * bitsPerComponent) / 8);
    const encodedRowSize = rowBytes + 1; // filter-type byte + payload
    const rowCount = Math.floor(data.length / encodedRowSize);
    const decoded = new Uint8Array(rowCount * rowBytes);
    for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
        const srcBase = rowIndex * encodedRowSize;
        const dstBase = rowIndex * rowBytes;
        const filterTag = data[srcBase];
        const hasRowAbove = rowIndex > 0;
        for (let col = 0; col < rowBytes; col++) {
            const raw = data[srcBase + 1 + col];
            if (filterTag < 1 || filterTag > 4 || filterTag !== Math.floor(filterTag)) {
                // None, or an unrecognised filter type: stored verbatim.
                decoded[dstBase + col] = raw;
                continue;
            }
            const hasLeft = col >= pixelStride;
            const left = hasLeft ? decoded[dstBase + col - pixelStride] : 0;
            const up = hasRowAbove ? decoded[dstBase - rowBytes + col] : 0;
            let predicted;
            if (filterTag === 1) {
                predicted = left; // Sub
            }
            else if (filterTag === 2) {
                predicted = up; // Up
            }
            else if (filterTag === 3) {
                predicted = (left + up) >> 1; // Average
            }
            else {
                // Paeth additionally needs the upper-left neighbour.
                const upLeft = hasRowAbove && hasLeft ? decoded[dstBase - rowBytes + col - pixelStride] : 0;
                predicted = paethPredictor(left, up, upLeft);
            }
            decoded[dstBase + col] = (raw + predicted) & 0xff;
        }
    }
    return decoded;
}
|
|
213
|
+
/**
 * Paeth predictor: choose whichever neighbour is closest to `a + b - c`.
 *
 * Ties are resolved in the order left, above, upper-left.
 *
 * @param a - Left neighbour.
 * @param b - Neighbour above.
 * @param c - Upper-left neighbour.
 * @returns One of `a`, `b` or `c`.
 */
function paethPredictor(a, b, c) {
    const estimate = a + b - c;
    const distLeft = Math.abs(estimate - a);
    const distAbove = Math.abs(estimate - b);
    const distUpperLeft = Math.abs(estimate - c);
    const leftIsClosest = distLeft <= distAbove && distLeft <= distUpperLeft;
    if (leftIsClosest) {
        return a;
    }
    return distAbove <= distUpperLeft ? b : c;
}
|
|
226
|
+
// =============================================================================
|
|
227
|
+
// ASCII85Decode
|
|
228
|
+
// =============================================================================
|
|
229
|
+
/**
 * Decode ASCII base-85 data.
 *
 * Characters '!' through 'u' are base-85 digits; five digits yield four
 * bytes. 'z' between groups stands for four zero bytes, '~' ends the data,
 * and whitespace or any other out-of-range byte is skipped. A short final
 * group of k digits is padded with 'u' digits and yields k - 1 bytes.
 *
 * @param data - Encoded bytes.
 * @returns A new Uint8Array with the decoded bytes.
 */
function decodeAscii85(data) {
    const isBlank = (byte) => byte === 0x20 || byte === 0x09 || byte === 0x0a || byte === 0x0d || byte === 0x0c;
    const decoded = [];
    let pos = 0;
    while (pos < data.length) {
        const lead = data[pos];
        if (isBlank(lead)) {
            pos++;
            continue;
        }
        if (lead === 0x7e) {
            // '~' opens the end-of-data marker.
            break;
        }
        if (lead === 0x7a) {
            // 'z' is shorthand for an all-zero group.
            decoded.push(0, 0, 0, 0);
            pos++;
            continue;
        }
        // Gather up to five digits, ignoring anything outside '!'..'u'.
        const digits = [];
        for (; digits.length < 5 && pos < data.length; pos++) {
            const ch = data[pos];
            if (ch === 0x7e) {
                break; // leave '~' for the outer loop to see
            }
            if (ch < 0x21 || ch > 0x75) {
                continue;
            }
            digits.push(ch - 0x21);
        }
        if (digits.length === 0) {
            break;
        }
        const byteCount = digits.length - 1;
        while (digits.length < 5) {
            digits.push(84); // pad with the highest digit, 'u'
        }
        // Horner evaluation of the five base-85 digits.
        const word = digits.reduce((acc, digit) => acc * 85 + digit, 0);
        for (let k = 0; k < byteCount; k++) {
            decoded.push((word >>> (24 - 8 * k)) & 0xff);
        }
    }
    return new Uint8Array(decoded);
}
|
|
292
|
+
// =============================================================================
|
|
293
|
+
// ASCIIHexDecode
|
|
294
|
+
// =============================================================================
|
|
295
|
+
/**
 * Decode ASCII hexadecimal data.
 *
 * Pairs of hex digits (either case) become one byte each. '>' ends the
 * data; whitespace and every other non-hex byte is ignored. A dangling
 * final digit is treated as the high nibble of a byte whose low nibble is 0.
 *
 * @param data - Encoded bytes.
 * @returns A new Uint8Array with the decoded bytes.
 */
function decodeAsciiHex(data) {
    // Value of a hex digit, or -1 for anything that is not one.
    const nibbleOf = (byte) => {
        if (byte >= 0x30 && byte <= 0x39) {
            return byte - 0x30;
        }
        if (byte >= 0x41 && byte <= 0x46) {
            return byte - 0x41 + 10;
        }
        if (byte >= 0x61 && byte <= 0x66) {
            return byte - 0x61 + 10;
        }
        return -1;
    };
    const decoded = [];
    let pendingHigh = -1; // -1 means "no high nibble waiting"
    for (const byte of data) {
        if (byte === 0x3e) {
            break; // '>' end-of-data marker
        }
        const nibble = nibbleOf(byte);
        if (nibble < 0) {
            continue;
        }
        if (pendingHigh < 0) {
            pendingHigh = nibble;
            continue;
        }
        decoded.push((pendingHigh << 4) | nibble);
        pendingHigh = -1;
    }
    if (pendingHigh >= 0) {
        decoded.push(pendingHigh << 4);
    }
    return new Uint8Array(decoded);
}
|
|
335
|
+
// =============================================================================
|
|
336
|
+
// LZWDecode
|
|
337
|
+
// =============================================================================
|
|
338
|
+
/**
 * Decode an LZWDecode stream and undo its predictor, if one is declared.
 *
 * Codes are read most-significant-bit first, starting at 9 bits and growing
 * to at most 12. Code 256 resets the table and code 257 ends the data.
 *
 * The point at which the code width grows depends on "EarlyChange"
 * (default 1). The decoder's table is always one entry behind the
 * encoder's, so after adding an entry the width must grow when
 * `nextCode + EarlyChange` reaches the current power of two: at 511 entries
 * for EarlyChange = 1 and at 512 for EarlyChange = 0.
 *
 * Decoding is lenient: an invalid code stops decoding and whatever was
 * produced so far is returned.
 *
 * @param data - LZW-compressed bytes.
 * @param parms - Optional decode-parameter dictionary; "EarlyChange" and
 *                "Predictor" are read from it.
 * @returns A new Uint8Array with the decoded (and de-predicted) bytes.
 */
function decodeLzw(data, parms) {
    const earlyChange = parms ? ((0, pdf_parser_1.dictGetNumber)(parms, "EarlyChange") ?? 1) : 1;
    // Number of codes by which the width increase is brought forward.
    const widthBias = earlyChange ? 1 : 0;
    const CLEAR_TABLE = 256;
    const EOD = 257;
    const MAX_CODE_SIZE = 12;
    const MAX_TABLE_SIZE = 1 << MAX_CODE_SIZE;
    const totalBits = data.length * 8;
    const output = [];
    let bitPos = 0;
    // Read n bits MSB-first. The caller guarantees that n bits are available.
    function readBits(n) {
        let value = 0;
        for (let i = 0; i < n; i++, bitPos++) {
            const bit = (data[bitPos >> 3] >> (7 - (bitPos & 7))) & 1;
            value = (value << 1) | bit;
        }
        return value;
    }
    let codeSize = 9;
    let nextCode = 258;
    let table = [];
    // Restore the initial table: one single-byte entry per byte value plus
    // empty placeholders for the two control codes.
    function resetTable() {
        table = [];
        for (let i = 0; i < 256; i++) {
            table[i] = new Uint8Array([i]);
        }
        table[CLEAR_TABLE] = new Uint8Array(0);
        table[EOD] = new Uint8Array(0);
        nextCode = 258;
        codeSize = 9;
    }
    resetTable();
    let prevEntry = null;
    // Stop once fewer than codeSize bits remain: those are byte-alignment
    // padding, and reading them as a short code would emit a spurious byte
    // when the stream lacks an explicit EOD code.
    while (bitPos + codeSize <= totalBits) {
        const code = readBits(codeSize);
        if (code === EOD) {
            break;
        }
        if (code === CLEAR_TABLE) {
            resetTable();
            prevEntry = null;
            continue;
        }
        let entry;
        if (code < nextCode && table[code]) {
            entry = table[code];
        }
        else if (code === nextCode && prevEntry) {
            // The code refers to the entry about to be created:
            // previous sequence + its own first byte.
            entry = new Uint8Array(prevEntry.length + 1);
            entry.set(prevEntry);
            entry[prevEntry.length] = prevEntry[0];
        }
        else {
            // Invalid code — keep what has been decoded so far.
            break;
        }
        for (let i = 0; i < entry.length; i++) {
            output.push(entry[i]);
        }
        // New table entry: previous sequence + first byte of the current one.
        // Codes are at most 12 bits wide, so entries past 4095 could never be
        // referenced; do not let a malformed stream grow the table unbounded.
        if (prevEntry !== null && nextCode < MAX_TABLE_SIZE) {
            const newEntry = new Uint8Array(prevEntry.length + 1);
            newEntry.set(prevEntry);
            newEntry[prevEntry.length] = entry[0];
            table[nextCode] = newEntry;
            nextCode++;
            if (nextCode + widthBias >= 1 << codeSize && codeSize < MAX_CODE_SIZE) {
                codeSize++;
            }
        }
        prevEntry = entry;
    }
    let result = new Uint8Array(output);
    // Apply predictor if specified
    if (parms) {
        const predictor = (0, pdf_parser_1.dictGetNumber)(parms, "Predictor") ?? 1;
        if (predictor > 1) {
            result = undoPredictor(result, parms);
        }
    }
    return result;
}
|
|
425
|
+
// =============================================================================
|
|
426
|
+
// RunLengthDecode
|
|
427
|
+
// =============================================================================
|
|
428
|
+
/**
 * Decode run-length encoded data.
 *
 * Each run starts with a control byte L:
 *   L in 0..127   — copy the next L + 1 bytes literally;
 *   L in 129..255 — repeat the next byte 257 - L times;
 *   L = 128       — end of data.
 * Truncated input is tolerated: a literal run copies only the bytes that
 * are present, and a repeat run with no byte to repeat produces nothing.
 *
 * @param data - Encoded bytes.
 * @returns A new Uint8Array with the decoded bytes.
 */
function decodeRunLength(data) {
    const decoded = [];
    let cursor = 0;
    while (cursor < data.length) {
        const control = data[cursor++];
        if (control === 128) {
            break; // end-of-data marker
        }
        if (control < 128) {
            // Literal run, clipped to the bytes actually available.
            const stop = Math.min(cursor + control + 1, data.length);
            while (cursor < stop) {
                decoded.push(data[cursor++]);
            }
            continue;
        }
        if (cursor >= data.length) {
            continue; // repeat run without its byte: nothing to emit
        }
        const value = data[cursor++];
        for (let remaining = 257 - control; remaining > 0; remaining--) {
            decoded.push(value);
        }
    }
    return new Uint8Array(decoded);
}
|