@cj-tech-master/excelts 7.6.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -577
- package/README_zh.md +101 -577
- package/dist/browser/index.browser.d.ts +3 -0
- package/dist/browser/index.browser.js +2 -0
- package/dist/browser/index.d.ts +3 -0
- package/dist/browser/index.js +2 -0
- package/dist/browser/modules/archive/compression/compress.browser.js +4 -4
- package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/browser/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/browser/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/browser/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/browser/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/browser/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/browser/modules/archive/zip/stream.js +27 -3
- package/dist/browser/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/browser/modules/excel/workbook.browser.js +226 -0
- package/dist/browser/modules/excel/workbook.d.ts +32 -1
- package/dist/browser/modules/excel/workbook.js +47 -2
- package/dist/browser/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/browser/modules/markdown/constants.d.ts +30 -0
- package/dist/browser/modules/markdown/constants.js +30 -0
- package/dist/browser/modules/markdown/errors.d.ts +21 -0
- package/dist/browser/modules/markdown/errors.js +23 -0
- package/dist/browser/modules/markdown/format/index.d.ts +54 -0
- package/dist/browser/modules/markdown/format/index.js +307 -0
- package/dist/browser/modules/markdown/index.d.ts +15 -0
- package/dist/browser/modules/markdown/index.js +22 -0
- package/dist/browser/modules/markdown/parse/index.d.ts +70 -0
- package/dist/browser/modules/markdown/parse/index.js +428 -0
- package/dist/browser/modules/markdown/types.d.ts +130 -0
- package/dist/browser/modules/markdown/types.js +6 -0
- package/dist/cjs/index.js +5 -1
- package/dist/cjs/modules/archive/compression/compress.browser.js +4 -4
- package/dist/cjs/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +15 -2
- package/dist/cjs/modules/archive/compression/streaming-compress.js +8 -0
- package/dist/cjs/modules/archive/zip/stream.js +26 -2
- package/dist/cjs/modules/excel/workbook.browser.js +226 -0
- package/dist/cjs/modules/excel/workbook.js +46 -1
- package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/cjs/modules/markdown/constants.js +33 -0
- package/dist/cjs/modules/markdown/errors.js +28 -0
- package/dist/cjs/modules/markdown/format/index.js +310 -0
- package/dist/cjs/modules/markdown/index.js +30 -0
- package/dist/cjs/modules/markdown/parse/index.js +432 -0
- package/dist/cjs/modules/markdown/types.js +7 -0
- package/dist/esm/index.browser.js +2 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/modules/archive/compression/compress.browser.js +4 -4
- package/dist/esm/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/esm/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/esm/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/esm/modules/archive/zip/stream.js +27 -3
- package/dist/esm/modules/excel/workbook.browser.js +226 -0
- package/dist/esm/modules/excel/workbook.js +47 -2
- package/dist/esm/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/esm/modules/markdown/constants.js +30 -0
- package/dist/esm/modules/markdown/errors.js +23 -0
- package/dist/esm/modules/markdown/format/index.js +307 -0
- package/dist/esm/modules/markdown/index.js +22 -0
- package/dist/esm/modules/markdown/parse/index.js +428 -0
- package/dist/esm/modules/markdown/types.js +6 -0
- package/dist/iife/excelts.iife.js +1342 -283
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +38 -34
- package/dist/types/index.browser.d.ts +3 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/types/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/types/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/types/modules/excel/workbook.d.ts +32 -1
- package/dist/types/modules/markdown/constants.d.ts +30 -0
- package/dist/types/modules/markdown/errors.d.ts +21 -0
- package/dist/types/modules/markdown/format/index.d.ts +54 -0
- package/dist/types/modules/markdown/index.d.ts +15 -0
- package/dist/types/modules/markdown/parse/index.d.ts +70 -0
- package/dist/types/modules/markdown/types.d.ts +130 -0
- package/package.json +56 -32
|
@@ -357,15 +357,33 @@ function deflateRawStore(data) {
|
|
|
357
357
|
// while providing a good distribution for the 3-byte hash.
|
|
358
358
|
const HASH_SIZE = 32768;
|
|
359
359
|
const HASH_MASK = HASH_SIZE - 1;
|
|
360
|
-
// Maximum hash chain length to walk per position. Longer chains find better
|
|
361
|
-
// matches at the cost of speed. 64 is a good balance (~zlib level 5-6).
|
|
362
|
-
const MAX_CHAIN_LEN = 64;
|
|
363
360
|
// Minimum match length for LZ77 (RFC 1951 minimum).
|
|
364
361
|
const MIN_MATCH = 3;
|
|
365
362
|
// Maximum match length (RFC 1951 maximum).
|
|
366
363
|
const MAX_MATCH = 258;
|
|
367
364
|
// Maximum back-reference distance (RFC 1951 / 32 KB sliding window).
|
|
368
365
|
const MAX_DIST = 32768;
|
|
366
|
+
/**
|
|
367
|
+
* Get LZ77 configuration for the given compression level (1-9).
|
|
368
|
+
* Modelled after zlib's configuration_table.
|
|
369
|
+
*/
|
|
370
|
+
function getLZ77Config(level) {
|
|
371
|
+
// Level 0 should be handled by the caller (store mode).
|
|
372
|
+
if (level <= 1) {
|
|
373
|
+
return { maxChainLen: 4, goodLen: 4, niceLen: 8, lazy: false };
|
|
374
|
+
}
|
|
375
|
+
if (level <= 3) {
|
|
376
|
+
return { maxChainLen: 8, goodLen: 8, niceLen: 32, lazy: true };
|
|
377
|
+
}
|
|
378
|
+
if (level <= 5) {
|
|
379
|
+
return { maxChainLen: 32, goodLen: 16, niceLen: 128, lazy: true };
|
|
380
|
+
}
|
|
381
|
+
if (level <= 7) {
|
|
382
|
+
return { maxChainLen: 64, goodLen: 32, niceLen: 258, lazy: true };
|
|
383
|
+
}
|
|
384
|
+
// level 8-9
|
|
385
|
+
return { maxChainLen: 128, goodLen: 64, niceLen: 258, lazy: true };
|
|
386
|
+
}
|
|
369
387
|
/**
|
|
370
388
|
* Hash function for 3-byte sequences.
|
|
371
389
|
* Uses a multiplicative hash for better distribution than the naive
|
|
@@ -376,16 +394,16 @@ function hash3(a, b, c) {
|
|
|
376
394
|
return ((((a << 16) | (b << 8) | c) * 0x1e35a7bd) >>> 17) & HASH_MASK;
|
|
377
395
|
}
|
|
378
396
|
/**
|
|
379
|
-
* Compress data using DEFLATE with
|
|
397
|
+
* Compress data using DEFLATE with Dynamic Huffman codes (BTYPE=2).
|
|
380
398
|
*
|
|
381
|
-
* Uses LZ77 with hash chains and lazy matching for
|
|
382
|
-
*
|
|
383
|
-
* after zlib's "fast" and "slow" deflate strategies.
|
|
399
|
+
* Uses LZ77 with hash chains and lazy matching for match finding, then builds
|
|
400
|
+
* optimal Huffman trees from the symbol frequencies for entropy coding.
|
|
384
401
|
*
|
|
385
402
|
* @param data - Data to compress
|
|
403
|
+
* @param level - Compression level (1-9, default 6)
|
|
386
404
|
* @returns Compressed data in deflate-raw format
|
|
387
405
|
*/
|
|
388
|
-
function deflateRawCompressed(data) {
|
|
406
|
+
function deflateRawCompressed(data, level = 6) {
|
|
389
407
|
if (data.length === 0) {
|
|
390
408
|
// Empty input: single final block with just end-of-block symbol
|
|
391
409
|
return new Uint8Array([0x03, 0x00]);
|
|
@@ -394,112 +412,12 @@ function deflateRawCompressed(data) {
|
|
|
394
412
|
if (data.length < 100) {
|
|
395
413
|
return deflateRawStore(data);
|
|
396
414
|
}
|
|
415
|
+
const config = getLZ77Config(level);
|
|
416
|
+
// --- Phase 1: LZ77 match finding → collect symbols ---
|
|
417
|
+
const lz77Symbols = lz77Compress(data, 0, data.length, config, null);
|
|
418
|
+
// --- Phase 2: Encode as a single final DEFLATE block ---
|
|
397
419
|
const output = new BitWriter();
|
|
398
|
-
|
|
399
|
-
output.writeBits(1, 1); // BFINAL
|
|
400
|
-
output.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
401
|
-
// --- Hash chain tables (typed arrays for performance) ---
|
|
402
|
-
// head[h]: most recent position with hash h (0 = unused, positions are 1-based internally)
|
|
403
|
-
// prev[pos & (MAX_DIST-1)]: previous position in the chain for the same hash
|
|
404
|
-
const head = new Int32Array(HASH_SIZE); // filled with 0 (no match)
|
|
405
|
-
const prev = new Int32Array(MAX_DIST);
|
|
406
|
-
let pos = 0;
|
|
407
|
-
// State for lazy matching:
|
|
408
|
-
// When we find a match at position N, we check position N+1 too.
|
|
409
|
-
// If N+1 has a longer match we emit a literal for N and use the N+1 match.
|
|
410
|
-
let prevMatchLen = 0;
|
|
411
|
-
let prevMatchDist = 0;
|
|
412
|
-
let prevLiteral = 0;
|
|
413
|
-
let hasPrevMatch = false;
|
|
414
|
-
while (pos < data.length) {
|
|
415
|
-
let bestLen = 0;
|
|
416
|
-
let bestDist = 0;
|
|
417
|
-
if (pos + 2 < data.length) {
|
|
418
|
-
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
419
|
-
// Walk the hash chain to find the best (longest) match
|
|
420
|
-
let chainLen = MAX_CHAIN_LEN;
|
|
421
|
-
let matchHead = head[h];
|
|
422
|
-
while (matchHead > 0 && chainLen-- > 0) {
|
|
423
|
-
const mPos = matchHead - 1; // convert from 1-based to 0-based
|
|
424
|
-
const dist = pos - mPos;
|
|
425
|
-
if (dist > MAX_DIST || dist <= 0) {
|
|
426
|
-
break;
|
|
427
|
-
}
|
|
428
|
-
// Quick check: compare the byte just beyond current best length first
|
|
429
|
-
// to skip obviously shorter matches early.
|
|
430
|
-
if (bestLen >= MIN_MATCH && data[mPos + bestLen] !== data[pos + bestLen]) {
|
|
431
|
-
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
432
|
-
continue;
|
|
433
|
-
}
|
|
434
|
-
// Full scan
|
|
435
|
-
let len = 0;
|
|
436
|
-
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
437
|
-
while (len < maxLen && data[mPos + len] === data[pos + len]) {
|
|
438
|
-
len++;
|
|
439
|
-
}
|
|
440
|
-
if (len > bestLen) {
|
|
441
|
-
bestLen = len;
|
|
442
|
-
bestDist = dist;
|
|
443
|
-
if (len >= MAX_MATCH) {
|
|
444
|
-
break; // can't do better
|
|
445
|
-
}
|
|
446
|
-
}
|
|
447
|
-
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
448
|
-
}
|
|
449
|
-
// Insert current position into the hash chain
|
|
450
|
-
prev[pos & (MAX_DIST - 1)] = head[h];
|
|
451
|
-
head[h] = pos + 1; // 1-based
|
|
452
|
-
}
|
|
453
|
-
// --- Lazy matching logic ---
|
|
454
|
-
if (hasPrevMatch) {
|
|
455
|
-
if (bestLen > prevMatchLen) {
|
|
456
|
-
// Current position has a better match; emit previous as literal
|
|
457
|
-
writeLiteralCode(output, prevLiteral);
|
|
458
|
-
// Now adopt current match as the pending one
|
|
459
|
-
prevMatchLen = bestLen;
|
|
460
|
-
prevMatchDist = bestDist;
|
|
461
|
-
prevLiteral = data[pos];
|
|
462
|
-
pos++;
|
|
463
|
-
}
|
|
464
|
-
else {
|
|
465
|
-
// Previous match is at least as good; emit it
|
|
466
|
-
writeLengthCode(output, prevMatchLen);
|
|
467
|
-
writeDistanceCode(output, prevMatchDist);
|
|
468
|
-
// Insert hash entries for the skipped bytes (positions inside the match)
|
|
469
|
-
// so future matches can find them. We already inserted pos-1 (the match
|
|
470
|
-
// start); now insert pos through pos + prevMatchLen - 2.
|
|
471
|
-
const matchEnd = pos - 1 + prevMatchLen;
|
|
472
|
-
for (let i = pos; i < matchEnd && i + 2 < data.length; i++) {
|
|
473
|
-
const h = hash3(data[i], data[i + 1], data[i + 2]);
|
|
474
|
-
prev[i & (MAX_DIST - 1)] = head[h];
|
|
475
|
-
head[h] = i + 1;
|
|
476
|
-
}
|
|
477
|
-
pos = matchEnd;
|
|
478
|
-
hasPrevMatch = false;
|
|
479
|
-
prevMatchLen = 0;
|
|
480
|
-
}
|
|
481
|
-
}
|
|
482
|
-
else if (bestLen >= MIN_MATCH) {
|
|
483
|
-
// We have a match; hold it and try the next position (lazy evaluation)
|
|
484
|
-
hasPrevMatch = true;
|
|
485
|
-
prevMatchLen = bestLen;
|
|
486
|
-
prevMatchDist = bestDist;
|
|
487
|
-
prevLiteral = data[pos];
|
|
488
|
-
pos++;
|
|
489
|
-
}
|
|
490
|
-
else {
|
|
491
|
-
// No match — emit literal
|
|
492
|
-
writeLiteralCode(output, data[pos]);
|
|
493
|
-
pos++;
|
|
494
|
-
}
|
|
495
|
-
}
|
|
496
|
-
// Flush any pending lazy match
|
|
497
|
-
if (hasPrevMatch) {
|
|
498
|
-
writeLengthCode(output, prevMatchLen);
|
|
499
|
-
writeDistanceCode(output, prevMatchDist);
|
|
500
|
-
}
|
|
501
|
-
// Write end-of-block symbol (256)
|
|
502
|
-
writeLiteralCode(output, 256);
|
|
420
|
+
emitDynamicBlock(output, lz77Symbols, true);
|
|
503
421
|
return output.finish();
|
|
504
422
|
}
|
|
505
423
|
/**
|
|
@@ -512,6 +430,14 @@ class BitWriter {
|
|
|
512
430
|
this.bitBuf = 0;
|
|
513
431
|
this.bitCount = 0;
|
|
514
432
|
}
|
|
433
|
+
/**
|
|
434
|
+
* Align to the next byte boundary by padding with zero bits.
|
|
435
|
+
*/
|
|
436
|
+
alignToByte() {
|
|
437
|
+
if (this.bitCount > 0) {
|
|
438
|
+
this.writeBits(0, 8 - this.bitCount);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
515
441
|
writeBits(value, count) {
|
|
516
442
|
this.bitBuf |= value << this.bitCount;
|
|
517
443
|
this.bitCount += count;
|
|
@@ -605,258 +531,512 @@ function writeLiteralCode(output, symbol) {
|
|
|
605
531
|
output.writeBitsReverse(code, len);
|
|
606
532
|
}
|
|
607
533
|
/**
|
|
608
|
-
*
|
|
534
|
+
* Compute the DEFLATE length code (257..285) and extra bits for a given
|
|
535
|
+
* match length (3..258).
|
|
609
536
|
*/
|
|
610
|
-
function
|
|
611
|
-
let
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
else if (length <= 18) {
|
|
620
|
-
const base = length - 11;
|
|
621
|
-
code = 265 + Math.floor(base / 2);
|
|
622
|
-
extraBits = 1;
|
|
623
|
-
extraValue = base % 2;
|
|
624
|
-
}
|
|
625
|
-
else if (length <= 34) {
|
|
626
|
-
const base = length - 19;
|
|
627
|
-
code = 269 + Math.floor(base / 4);
|
|
628
|
-
extraBits = 2;
|
|
629
|
-
extraValue = base % 4;
|
|
630
|
-
}
|
|
631
|
-
else if (length <= 66) {
|
|
632
|
-
const base = length - 35;
|
|
633
|
-
code = 273 + Math.floor(base / 8);
|
|
634
|
-
extraBits = 3;
|
|
635
|
-
extraValue = base % 8;
|
|
636
|
-
}
|
|
637
|
-
else if (length <= 130) {
|
|
638
|
-
const base = length - 67;
|
|
639
|
-
code = 277 + Math.floor(base / 16);
|
|
640
|
-
extraBits = 4;
|
|
641
|
-
extraValue = base % 16;
|
|
642
|
-
}
|
|
643
|
-
else if (length <= 257) {
|
|
644
|
-
const base = length - 131;
|
|
645
|
-
code = 281 + Math.floor(base / 32);
|
|
646
|
-
extraBits = 5;
|
|
647
|
-
extraValue = base % 32;
|
|
537
|
+
function getLengthSymbol(length) {
|
|
538
|
+
for (let i = 0; i < LENGTH_BASE.length; i++) {
|
|
539
|
+
if (i === LENGTH_BASE.length - 1 || length < LENGTH_BASE[i + 1]) {
|
|
540
|
+
return {
|
|
541
|
+
code: 257 + i,
|
|
542
|
+
extra: length - LENGTH_BASE[i],
|
|
543
|
+
extraBits: LENGTH_EXTRA[i]
|
|
544
|
+
};
|
|
545
|
+
}
|
|
648
546
|
}
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
547
|
+
return { code: 285, extra: 0, extraBits: 0 };
|
|
548
|
+
}
|
|
549
|
+
/**
|
|
550
|
+
* Compute the DEFLATE distance code (0..29) and extra bits for a given
|
|
551
|
+
* distance (1..32768).
|
|
552
|
+
*/
|
|
553
|
+
function getDistSymbol(distance) {
|
|
554
|
+
for (let i = 0; i < DIST_TABLE.length; i++) {
|
|
555
|
+
const [maxDist, c, extraBitsCount] = DIST_TABLE[i];
|
|
556
|
+
if (distance <= maxDist) {
|
|
557
|
+
const baseVal = i === 0 ? 1 : DIST_TABLE[i - 1][0] + 1;
|
|
558
|
+
return { code: c, extra: distance - baseVal, extraBits: extraBitsCount };
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
// Fallback (should not reach for valid distances)
|
|
562
|
+
return { code: 29, extra: 0, extraBits: 13 };
|
|
563
|
+
}
|
|
564
|
+
/**
|
|
565
|
+
* Build canonical Huffman code lengths from symbol frequencies.
|
|
566
|
+
* Uses a bottom-up approach: build a Huffman tree from a priority queue,
|
|
567
|
+
* then extract depths. Limits maximum code length to maxBits using
|
|
568
|
+
* the algorithm from zlib's build_tree() / gen_bitlen().
|
|
569
|
+
*
|
|
570
|
+
* Returns an array of code lengths indexed by symbol.
|
|
571
|
+
*/
|
|
572
|
+
function buildCodeLengths(freqs, maxBits) {
|
|
573
|
+
const n = freqs.length;
|
|
574
|
+
const codeLens = new Uint8Array(n);
|
|
575
|
+
// Count symbols with non-zero frequency
|
|
576
|
+
const activeSymbols = [];
|
|
577
|
+
for (let i = 0; i < n; i++) {
|
|
578
|
+
if (freqs[i] > 0) {
|
|
579
|
+
activeSymbols.push({ sym: i, freq: freqs[i] });
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
if (activeSymbols.length === 0) {
|
|
583
|
+
return codeLens;
|
|
584
|
+
}
|
|
585
|
+
// RFC 1951 requires a complete prefix code. For a single symbol, we need
|
|
586
|
+
// at least 2 entries to form a valid tree. We assign code length 1 to the
|
|
587
|
+
// symbol — the decoder uses only 1 bit but the tree is valid because
|
|
588
|
+
// DEFLATE decoders handle this as per the spec (the other 1-bit code is
|
|
589
|
+
// simply unused). This matches zlib's behavior.
|
|
590
|
+
if (activeSymbols.length === 1) {
|
|
591
|
+
codeLens[activeSymbols[0].sym] = 1;
|
|
592
|
+
return codeLens;
|
|
593
|
+
}
|
|
594
|
+
// Sort by frequency (ascending), then by symbol (ascending) for stability
|
|
595
|
+
activeSymbols.sort((a, b) => a.freq - b.freq || a.sym - b.sym);
|
|
596
|
+
const nodes = activeSymbols.map(s => ({
|
|
597
|
+
freq: s.freq,
|
|
598
|
+
sym: s.sym,
|
|
599
|
+
left: null,
|
|
600
|
+
right: null
|
|
601
|
+
}));
|
|
602
|
+
let leafIdx = 0;
|
|
603
|
+
let intIdx = 0;
|
|
604
|
+
const intNodes = [];
|
|
605
|
+
function getMin() {
|
|
606
|
+
const hasLeaf = leafIdx < nodes.length;
|
|
607
|
+
const hasInt = intIdx < intNodes.length;
|
|
608
|
+
if (hasLeaf && hasInt) {
|
|
609
|
+
if (nodes[leafIdx].freq <= intNodes[intIdx].freq) {
|
|
610
|
+
return nodes[leafIdx++];
|
|
611
|
+
}
|
|
612
|
+
return intNodes[intIdx++];
|
|
613
|
+
}
|
|
614
|
+
if (hasLeaf) {
|
|
615
|
+
return nodes[leafIdx++];
|
|
616
|
+
}
|
|
617
|
+
return intNodes[intIdx++];
|
|
618
|
+
}
|
|
619
|
+
const totalNodes = activeSymbols.length;
|
|
620
|
+
for (let i = 0; i < totalNodes - 1; i++) {
|
|
621
|
+
const a = getMin();
|
|
622
|
+
const b = getMin();
|
|
623
|
+
const merged = {
|
|
624
|
+
freq: a.freq + b.freq,
|
|
625
|
+
sym: -1,
|
|
626
|
+
left: a,
|
|
627
|
+
right: b
|
|
628
|
+
};
|
|
629
|
+
intNodes.push(merged);
|
|
630
|
+
}
|
|
631
|
+
// Extract depths from the root (last internal node)
|
|
632
|
+
const root = intNodes[intNodes.length - 1];
|
|
633
|
+
function extractDepths(node, depth) {
|
|
634
|
+
if (node.sym >= 0) {
|
|
635
|
+
codeLens[node.sym] = depth;
|
|
636
|
+
return;
|
|
637
|
+
}
|
|
638
|
+
if (node.left) {
|
|
639
|
+
extractDepths(node.left, depth + 1);
|
|
640
|
+
}
|
|
641
|
+
if (node.right) {
|
|
642
|
+
extractDepths(node.right, depth + 1);
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
extractDepths(root, 0);
|
|
646
|
+
// --- Length limiting using the zlib bl_count redistribution algorithm ---
|
|
647
|
+
// Count code lengths at each bit depth
|
|
648
|
+
const blCount = new Uint16Array(maxBits + 1);
|
|
649
|
+
for (let i = 0; i < n; i++) {
|
|
650
|
+
if (codeLens[i] > 0) {
|
|
651
|
+
if (codeLens[i] > maxBits) {
|
|
652
|
+
blCount[maxBits]++;
|
|
653
|
+
codeLens[i] = maxBits;
|
|
654
|
+
}
|
|
655
|
+
else {
|
|
656
|
+
blCount[codeLens[i]]++;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
// Check Kraft inequality: sum of 2^(maxBits - len) must equal 2^maxBits
|
|
661
|
+
let kraft = 0;
|
|
662
|
+
for (let bits = 1; bits <= maxBits; bits++) {
|
|
663
|
+
kraft += blCount[bits] << (maxBits - bits);
|
|
664
|
+
}
|
|
665
|
+
const target = 1 << maxBits;
|
|
666
|
+
if (kraft === target) {
|
|
667
|
+
return codeLens; // Already valid
|
|
668
|
+
}
|
|
669
|
+
// Redistribute to satisfy Kraft's inequality.
|
|
670
|
+
// Strategy: move symbols from shorter lengths to maxBits until balanced.
|
|
671
|
+
// Each symbol moved from length `bits` to `maxBits` reduces kraft by
|
|
672
|
+
// (2^(maxBits-bits) - 1) — we remove a large weight and add a weight of 1.
|
|
673
|
+
while (kraft > target) {
|
|
674
|
+
// Find a code length < maxBits that has symbols we can push down.
|
|
675
|
+
// Start from maxBits-1 to minimize the damage per move.
|
|
676
|
+
let bits = maxBits - 1;
|
|
677
|
+
while (bits > 0 && blCount[bits] === 0) {
|
|
678
|
+
bits--;
|
|
679
|
+
}
|
|
680
|
+
if (bits === 0) {
|
|
681
|
+
break; // Can't redistribute further
|
|
682
|
+
}
|
|
683
|
+
// Move one symbol from length `bits` to length `maxBits`
|
|
684
|
+
blCount[bits]--;
|
|
685
|
+
blCount[maxBits]++;
|
|
686
|
+
// Kraft change: removed 2^(maxBits-bits), added 2^0 = 1
|
|
687
|
+
kraft -= (1 << (maxBits - bits)) - 1;
|
|
653
688
|
}
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
689
|
+
// If kraft < target (under-allocated), add dummy codes at maxBits.
|
|
690
|
+
// This can happen when we overshoot during redistribution.
|
|
691
|
+
while (kraft < target) {
|
|
692
|
+
blCount[maxBits]++;
|
|
693
|
+
kraft++;
|
|
657
694
|
}
|
|
695
|
+
// Reassign code lengths to symbols (preserve relative order: longer
|
|
696
|
+
// codes go to less frequent symbols, matching the Huffman property).
|
|
697
|
+
// Sort symbols by their original code length (longest first), then by
|
|
698
|
+
// frequency (rarest first) for same length.
|
|
699
|
+
const symbolsByLen = [];
|
|
700
|
+
for (let i = 0; i < n; i++) {
|
|
701
|
+
if (codeLens[i] > 0) {
|
|
702
|
+
symbolsByLen.push({ sym: i, origLen: codeLens[i], freq: freqs[i] });
|
|
703
|
+
}
|
|
704
|
+
}
|
|
705
|
+
symbolsByLen.sort((a, b) => b.origLen - a.origLen || a.freq - b.freq);
|
|
706
|
+
// Assign new lengths from the bl_count distribution
|
|
707
|
+
codeLens.fill(0);
|
|
708
|
+
let symIdx = 0;
|
|
709
|
+
for (let bits = maxBits; bits >= 1; bits--) {
|
|
710
|
+
for (let count = blCount[bits]; count > 0; count--) {
|
|
711
|
+
if (symIdx < symbolsByLen.length) {
|
|
712
|
+
codeLens[symbolsByLen[symIdx].sym] = bits;
|
|
713
|
+
symIdx++;
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
return codeLens;
|
|
658
718
|
}
|
|
659
719
|
/**
|
|
660
|
-
*
|
|
720
|
+
* Build canonical Huffman codes from code lengths (RFC 1951 §3.2.2).
|
|
721
|
+
* Returns [code, length] pairs indexed by symbol.
|
|
661
722
|
*/
|
|
662
|
-
function
|
|
663
|
-
|
|
723
|
+
function buildCanonicalCodes(codeLens) {
|
|
724
|
+
const n = codeLens.length;
|
|
725
|
+
const codes = new Array(n);
|
|
726
|
+
const blCount = new Uint16Array(16);
|
|
727
|
+
for (let i = 0; i < n; i++) {
|
|
728
|
+
if (codeLens[i] > 0) {
|
|
729
|
+
blCount[codeLens[i]]++;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
const nextCode = new Uint16Array(16);
|
|
664
733
|
let code = 0;
|
|
665
|
-
let
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
734
|
+
for (let bits = 1; bits <= 15; bits++) {
|
|
735
|
+
code = (code + blCount[bits - 1]) << 1;
|
|
736
|
+
nextCode[bits] = code;
|
|
737
|
+
}
|
|
738
|
+
for (let i = 0; i < n; i++) {
|
|
739
|
+
const len = codeLens[i];
|
|
740
|
+
if (len > 0) {
|
|
741
|
+
codes[i] = [nextCode[len]++, len];
|
|
742
|
+
}
|
|
743
|
+
else {
|
|
744
|
+
codes[i] = [0, 0];
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
return codes;
|
|
748
|
+
}
|
|
749
|
+
/**
|
|
750
|
+
* Emit a Dynamic Huffman DEFLATE block (BTYPE=2).
|
|
751
|
+
*
|
|
752
|
+
* Takes the LZ77 symbol sequence, builds optimal Huffman trees,
|
|
753
|
+
* encodes the tree descriptions, then encodes the symbols.
|
|
754
|
+
*/
|
|
755
|
+
function emitDynamicBlock(out, symbols, isFinal) {
|
|
756
|
+
// --- Step 1: Collect frequencies ---
|
|
757
|
+
const litLenFreqs = new Uint32Array(286);
|
|
758
|
+
const distFreqs = new Uint32Array(30);
|
|
759
|
+
// Always include EOB
|
|
760
|
+
litLenFreqs[256] = 1;
|
|
761
|
+
for (const sym of symbols) {
|
|
762
|
+
if (sym.dist === 0) {
|
|
763
|
+
litLenFreqs[sym.litOrLen]++;
|
|
764
|
+
}
|
|
765
|
+
else {
|
|
766
|
+
const ls = getLengthSymbol(sym.litOrLen);
|
|
767
|
+
litLenFreqs[ls.code]++;
|
|
768
|
+
const ds = getDistSymbol(sym.dist);
|
|
769
|
+
distFreqs[ds.code]++;
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
// --- Step 2: Build Huffman trees ---
|
|
773
|
+
const litLenLens = buildCodeLengths(litLenFreqs, 15);
|
|
774
|
+
let distLens = buildCodeLengths(distFreqs, 15);
|
|
775
|
+
// DEFLATE requires at least 1 distance code even if unused.
|
|
776
|
+
// Assign two codes at length 1 to form a complete prefix code.
|
|
777
|
+
let hasDistCodes = false;
|
|
778
|
+
for (let i = 0; i < distLens.length; i++) {
|
|
779
|
+
if (distLens[i] > 0) {
|
|
780
|
+
hasDistCodes = true;
|
|
672
781
|
break;
|
|
673
782
|
}
|
|
674
|
-
baseDistance = maxDist + 1;
|
|
675
783
|
}
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
784
|
+
if (!hasDistCodes) {
|
|
785
|
+
distLens = new Uint8Array(30);
|
|
786
|
+
distLens[0] = 1;
|
|
787
|
+
distLens[1] = 1;
|
|
788
|
+
}
|
|
789
|
+
const litLenCodes = buildCanonicalCodes(litLenLens);
|
|
790
|
+
const distCodes = buildCanonicalCodes(distLens);
|
|
791
|
+
// --- Step 3: Determine HLIT and HDIST ---
|
|
792
|
+
let hlit = 286;
|
|
793
|
+
while (hlit > 257 && litLenLens[hlit - 1] === 0) {
|
|
794
|
+
hlit--;
|
|
795
|
+
}
|
|
796
|
+
let hdist = 30;
|
|
797
|
+
while (hdist > 1 && distLens[hdist - 1] === 0) {
|
|
798
|
+
hdist--;
|
|
799
|
+
}
|
|
800
|
+
// --- Step 4: Run-length encode the code lengths ---
|
|
801
|
+
const combined = new Uint8Array(hlit + hdist);
|
|
802
|
+
combined.set(litLenLens.subarray(0, hlit));
|
|
803
|
+
combined.set(distLens.subarray(0, hdist), hlit);
|
|
804
|
+
const clSymbols = [];
|
|
805
|
+
const clFreqs = new Uint32Array(19);
|
|
806
|
+
for (let i = 0; i < combined.length;) {
|
|
807
|
+
const val = combined[i];
|
|
808
|
+
if (val === 0) {
|
|
809
|
+
let run = 1;
|
|
810
|
+
while (i + run < combined.length && combined[i + run] === 0) {
|
|
811
|
+
run++;
|
|
812
|
+
}
|
|
813
|
+
while (run > 0) {
|
|
814
|
+
if (run >= 11) {
|
|
815
|
+
const repeat = Math.min(run, 138);
|
|
816
|
+
clSymbols.push({ sym: 18, extra: repeat - 11, extraBits: 7 });
|
|
817
|
+
clFreqs[18]++;
|
|
818
|
+
run -= repeat;
|
|
819
|
+
i += repeat;
|
|
820
|
+
}
|
|
821
|
+
else if (run >= 3) {
|
|
822
|
+
const repeat = Math.min(run, 10);
|
|
823
|
+
clSymbols.push({ sym: 17, extra: repeat - 3, extraBits: 3 });
|
|
824
|
+
clFreqs[17]++;
|
|
825
|
+
run -= repeat;
|
|
826
|
+
i += repeat;
|
|
827
|
+
}
|
|
828
|
+
else {
|
|
829
|
+
clSymbols.push({ sym: 0, extra: 0, extraBits: 0 });
|
|
830
|
+
clFreqs[0]++;
|
|
831
|
+
run--;
|
|
832
|
+
i++;
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
else {
|
|
837
|
+
clSymbols.push({ sym: val, extra: 0, extraBits: 0 });
|
|
838
|
+
clFreqs[val]++;
|
|
839
|
+
i++;
|
|
840
|
+
let run = 0;
|
|
841
|
+
while (i + run < combined.length && combined[i + run] === val) {
|
|
842
|
+
run++;
|
|
843
|
+
}
|
|
844
|
+
while (run >= 3) {
|
|
845
|
+
const repeat = Math.min(run, 6);
|
|
846
|
+
clSymbols.push({ sym: 16, extra: repeat - 3, extraBits: 2 });
|
|
847
|
+
clFreqs[16]++;
|
|
848
|
+
run -= repeat;
|
|
849
|
+
i += repeat;
|
|
850
|
+
}
|
|
851
|
+
while (run > 0) {
|
|
852
|
+
clSymbols.push({ sym: val, extra: 0, extraBits: 0 });
|
|
853
|
+
clFreqs[val]++;
|
|
854
|
+
run--;
|
|
855
|
+
i++;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
// --- Step 5: Build code-length Huffman tree ---
|
|
860
|
+
const clLens = buildCodeLengths(clFreqs, 7);
|
|
861
|
+
const clCodes = buildCanonicalCodes(clLens);
|
|
862
|
+
let hclen = 19;
|
|
863
|
+
while (hclen > 4 && clLens[CODE_LENGTH_ORDER[hclen - 1]] === 0) {
|
|
864
|
+
hclen--;
|
|
865
|
+
}
|
|
866
|
+
// --- Step 6: Write block header ---
|
|
867
|
+
out.writeBits(isFinal ? 1 : 0, 1); // BFINAL
|
|
868
|
+
out.writeBits(2, 2); // BTYPE = 10 (dynamic Huffman)
|
|
869
|
+
out.writeBits(hlit - 257, 5);
|
|
870
|
+
out.writeBits(hdist - 1, 5);
|
|
871
|
+
out.writeBits(hclen - 4, 4);
|
|
872
|
+
for (let i = 0; i < hclen; i++) {
|
|
873
|
+
out.writeBits(clLens[CODE_LENGTH_ORDER[i]], 3);
|
|
874
|
+
}
|
|
875
|
+
for (const cls of clSymbols) {
|
|
876
|
+
const [clCode, clLen] = clCodes[cls.sym];
|
|
877
|
+
out.writeBitsReverse(clCode, clLen);
|
|
878
|
+
if (cls.extraBits > 0) {
|
|
879
|
+
out.writeBits(cls.extra, cls.extraBits);
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
// --- Step 7: Write compressed data ---
|
|
883
|
+
for (const sym of symbols) {
|
|
884
|
+
if (sym.dist === 0) {
|
|
885
|
+
const [lCode, lLen] = litLenCodes[sym.litOrLen];
|
|
886
|
+
out.writeBitsReverse(lCode, lLen);
|
|
887
|
+
}
|
|
888
|
+
else {
|
|
889
|
+
const ls = getLengthSymbol(sym.litOrLen);
|
|
890
|
+
const [lCode, lLen] = litLenCodes[ls.code];
|
|
891
|
+
out.writeBitsReverse(lCode, lLen);
|
|
892
|
+
if (ls.extraBits > 0) {
|
|
893
|
+
out.writeBits(ls.extra, ls.extraBits);
|
|
894
|
+
}
|
|
895
|
+
const ds = getDistSymbol(sym.dist);
|
|
896
|
+
const [dCode, dLen] = distCodes[ds.code];
|
|
897
|
+
out.writeBitsReverse(dCode, dLen);
|
|
898
|
+
if (ds.extraBits > 0) {
|
|
899
|
+
out.writeBits(ds.extra, ds.extraBits);
|
|
900
|
+
}
|
|
901
|
+
}
|
|
681
902
|
}
|
|
903
|
+
// End of block
|
|
904
|
+
const [eobCode, eobLen] = litLenCodes[256];
|
|
905
|
+
out.writeBitsReverse(eobCode, eobLen);
|
|
682
906
|
}
|
|
683
|
-
// ============================================================================
|
|
684
|
-
// Stateful Streaming Deflater
|
|
685
|
-
// ============================================================================
|
|
686
|
-
/** Maximum LZ77 sliding window size (32 KB per RFC 1951). */
|
|
687
|
-
const WINDOW_SIZE = 32768;
|
|
688
907
|
/**
|
|
689
|
-
*
|
|
690
|
-
*
|
|
691
|
-
* Unlike `deflateRawCompressed` (which is a one-shot function), this class
|
|
692
|
-
* maintains state across multiple `write()` calls:
|
|
693
|
-
*
|
|
694
|
-
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
695
|
-
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
696
|
-
* hash tables for fast lookup.
|
|
697
|
-
* - **Lazy matching**: each match is compared with the next position's match
|
|
698
|
-
* to pick the longer one.
|
|
699
|
-
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
700
|
-
* a single valid DEFLATE bit-stream without alignment issues.
|
|
908
|
+
* Run LZ77 match-finding on `data[start..end)`.
|
|
701
909
|
*
|
|
702
|
-
*
|
|
703
|
-
* `
|
|
910
|
+
* When `state` is null, performs one-shot compression with fresh hash tables.
|
|
911
|
+
* When `state` is provided, maintains sliding window and hash chains across calls.
|
|
704
912
|
*
|
|
705
|
-
*
|
|
706
|
-
* `Z_SYNC_FLUSH`, used by the streaming ZIP writer (`pushSync`) to achieve
|
|
707
|
-
* constant-memory streaming in both Node.js and browsers.
|
|
913
|
+
* Returns an array of LZ77 symbols (literals + length/distance pairs).
|
|
708
914
|
*/
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
915
|
+
function lz77Compress(data, start, end, config, state) {
|
|
916
|
+
const symbols = [];
|
|
917
|
+
const maxChainLen = config.maxChainLen;
|
|
918
|
+
const goodLen = config.goodLen;
|
|
919
|
+
const niceLen = config.niceLen;
|
|
920
|
+
const useLazy = config.lazy;
|
|
921
|
+
let head;
|
|
922
|
+
let prevArr;
|
|
923
|
+
let window;
|
|
924
|
+
let wLen;
|
|
925
|
+
let totalIn;
|
|
926
|
+
let hasPrevMatch;
|
|
927
|
+
let prevMatchLen;
|
|
928
|
+
let prevMatchDist;
|
|
929
|
+
let prevLiteral;
|
|
930
|
+
if (state) {
|
|
931
|
+
head = state.head;
|
|
932
|
+
prevArr = state.prev;
|
|
933
|
+
window = state.window;
|
|
934
|
+
wLen = state.windowLen;
|
|
935
|
+
totalIn = state.totalIn;
|
|
936
|
+
hasPrevMatch = state.hasPrevMatch;
|
|
937
|
+
prevMatchLen = state.prevMatchLen;
|
|
938
|
+
prevMatchDist = state.prevMatchDist;
|
|
939
|
+
prevLiteral = state.prevLiteral;
|
|
726
940
|
}
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
941
|
+
else {
|
|
942
|
+
head = new Int32Array(HASH_SIZE);
|
|
943
|
+
prevArr = new Int32Array(MAX_DIST);
|
|
944
|
+
window = null;
|
|
945
|
+
wLen = 0;
|
|
946
|
+
totalIn = 0;
|
|
947
|
+
hasPrevMatch = false;
|
|
948
|
+
prevMatchLen = 0;
|
|
949
|
+
prevMatchDist = 0;
|
|
950
|
+
prevLiteral = 0;
|
|
951
|
+
}
|
|
952
|
+
const getByte = state
|
|
953
|
+
? (globalPos) => {
|
|
954
|
+
const localPos = globalPos - totalIn;
|
|
955
|
+
if (localPos >= start && localPos < end) {
|
|
956
|
+
return data[localPos];
|
|
957
|
+
}
|
|
958
|
+
return window[globalPos & (MAX_DIST - 1)];
|
|
734
959
|
}
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
const window = this._window;
|
|
740
|
-
let wLen = this._windowLen;
|
|
741
|
-
const head = this._head;
|
|
742
|
-
const prevArr = this._prev;
|
|
743
|
-
const totalIn = this._totalIn;
|
|
744
|
-
let hasPrevMatch = this._hasPrevMatch;
|
|
745
|
-
let prevMatchLen = this._prevMatchLen;
|
|
746
|
-
let prevMatchDist = this._prevMatchDist;
|
|
747
|
-
let prevLiteral = this._prevLiteral;
|
|
748
|
-
/**
|
|
749
|
-
* Insert a global position into the hash chain and the sliding window.
|
|
750
|
-
*/
|
|
751
|
-
const insertHash = (localPos) => {
|
|
752
|
-
if (localPos + 2 >= data.length) {
|
|
960
|
+
: (globalPos) => data[globalPos];
|
|
961
|
+
const insertHash = state
|
|
962
|
+
? (localPos) => {
|
|
963
|
+
if (localPos + 2 >= end) {
|
|
753
964
|
return;
|
|
754
965
|
}
|
|
755
966
|
const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
|
|
756
|
-
const
|
|
757
|
-
prevArr[
|
|
758
|
-
head[h] =
|
|
967
|
+
const gp = totalIn + localPos;
|
|
968
|
+
prevArr[gp & (MAX_DIST - 1)] = head[h];
|
|
969
|
+
head[h] = gp + 1;
|
|
970
|
+
}
|
|
971
|
+
: (localPos) => {
|
|
972
|
+
if (localPos + 2 >= end) {
|
|
973
|
+
return;
|
|
974
|
+
}
|
|
975
|
+
const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
|
|
976
|
+
prevArr[localPos & (MAX_DIST - 1)] = head[h];
|
|
977
|
+
head[h] = localPos + 1;
|
|
759
978
|
};
|
|
760
|
-
|
|
979
|
+
const insertWindow = state
|
|
980
|
+
? (localPos, count) => {
|
|
761
981
|
for (let i = 0; i < count; i++) {
|
|
762
|
-
window[(wLen + i) & (
|
|
982
|
+
window[(wLen + i) & (MAX_DIST - 1)] = data[localPos + i];
|
|
763
983
|
}
|
|
764
984
|
wLen += count;
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
if (
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
let checkByte;
|
|
790
|
-
const checkLocal = checkOffset - totalIn;
|
|
791
|
-
if (checkLocal >= 0 && checkLocal < data.length) {
|
|
792
|
-
checkByte = data[checkLocal];
|
|
793
|
-
}
|
|
794
|
-
else {
|
|
795
|
-
checkByte = window[checkOffset & (WINDOW_SIZE - 1)];
|
|
796
|
-
}
|
|
797
|
-
if (checkByte !== data[pos + bestLen]) {
|
|
798
|
-
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
799
|
-
continue;
|
|
800
|
-
}
|
|
985
|
+
}
|
|
986
|
+
: (_localPos, _count) => { };
|
|
987
|
+
let pos = start;
|
|
988
|
+
for (; pos < end;) {
|
|
989
|
+
let bestLen = 0;
|
|
990
|
+
let bestDist = 0;
|
|
991
|
+
if (pos + 2 < end) {
|
|
992
|
+
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
993
|
+
const globalPos = state ? totalIn + pos : pos;
|
|
994
|
+
// When we already have a good match from a previous lazy evaluation,
|
|
995
|
+
// reduce the chain search length (matching zlib's good_length behavior).
|
|
996
|
+
let chainRemaining = useLazy && hasPrevMatch && prevMatchLen >= goodLen ? maxChainLen >> 2 : maxChainLen;
|
|
997
|
+
let matchHead = head[h];
|
|
998
|
+
while (matchHead > 0 && chainRemaining-- > 0) {
|
|
999
|
+
const mGlobalPos = matchHead - 1;
|
|
1000
|
+
const dist = globalPos - mGlobalPos;
|
|
1001
|
+
if (dist > MAX_DIST || dist <= 0) {
|
|
1002
|
+
break;
|
|
1003
|
+
}
|
|
1004
|
+
if (bestLen >= MIN_MATCH) {
|
|
1005
|
+
const checkGlobal = mGlobalPos + bestLen;
|
|
1006
|
+
if (getByte(checkGlobal) !== data[pos + bestLen]) {
|
|
1007
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
1008
|
+
continue;
|
|
801
1009
|
}
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
matchByte = data[matchLocal];
|
|
809
|
-
}
|
|
810
|
-
else {
|
|
811
|
-
matchByte = window[matchOffset & (WINDOW_SIZE - 1)];
|
|
812
|
-
}
|
|
813
|
-
if (matchByte !== data[pos + len]) {
|
|
814
|
-
break;
|
|
815
|
-
}
|
|
816
|
-
len++;
|
|
1010
|
+
}
|
|
1011
|
+
const maxLen = Math.min(MAX_MATCH, end - pos);
|
|
1012
|
+
let len = 0;
|
|
1013
|
+
while (len < maxLen) {
|
|
1014
|
+
if (getByte(mGlobalPos + len) !== data[pos + len]) {
|
|
1015
|
+
break;
|
|
817
1016
|
}
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
1017
|
+
len++;
|
|
1018
|
+
}
|
|
1019
|
+
if (len > bestLen) {
|
|
1020
|
+
bestLen = len;
|
|
1021
|
+
bestDist = dist;
|
|
1022
|
+
if (len >= niceLen) {
|
|
1023
|
+
break;
|
|
824
1024
|
}
|
|
825
|
-
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
826
1025
|
}
|
|
827
|
-
|
|
1026
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
1027
|
+
}
|
|
1028
|
+
if (state) {
|
|
828
1029
|
prevArr[globalPos & (MAX_DIST - 1)] = head[h];
|
|
829
1030
|
head[h] = globalPos + 1;
|
|
830
1031
|
}
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
// Current position wins — emit previous as literal
|
|
835
|
-
writeLiteralCode(out, prevLiteral);
|
|
836
|
-
prevMatchLen = bestLen;
|
|
837
|
-
prevMatchDist = bestDist;
|
|
838
|
-
prevLiteral = data[pos];
|
|
839
|
-
insertWindow(pos, 1);
|
|
840
|
-
pos++;
|
|
841
|
-
}
|
|
842
|
-
else {
|
|
843
|
-
// Previous match wins — emit it
|
|
844
|
-
writeLengthCode(out, prevMatchLen);
|
|
845
|
-
writeDistanceCode(out, prevMatchDist);
|
|
846
|
-
// Insert hash entries for skipped positions inside the match
|
|
847
|
-
const matchEnd = pos - 1 + prevMatchLen;
|
|
848
|
-
const insertEnd = Math.min(matchEnd, data.length);
|
|
849
|
-
for (let i = pos; i < insertEnd; i++) {
|
|
850
|
-
insertHash(i);
|
|
851
|
-
}
|
|
852
|
-
insertWindow(pos, insertEnd - pos);
|
|
853
|
-
pos = insertEnd;
|
|
854
|
-
hasPrevMatch = false;
|
|
855
|
-
prevMatchLen = 0;
|
|
856
|
-
}
|
|
1032
|
+
else {
|
|
1033
|
+
prevArr[pos & (MAX_DIST - 1)] = head[h];
|
|
1034
|
+
head[h] = pos + 1;
|
|
857
1035
|
}
|
|
858
|
-
|
|
859
|
-
|
|
1036
|
+
}
|
|
1037
|
+
if (useLazy && hasPrevMatch) {
|
|
1038
|
+
if (bestLen > prevMatchLen) {
|
|
1039
|
+
symbols.push({ litOrLen: prevLiteral, dist: 0 });
|
|
860
1040
|
prevMatchLen = bestLen;
|
|
861
1041
|
prevMatchDist = bestDist;
|
|
862
1042
|
prevLiteral = data[pos];
|
|
@@ -864,38 +1044,132 @@ class SyncDeflater {
|
|
|
864
1044
|
pos++;
|
|
865
1045
|
}
|
|
866
1046
|
else {
|
|
867
|
-
|
|
1047
|
+
symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
|
|
1048
|
+
const matchEnd = Math.min(pos - 1 + prevMatchLen, end);
|
|
1049
|
+
for (let i = pos; i < matchEnd; i++) {
|
|
1050
|
+
insertHash(i);
|
|
1051
|
+
}
|
|
1052
|
+
insertWindow(pos, matchEnd - pos);
|
|
1053
|
+
pos = matchEnd;
|
|
1054
|
+
hasPrevMatch = false;
|
|
1055
|
+
prevMatchLen = 0;
|
|
1056
|
+
}
|
|
1057
|
+
}
|
|
1058
|
+
else if (bestLen >= MIN_MATCH) {
|
|
1059
|
+
if (useLazy) {
|
|
1060
|
+
hasPrevMatch = true;
|
|
1061
|
+
prevMatchLen = bestLen;
|
|
1062
|
+
prevMatchDist = bestDist;
|
|
1063
|
+
prevLiteral = data[pos];
|
|
868
1064
|
insertWindow(pos, 1);
|
|
869
1065
|
pos++;
|
|
870
1066
|
}
|
|
1067
|
+
else {
|
|
1068
|
+
symbols.push({ litOrLen: bestLen, dist: bestDist });
|
|
1069
|
+
const matchEnd = Math.min(pos + bestLen, end);
|
|
1070
|
+
for (let i = pos + 1; i < matchEnd; i++) {
|
|
1071
|
+
insertHash(i);
|
|
1072
|
+
}
|
|
1073
|
+
insertWindow(pos, matchEnd - pos);
|
|
1074
|
+
pos = matchEnd;
|
|
1075
|
+
}
|
|
871
1076
|
}
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
1077
|
+
else {
|
|
1078
|
+
if (hasPrevMatch) {
|
|
1079
|
+
// Non-lazy mode shouldn't reach here, but handle gracefully
|
|
1080
|
+
symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
|
|
1081
|
+
hasPrevMatch = false;
|
|
1082
|
+
prevMatchLen = 0;
|
|
1083
|
+
}
|
|
1084
|
+
symbols.push({ litOrLen: data[pos], dist: 0 });
|
|
1085
|
+
insertWindow(pos, 1);
|
|
1086
|
+
pos++;
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
// Flush pending lazy match
|
|
1090
|
+
if (hasPrevMatch) {
|
|
1091
|
+
symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
|
|
1092
|
+
const matchEnd = Math.min(pos - 1 + prevMatchLen, end);
|
|
1093
|
+
for (let i = pos; i < matchEnd; i++) {
|
|
1094
|
+
insertHash(i);
|
|
1095
|
+
}
|
|
1096
|
+
insertWindow(pos, matchEnd - pos);
|
|
1097
|
+
hasPrevMatch = false;
|
|
1098
|
+
prevMatchLen = 0;
|
|
1099
|
+
}
|
|
1100
|
+
if (state) {
|
|
1101
|
+
state.windowLen = wLen;
|
|
1102
|
+
state.totalIn = totalIn + (end - start);
|
|
1103
|
+
state.hasPrevMatch = hasPrevMatch;
|
|
1104
|
+
state.prevMatchLen = prevMatchLen;
|
|
1105
|
+
state.prevMatchDist = prevMatchDist;
|
|
1106
|
+
state.prevLiteral = prevLiteral;
|
|
1107
|
+
}
|
|
1108
|
+
return symbols;
|
|
1109
|
+
}
|
|
1110
|
+
// ============================================================================
|
|
1111
|
+
// Stateful Streaming Deflater
|
|
1112
|
+
// ============================================================================
|
|
1113
|
+
/**
|
|
1114
|
+
* Stateful synchronous DEFLATE compressor with Dynamic Huffman encoding.
|
|
1115
|
+
*
|
|
1116
|
+
* Unlike `deflateRawCompressed` (which is a one-shot function), this class
|
|
1117
|
+
* maintains state across multiple `write()` calls:
|
|
1118
|
+
*
|
|
1119
|
+
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
1120
|
+
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
1121
|
+
* hash tables for fast lookup.
|
|
1122
|
+
* - **Lazy matching**: configurable per compression level.
|
|
1123
|
+
* - **Dynamic Huffman**: each block builds optimal Huffman trees from
|
|
1124
|
+
* actual symbol frequencies (BTYPE=2), producing significantly smaller
|
|
1125
|
+
* output than fixed Huffman (BTYPE=1).
|
|
1126
|
+
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
1127
|
+
* a single valid DEFLATE bit-stream without alignment issues.
|
|
1128
|
+
*
|
|
1129
|
+
* Each `write()` emits one non-final Dynamic Huffman block (BFINAL=0).
|
|
1130
|
+
* `finish()` emits a final empty fixed-Huffman block (BFINAL=1).
|
|
1131
|
+
*
|
|
1132
|
+
* This is the pure-JS equivalent of Node.js `zlib.deflateRawSync` with
|
|
1133
|
+
* `Z_SYNC_FLUSH`, used by the streaming ZIP writer (`pushSync`) to achieve
|
|
1134
|
+
* constant-memory streaming in both Node.js and browsers.
|
|
1135
|
+
*
|
|
1136
|
+
* @param level - Compression level (0-9). Level 0 emits STORE blocks.
|
|
1137
|
+
* Default: 6 (matching zlib default).
|
|
1138
|
+
*/
|
|
1139
|
+
class SyncDeflater {
|
|
1140
|
+
constructor(level = 6) {
|
|
1141
|
+
this._output = new BitWriter();
|
|
1142
|
+
this._state = {
|
|
1143
|
+
head: new Int32Array(HASH_SIZE),
|
|
1144
|
+
prev: new Int32Array(MAX_DIST),
|
|
1145
|
+
window: new Uint8Array(MAX_DIST),
|
|
1146
|
+
windowLen: 0,
|
|
1147
|
+
totalIn: 0,
|
|
1148
|
+
hasPrevMatch: false,
|
|
1149
|
+
prevMatchLen: 0,
|
|
1150
|
+
prevMatchDist: 0,
|
|
1151
|
+
prevLiteral: 0
|
|
1152
|
+
};
|
|
1153
|
+
this._level = Math.max(0, Math.min(9, level));
|
|
1154
|
+
this._config = getLZ77Config(this._level);
|
|
1155
|
+
}
|
|
1156
|
+
/**
|
|
1157
|
+
* Compress a chunk and return the compressed bytes produced so far.
|
|
1158
|
+
* The output is a valid prefix of a DEFLATE stream (one or more non-final blocks).
|
|
1159
|
+
*/
|
|
1160
|
+
write(data) {
|
|
1161
|
+
if (data.length === 0) {
|
|
1162
|
+
return new Uint8Array(0);
|
|
1163
|
+
}
|
|
1164
|
+
const out = this._output;
|
|
1165
|
+
if (this._level === 0) {
|
|
1166
|
+
// Store mode: emit uncompressed block(s)
|
|
1167
|
+
this._writeStore(data);
|
|
1168
|
+
return out.flushBytes();
|
|
1169
|
+
}
|
|
1170
|
+
// LZ77 + Dynamic Huffman
|
|
1171
|
+
const symbols = lz77Compress(data, 0, data.length, this._config, this._state);
|
|
1172
|
+
emitDynamicBlock(out, symbols, false);
|
|
899
1173
|
return out.flushBytes();
|
|
900
1174
|
}
|
|
901
1175
|
/**
|
|
@@ -906,9 +1180,39 @@ class SyncDeflater {
|
|
|
906
1180
|
const out = this._output;
|
|
907
1181
|
// Final block: BFINAL=1, BTYPE=01, immediately followed by EOB (symbol 256)
|
|
908
1182
|
out.writeBits(1, 1); // BFINAL = 1
|
|
909
|
-
out.writeBits(1, 2); // BTYPE = 01
|
|
1183
|
+
out.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
910
1184
|
writeLiteralCode(out, 256);
|
|
911
1185
|
return out.finish();
|
|
912
1186
|
}
|
|
1187
|
+
/**
|
|
1188
|
+
* Write STORE (uncompressed) blocks for level=0.
|
|
1189
|
+
* Each block is non-final (BFINAL=0); the final block is emitted by finish().
|
|
1190
|
+
*/
|
|
1191
|
+
_writeStore(data) {
|
|
1192
|
+
const out = this._output;
|
|
1193
|
+
const MAX_BLOCK_SIZE = 65535;
|
|
1194
|
+
let offset = 0;
|
|
1195
|
+
while (offset < data.length) {
|
|
1196
|
+
const remaining = data.length - offset;
|
|
1197
|
+
const blockSize = Math.min(MAX_BLOCK_SIZE, remaining);
|
|
1198
|
+
// Align to byte boundary before stored block header
|
|
1199
|
+
out.alignToByte();
|
|
1200
|
+
out.writeBits(0, 1); // BFINAL = 0 (never final; finish() handles that)
|
|
1201
|
+
out.writeBits(0, 2); // BTYPE = 00 (stored)
|
|
1202
|
+
// Align to byte boundary after block header (3 bits → pad to 8)
|
|
1203
|
+
out.alignToByte();
|
|
1204
|
+
// LEN
|
|
1205
|
+
out.writeBits(blockSize & 0xff, 8);
|
|
1206
|
+
out.writeBits((blockSize >> 8) & 0xff, 8);
|
|
1207
|
+
// NLEN
|
|
1208
|
+
out.writeBits(~blockSize & 0xff, 8);
|
|
1209
|
+
out.writeBits((~blockSize >> 8) & 0xff, 8);
|
|
1210
|
+
// Data
|
|
1211
|
+
for (let i = 0; i < blockSize; i++) {
|
|
1212
|
+
out.writeBits(data[offset + i], 8);
|
|
1213
|
+
}
|
|
1214
|
+
offset += blockSize;
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
913
1217
|
}
|
|
914
1218
|
exports.SyncDeflater = SyncDeflater;
|