@cj-tech-master/excelts 6.1.1 → 6.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +14 -5
- package/dist/browser/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/browser/modules/archive/compression/streaming-compress.d.ts +20 -8
- package/dist/browser/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/browser/modules/archive/zip/stream.js +1 -1
- package/dist/cjs/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/cjs/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/cjs/modules/archive/zip/stream.js +1 -1
- package/dist/esm/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/esm/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/esm/modules/archive/zip/stream.js +1 -1
- package/dist/iife/excelts.iife.js +182 -49
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +30 -30
- package/dist/types/modules/archive/compression/deflate-fallback.d.ts +14 -5
- package/dist/types/modules/archive/compression/streaming-compress.d.ts +20 -8
- package/package.json +2 -2
|
@@ -351,13 +351,36 @@ function deflateRawStore(data) {
|
|
|
351
351
|
return output.subarray(0, outPos);
|
|
352
352
|
}
|
|
353
353
|
// ============================================================================
|
|
354
|
-
// LZ77 + Huffman Compression
|
|
354
|
+
// LZ77 + Huffman Compression
|
|
355
355
|
// ============================================================================
|
|
356
|
+
// Hash table size must be a power of 2. 32768 entries keeps memory reasonable
|
|
357
|
+
// while providing a good distribution for the 3-byte hash.
|
|
358
|
+
const HASH_SIZE = 32768;
|
|
359
|
+
const HASH_MASK = HASH_SIZE - 1;
|
|
360
|
+
// Maximum hash chain length to walk per position. Longer chains find better
|
|
361
|
+
// matches at the cost of speed. 64 is a good balance (~zlib level 5-6).
|
|
362
|
+
const MAX_CHAIN_LEN = 64;
|
|
363
|
+
// Minimum match length for LZ77 (RFC 1951 minimum).
|
|
364
|
+
const MIN_MATCH = 3;
|
|
365
|
+
// Maximum match length (RFC 1951 maximum).
|
|
366
|
+
const MAX_MATCH = 258;
|
|
367
|
+
// Maximum back-reference distance (RFC 1951 / 32 KB sliding window).
|
|
368
|
+
const MAX_DIST = 32768;
|
|
356
369
|
/**
|
|
357
|
-
*
|
|
370
|
+
* Hash function for 3-byte sequences.
|
|
371
|
+
* Uses a multiplicative hash for better distribution than the naive
|
|
372
|
+
* shift-or approach. The constant 0x1e35a7bd is chosen for good avalanche
|
|
373
|
+
* properties in the lower bits.
|
|
374
|
+
*/
|
|
375
|
+
function hash3(a, b, c) {
|
|
376
|
+
return ((((a << 16) | (b << 8) | c) * 0x1e35a7bd) >>> 17) & HASH_MASK;
|
|
377
|
+
}
|
|
378
|
+
/**
|
|
379
|
+
* Compress data using DEFLATE with fixed Huffman codes.
|
|
358
380
|
*
|
|
359
|
-
*
|
|
360
|
-
*
|
|
381
|
+
* Uses LZ77 with hash chains and lazy matching for significantly better
|
|
382
|
+
* compression than a single-entry hash table. The algorithm is modelled
|
|
383
|
+
* after zlib's "fast" and "slow" deflate strategies.
|
|
361
384
|
*
|
|
362
385
|
* @param data - Data to compress
|
|
363
386
|
* @returns Compressed data in deflate-raw format
|
|
@@ -375,43 +398,106 @@ function deflateRawCompressed(data) {
|
|
|
375
398
|
// Write final block header with fixed Huffman (BFINAL=1, BTYPE=01)
|
|
376
399
|
output.writeBits(1, 1); // BFINAL
|
|
377
400
|
output.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
378
|
-
//
|
|
379
|
-
|
|
401
|
+
// --- Hash chain tables (typed arrays for performance) ---
|
|
402
|
+
// head[h]: most recent position with hash h (0 = unused, positions are 1-based internally)
|
|
403
|
+
// prev[pos & (MAX_DIST-1)]: previous position in the chain for the same hash
|
|
404
|
+
const head = new Int32Array(HASH_SIZE); // filled with 0 (no match)
|
|
405
|
+
const prev = new Int32Array(MAX_DIST);
|
|
380
406
|
let pos = 0;
|
|
407
|
+
// State for lazy matching:
|
|
408
|
+
// When we find a match at position N, we check position N+1 too.
|
|
409
|
+
// If N+1 has a longer match we emit a literal for N and use the N+1 match.
|
|
410
|
+
let prevMatchLen = 0;
|
|
411
|
+
let prevMatchDist = 0;
|
|
412
|
+
let prevLiteral = 0;
|
|
413
|
+
let hasPrevMatch = false;
|
|
381
414
|
while (pos < data.length) {
|
|
382
|
-
// Try to find a match
|
|
383
415
|
let bestLen = 0;
|
|
384
416
|
let bestDist = 0;
|
|
385
417
|
if (pos + 2 < data.length) {
|
|
386
|
-
const
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
418
|
+
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
419
|
+
// Walk the hash chain to find the best (longest) match
|
|
420
|
+
let chainLen = MAX_CHAIN_LEN;
|
|
421
|
+
let matchHead = head[h];
|
|
422
|
+
while (matchHead > 0 && chainLen-- > 0) {
|
|
423
|
+
const mPos = matchHead - 1; // convert from 1-based to 0-based
|
|
424
|
+
const dist = pos - mPos;
|
|
425
|
+
if (dist > MAX_DIST || dist <= 0) {
|
|
426
|
+
break;
|
|
427
|
+
}
|
|
428
|
+
// Quick check: compare the byte just beyond current best length first
|
|
429
|
+
// to skip obviously shorter matches early.
|
|
430
|
+
if (bestLen >= MIN_MATCH && data[mPos + bestLen] !== data[pos + bestLen]) {
|
|
431
|
+
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
432
|
+
continue;
|
|
433
|
+
}
|
|
434
|
+
// Full scan
|
|
390
435
|
let len = 0;
|
|
391
|
-
const maxLen = Math.min(
|
|
392
|
-
while (len < maxLen && data[
|
|
436
|
+
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
437
|
+
while (len < maxLen && data[mPos + len] === data[pos + len]) {
|
|
393
438
|
len++;
|
|
394
439
|
}
|
|
395
|
-
if (len
|
|
440
|
+
if (len > bestLen) {
|
|
396
441
|
bestLen = len;
|
|
397
442
|
bestDist = dist;
|
|
443
|
+
if (len >= MAX_MATCH) {
|
|
444
|
+
break; // can't do better
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
448
|
+
}
|
|
449
|
+
// Insert current position into the hash chain
|
|
450
|
+
prev[pos & (MAX_DIST - 1)] = head[h];
|
|
451
|
+
head[h] = pos + 1; // 1-based
|
|
452
|
+
}
|
|
453
|
+
// --- Lazy matching logic ---
|
|
454
|
+
if (hasPrevMatch) {
|
|
455
|
+
if (bestLen > prevMatchLen) {
|
|
456
|
+
// Current position has a better match; emit previous as literal
|
|
457
|
+
writeLiteralCode(output, prevLiteral);
|
|
458
|
+
// Now adopt current match as the pending one
|
|
459
|
+
prevMatchLen = bestLen;
|
|
460
|
+
prevMatchDist = bestDist;
|
|
461
|
+
prevLiteral = data[pos];
|
|
462
|
+
pos++;
|
|
463
|
+
}
|
|
464
|
+
else {
|
|
465
|
+
// Previous match is at least as good; emit it
|
|
466
|
+
writeLengthCode(output, prevMatchLen);
|
|
467
|
+
writeDistanceCode(output, prevMatchDist);
|
|
468
|
+
// Insert hash entries for the skipped bytes (positions inside the match)
|
|
469
|
+
// so future matches can find them. We already inserted pos-1 (the match
|
|
470
|
+
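// start); now insert pos through pos + prevMatchLen - 2. NOTE(review): pos itself was already inserted just above (whenever pos + 2 < data.length), so inserting it again here sets prev[pos & (MAX_DIST - 1)] to pos + 1, i.e. a self-referencing chain entry — confirm and consider starting this loop at pos + 1.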
|
|
471
|
+
const matchEnd = pos - 1 + prevMatchLen;
|
|
472
|
+
for (let i = pos; i < matchEnd && i + 2 < data.length; i++) {
|
|
473
|
+
const h = hash3(data[i], data[i + 1], data[i + 2]);
|
|
474
|
+
prev[i & (MAX_DIST - 1)] = head[h];
|
|
475
|
+
head[h] = i + 1;
|
|
398
476
|
}
|
|
477
|
+
pos = matchEnd;
|
|
478
|
+
hasPrevMatch = false;
|
|
479
|
+
prevMatchLen = 0;
|
|
399
480
|
}
|
|
400
|
-
// Update hash table
|
|
401
|
-
hashTable.set(hash, pos);
|
|
402
481
|
}
|
|
403
|
-
if (bestLen >=
|
|
404
|
-
//
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
482
|
+
else if (bestLen >= MIN_MATCH) {
|
|
483
|
+
// We have a match; hold it and try the next position (lazy evaluation)
|
|
484
|
+
hasPrevMatch = true;
|
|
485
|
+
prevMatchLen = bestLen;
|
|
486
|
+
prevMatchDist = bestDist;
|
|
487
|
+
prevLiteral = data[pos];
|
|
488
|
+
pos++;
|
|
408
489
|
}
|
|
409
490
|
else {
|
|
410
|
-
//
|
|
491
|
+
// No match — emit literal
|
|
411
492
|
writeLiteralCode(output, data[pos]);
|
|
412
493
|
pos++;
|
|
413
494
|
}
|
|
414
495
|
}
|
|
496
|
+
// Flush any pending lazy match
|
|
497
|
+
if (hasPrevMatch) {
|
|
498
|
+
writeLengthCode(output, prevMatchLen);
|
|
499
|
+
writeDistanceCode(output, prevMatchDist);
|
|
500
|
+
}
|
|
415
501
|
// Write end-of-block symbol (256)
|
|
416
502
|
writeLiteralCode(output, 256);
|
|
417
503
|
return output.finish();
|
|
@@ -606,7 +692,10 @@ const WINDOW_SIZE = 32768;
|
|
|
606
692
|
* maintains state across multiple `write()` calls:
|
|
607
693
|
*
|
|
608
694
|
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
609
|
-
* - **Hash
|
|
695
|
+
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
696
|
+
* hash tables for fast lookup.
|
|
697
|
+
* - **Lazy matching**: each match is compared with the next position's match
|
|
698
|
+
* to pick the longer one.
|
|
610
699
|
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
611
700
|
* a single valid DEFLATE bit-stream without alignment issues.
|
|
612
701
|
*
|
|
@@ -620,13 +709,20 @@ const WINDOW_SIZE = 32768;
|
|
|
620
709
|
class SyncDeflater {
|
|
621
710
|
constructor() {
|
|
622
711
|
this._output = new BitWriter();
|
|
623
|
-
|
|
712
|
+
// Hash chain tables — shared across chunks for cross-chunk matching.
|
|
713
|
+
this._head = new Int32Array(HASH_SIZE);
|
|
714
|
+
this._prev = new Int32Array(MAX_DIST);
|
|
624
715
|
/** Sliding window: the last WINDOW_SIZE bytes of uncompressed data. */
|
|
625
716
|
this._window = new Uint8Array(WINDOW_SIZE);
|
|
626
717
|
/** Number of valid bytes currently in the window. */
|
|
627
718
|
this._windowLen = 0;
|
|
628
719
|
/** Total bytes written so far (monotonically increasing; used for hash offsets). */
|
|
629
720
|
this._totalIn = 0;
|
|
721
|
+
// Lazy matching state that may span across chunks.
|
|
722
|
+
this._hasPrevMatch = false;
|
|
723
|
+
this._prevMatchLen = 0;
|
|
724
|
+
this._prevMatchDist = 0;
|
|
725
|
+
this._prevLiteral = 0;
|
|
630
726
|
}
|
|
631
727
|
/**
|
|
632
728
|
* Compress a chunk and return the compressed bytes produced so far.
|
|
@@ -642,57 +738,163 @@ class SyncDeflater {
|
|
|
642
738
|
out.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
643
739
|
const window = this._window;
|
|
644
740
|
let wLen = this._windowLen;
|
|
645
|
-
const
|
|
741
|
+
const head = this._head;
|
|
742
|
+
const prevArr = this._prev;
|
|
646
743
|
const totalIn = this._totalIn;
|
|
647
|
-
|
|
744
|
+
let hasPrevMatch = this._hasPrevMatch;
|
|
745
|
+
let prevMatchLen = this._prevMatchLen;
|
|
746
|
+
let prevMatchDist = this._prevMatchDist;
|
|
747
|
+
let prevLiteral = this._prevLiteral;
|
|
748
|
+
/**
|
|
749
|
+
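* Insert a chunk-local position into the hash chain, keyed by its global offset (totalIn + localPos). Positions with fewer than 3 bytes remaining in the chunk are skipped. The sliding window is filled separately by insertWindow.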
|
|
750
|
+
*/
|
|
751
|
+
const insertHash = (localPos) => {
|
|
752
|
+
if (localPos + 2 >= data.length) {
|
|
753
|
+
return;
|
|
754
|
+
}
|
|
755
|
+
const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
|
|
756
|
+
const globalPos = totalIn + localPos;
|
|
757
|
+
prevArr[globalPos & (MAX_DIST - 1)] = head[h];
|
|
758
|
+
head[h] = globalPos + 1; // 1-based
|
|
759
|
+
};
|
|
760
|
+
const insertWindow = (localPos, count) => {
|
|
761
|
+
for (let i = 0; i < count; i++) {
|
|
762
|
+
window[(wLen + i) & (WINDOW_SIZE - 1)] = data[localPos + i];
|
|
763
|
+
}
|
|
764
|
+
wLen += count;
|
|
765
|
+
};
|
|
766
|
+
let pos = 0;
|
|
767
|
+
for (; pos < data.length;) {
|
|
648
768
|
let bestLen = 0;
|
|
649
769
|
let bestDist = 0;
|
|
650
770
|
if (pos + 2 < data.length) {
|
|
651
|
-
const h = (data[pos]
|
|
652
|
-
const
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
771
|
+
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
772
|
+
const globalPos = totalIn + pos;
|
|
773
|
+
// Walk the hash chain
|
|
774
|
+
let chainLen = MAX_CHAIN_LEN;
|
|
775
|
+
let matchHead = head[h];
|
|
776
|
+
while (matchHead > 0 && chainLen-- > 0) {
|
|
777
|
+
const mGlobalPos = matchHead - 1;
|
|
778
|
+
const dist = globalPos - mGlobalPos;
|
|
779
|
+
if (dist > MAX_DIST || dist <= 0) {
|
|
780
|
+
break;
|
|
781
|
+
}
|
|
782
|
+
// Compare bytes through the sliding window + current chunk
|
|
783
|
+
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
784
|
+
let len = 0;
|
|
785
|
+
// Quick reject on the byte beyond current bestLen
|
|
786
|
+
if (bestLen >= MIN_MATCH) {
|
|
787
|
+
const checkOffset = mGlobalPos + bestLen;
|
|
788
|
+
// Determine the byte at checkOffset
|
|
789
|
+
let checkByte;
|
|
790
|
+
const checkLocal = checkOffset - totalIn;
|
|
791
|
+
if (checkLocal >= 0 && checkLocal < data.length) {
|
|
792
|
+
checkByte = data[checkLocal];
|
|
793
|
+
}
|
|
794
|
+
else {
|
|
795
|
+
checkByte = window[checkOffset & (WINDOW_SIZE - 1)];
|
|
796
|
+
}
|
|
797
|
+
if (checkByte !== data[pos + bestLen]) {
|
|
798
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
799
|
+
continue;
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
while (len < maxLen) {
|
|
803
|
+
const matchOffset = mGlobalPos + len;
|
|
804
|
+
// Get byte from window or current data
|
|
805
|
+
let matchByte;
|
|
806
|
+
const matchLocal = matchOffset - totalIn;
|
|
807
|
+
if (matchLocal >= 0 && matchLocal < data.length) {
|
|
808
|
+
matchByte = data[matchLocal];
|
|
809
|
+
}
|
|
810
|
+
else {
|
|
811
|
+
matchByte = window[matchOffset & (WINDOW_SIZE - 1)];
|
|
812
|
+
}
|
|
813
|
+
if (matchByte !== data[pos + len]) {
|
|
814
|
+
break;
|
|
666
815
|
}
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
816
|
+
len++;
|
|
817
|
+
}
|
|
818
|
+
if (len > bestLen) {
|
|
819
|
+
bestLen = len;
|
|
820
|
+
bestDist = dist;
|
|
821
|
+
if (len >= MAX_MATCH) {
|
|
822
|
+
break;
|
|
670
823
|
}
|
|
671
824
|
}
|
|
825
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
672
826
|
}
|
|
673
|
-
|
|
827
|
+
// Insert current position into hash chain
|
|
828
|
+
prevArr[globalPos & (MAX_DIST - 1)] = head[h];
|
|
829
|
+
head[h] = globalPos + 1;
|
|
674
830
|
}
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
831
|
+
// --- Lazy matching logic ---
|
|
832
|
+
if (hasPrevMatch) {
|
|
833
|
+
if (bestLen > prevMatchLen) {
|
|
834
|
+
// Current position wins — emit previous as literal
|
|
835
|
+
writeLiteralCode(out, prevLiteral);
|
|
836
|
+
prevMatchLen = bestLen;
|
|
837
|
+
prevMatchDist = bestDist;
|
|
838
|
+
prevLiteral = data[pos];
|
|
839
|
+
insertWindow(pos, 1);
|
|
840
|
+
pos++;
|
|
841
|
+
}
|
|
842
|
+
else {
|
|
843
|
+
// Previous match wins — emit it
|
|
844
|
+
writeLengthCode(out, prevMatchLen);
|
|
845
|
+
writeDistanceCode(out, prevMatchDist);
|
|
846
|
+
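// Insert hash entries for skipped positions inside the match. NOTE(review): pos was already inserted into the chain above (whenever pos + 2 < data.length), so insertHash(pos) makes prevArr[globalPos & (MAX_DIST - 1)] point back at globalPos + 1, i.e. itself — confirm; the loop likely should start at pos + 1.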
|
|
847
|
+
const matchEnd = pos - 1 + prevMatchLen;
|
|
848
|
+
const insertEnd = Math.min(matchEnd, data.length);
|
|
849
|
+
for (let i = pos; i < insertEnd; i++) {
|
|
850
|
+
insertHash(i);
|
|
851
|
+
}
|
|
852
|
+
insertWindow(pos, insertEnd - pos);
|
|
853
|
+
pos = insertEnd;
|
|
854
|
+
hasPrevMatch = false;
|
|
855
|
+
prevMatchLen = 0;
|
|
682
856
|
}
|
|
683
|
-
|
|
857
|
+
}
|
|
858
|
+
else if (bestLen >= MIN_MATCH) {
|
|
859
|
+
hasPrevMatch = true;
|
|
860
|
+
prevMatchLen = bestLen;
|
|
861
|
+
prevMatchDist = bestDist;
|
|
862
|
+
prevLiteral = data[pos];
|
|
863
|
+
insertWindow(pos, 1);
|
|
864
|
+
pos++;
|
|
684
865
|
}
|
|
685
866
|
else {
|
|
686
867
|
writeLiteralCode(out, data[pos]);
|
|
687
|
-
|
|
688
|
-
wLen++;
|
|
868
|
+
insertWindow(pos, 1);
|
|
689
869
|
pos++;
|
|
690
870
|
}
|
|
691
871
|
}
|
|
872
|
+
// If there's a pending lazy match and we're at chunk boundary,
|
|
873
|
+
// flush it now (the next chunk will start fresh for lazy matching).
|
|
874
|
+
if (hasPrevMatch) {
|
|
875
|
+
writeLengthCode(out, prevMatchLen);
|
|
876
|
+
writeDistanceCode(out, prevMatchDist);
|
|
877
|
+
// The pending match started at pos-1 and covers prevMatchLen bytes.
|
|
878
|
+
// pos-1 was already hashed/windowed when it was first encountered;
|
|
879
|
+
// now insert the remaining positions (pos .. pos-1+prevMatchLen-1)
|
|
880
|
+
// into hash chains and the sliding window so the next chunk can
|
|
881
|
+
// reference them.
|
|
882
|
+
const matchEnd = Math.min(pos - 1 + prevMatchLen, data.length);
|
|
883
|
+
for (let i = pos; i < matchEnd; i++) {
|
|
884
|
+
insertHash(i);
|
|
885
|
+
}
|
|
886
|
+
insertWindow(pos, matchEnd - pos);
|
|
887
|
+
hasPrevMatch = false;
|
|
888
|
+
prevMatchLen = 0;
|
|
889
|
+
}
|
|
692
890
|
// End-of-block symbol
|
|
693
891
|
writeLiteralCode(out, 256);
|
|
694
892
|
this._windowLen = wLen;
|
|
695
893
|
this._totalIn = totalIn + data.length;
|
|
894
|
+
this._hasPrevMatch = hasPrevMatch;
|
|
895
|
+
this._prevMatchLen = prevMatchLen;
|
|
896
|
+
this._prevMatchDist = prevMatchDist;
|
|
897
|
+
this._prevLiteral = prevLiteral;
|
|
696
898
|
// Flush completed bytes from the bit writer
|
|
697
899
|
return out.flushBytes();
|
|
698
900
|
}
|
|
@@ -98,37 +98,82 @@ function createUnzlibStream(_options = {}) {
|
|
|
98
98
|
// Synchronous stateful deflater (Node.js — native zlib)
|
|
99
99
|
// =============================================================================
|
|
100
100
|
/**
|
|
101
|
-
*
|
|
101
|
+
* Minimum batch size before flushing to the native zlib compressor.
|
|
102
102
|
*
|
|
103
|
-
*
|
|
104
|
-
*
|
|
105
|
-
*
|
|
106
|
-
*
|
|
103
|
+
* Small chunks (e.g. one spreadsheet row ≈ 200-400 bytes) compress very
|
|
104
|
+
* poorly when each is given its own zlib context because the LZ77 dictionary
|
|
105
|
+
* starts empty every time. Batching into ≥ 64 KB mega-chunks gives zlib
|
|
106
|
+
* enough history to find good matches, bringing compression ratios within
|
|
107
|
+
* ~1% of single-shot compression.
|
|
107
108
|
*
|
|
108
|
-
*
|
|
109
|
-
*
|
|
110
|
-
|
|
109
|
+
* 64 KB is chosen as a sweet spot: large enough for good compression,
|
|
110
|
+
* small enough to keep memory bounded and latency low.
|
|
111
|
+
*/
|
|
112
|
+
const SYNC_DEFLATE_BATCH_SIZE = 65536;
|
|
113
|
+
/**
|
|
114
|
+
* Node.js synchronous deflater that batches small writes for better
|
|
115
|
+
* compression.
|
|
116
|
+
*
|
|
117
|
+
* Previous implementation compressed each `write()` call independently
|
|
118
|
+
* with `deflateRawSync()`, creating a fresh zlib context every time.
|
|
119
|
+
* For streaming workloads that push many small chunks (e.g. WorkbookWriter
|
|
120
|
+
* writing one row at a time), this destroyed the LZ77 dictionary between
|
|
121
|
+
* chunks and caused compression ratios to drop from ~82% to ~58%.
|
|
122
|
+
*
|
|
123
|
+
* The new implementation accumulates incoming data into an internal buffer
|
|
124
|
+
* and only calls `deflateRawSync()` when the buffer reaches 64 KB (or on
|
|
125
|
+
* `finish()`). Each batch is still compressed independently, but 64 KB
|
|
126
|
+
* is enough for zlib to build a good dictionary — the compression ratio
|
|
127
|
+
* is within ~1% of a single-shot compression of the entire input.
|
|
128
|
+
*
|
|
129
|
+
* The trade-off is slightly higher latency (compressed output is not
|
|
130
|
+
* returned byte-for-byte immediately), but this is acceptable because
|
|
131
|
+
* the ZIP writer buffers output anyway and the streaming contract only
|
|
132
|
+
* requires data to flow *eventually*, not after every single write.
|
|
111
133
|
*/
|
|
112
134
|
class SyncDeflater {
|
|
113
135
|
constructor(level = defaults_1.DEFAULT_COMPRESS_LEVEL) {
|
|
136
|
+
this._pending = [];
|
|
137
|
+
this._pendingSize = 0;
|
|
114
138
|
this._level = level;
|
|
115
139
|
}
|
|
116
140
|
write(data) {
|
|
117
141
|
if (data.length === 0) {
|
|
118
142
|
return new Uint8Array(0);
|
|
119
143
|
}
|
|
120
|
-
|
|
144
|
+
this._pending.push(data);
|
|
145
|
+
this._pendingSize += data.length;
|
|
146
|
+
if (this._pendingSize >= SYNC_DEFLATE_BATCH_SIZE) {
|
|
147
|
+
return this._flushBatch(false);
|
|
148
|
+
}
|
|
149
|
+
return new Uint8Array(0);
|
|
150
|
+
}
|
|
151
|
+
finish() {
|
|
152
|
+
return this._flushBatch(true);
|
|
153
|
+
}
|
|
154
|
+
_flushBatch(final) {
|
|
155
|
+
let input;
|
|
156
|
+
if (this._pending.length === 0) {
|
|
157
|
+
input = Buffer.alloc(0);
|
|
158
|
+
}
|
|
159
|
+
else if (this._pending.length === 1) {
|
|
160
|
+
input = Buffer.from(this._pending[0]);
|
|
161
|
+
}
|
|
162
|
+
else {
|
|
163
|
+
input = Buffer.concat(this._pending);
|
|
164
|
+
}
|
|
165
|
+
this._pending.length = 0;
|
|
166
|
+
this._pendingSize = 0;
|
|
167
|
+
if (input.length === 0 && !final) {
|
|
168
|
+
return new Uint8Array(0);
|
|
169
|
+
}
|
|
170
|
+
const result = (0, zlib_1.deflateRawSync)(input, {
|
|
121
171
|
level: this._level,
|
|
122
|
-
finishFlush: zlib_1.constants.Z_SYNC_FLUSH
|
|
172
|
+
finishFlush: final ? zlib_1.constants.Z_FINISH : zlib_1.constants.Z_SYNC_FLUSH
|
|
123
173
|
});
|
|
124
174
|
// deflateRawSync returns a Buffer sharing a 16 KB slab ArrayBuffer.
|
|
125
175
|
// Copy to a tight Uint8Array so the slab can be reclaimed.
|
|
126
176
|
return new Uint8Array(result);
|
|
127
177
|
}
|
|
128
|
-
finish() {
|
|
129
|
-
// Emit a final empty DEFLATE block (BFINAL=1, BTYPE=01, EOB).
|
|
130
|
-
// This terminates the concatenated DEFLATE stream.
|
|
131
|
-
return new Uint8Array((0, zlib_1.deflateRawSync)(Buffer.alloc(0), { level: this._level }));
|
|
132
|
-
}
|
|
133
178
|
}
|
|
134
179
|
exports.SyncDeflater = SyncDeflater;
|
|
@@ -450,7 +450,7 @@ class ZipDeflateFile {
|
|
|
450
450
|
// Stateful synchronous compression — maintains LZ77 window and bit position
|
|
451
451
|
// across chunks so the output is a single valid DEFLATE stream.
|
|
452
452
|
if (!this._syncDeflater) {
|
|
453
|
-
this._syncDeflater = new streaming_compress_1.SyncDeflater();
|
|
453
|
+
this._syncDeflater = new streaming_compress_1.SyncDeflater(this.level);
|
|
454
454
|
}
|
|
455
455
|
if (data.length > 0) {
|
|
456
456
|
const compressed = this._syncDeflater.write(data);
|