@cj-tech-master/excelts 6.1.1 → 6.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +14 -5
- package/dist/browser/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/browser/modules/archive/compression/streaming-compress.d.ts +20 -8
- package/dist/browser/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/browser/modules/archive/zip/stream.js +1 -1
- package/dist/cjs/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/cjs/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/cjs/modules/archive/zip/stream.js +1 -1
- package/dist/esm/modules/archive/compression/deflate-fallback.js +257 -55
- package/dist/esm/modules/archive/compression/streaming-compress.js +60 -15
- package/dist/esm/modules/archive/zip/stream.js +1 -1
- package/dist/iife/excelts.iife.js +182 -49
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +30 -30
- package/dist/types/modules/archive/compression/deflate-fallback.d.ts +14 -5
- package/dist/types/modules/archive/compression/streaming-compress.d.ts +20 -8
- package/package.json +2 -2
|
@@ -28,10 +28,11 @@ export declare function inflateRaw(data: Uint8Array): Uint8Array;
|
|
|
28
28
|
*/
|
|
29
29
|
export declare function deflateRawStore(data: Uint8Array): Uint8Array;
|
|
30
30
|
/**
|
|
31
|
-
* Compress data using DEFLATE with fixed Huffman codes
|
|
31
|
+
* Compress data using DEFLATE with fixed Huffman codes.
|
|
32
32
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
33
|
+
* Uses LZ77 with hash chains and lazy matching for significantly better
|
|
34
|
+
* compression than a single-entry hash table. The algorithm is modelled
|
|
35
|
+
* after zlib's "fast" and "slow" deflate strategies.
|
|
35
36
|
*
|
|
36
37
|
* @param data - Data to compress
|
|
37
38
|
* @returns Compressed data in deflate-raw format
|
|
@@ -44,7 +45,10 @@ export declare function deflateRawCompressed(data: Uint8Array): Uint8Array;
|
|
|
44
45
|
* maintains state across multiple `write()` calls:
|
|
45
46
|
*
|
|
46
47
|
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
47
|
-
* - **Hash
|
|
48
|
+
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
49
|
+
* hash tables for fast lookup.
|
|
50
|
+
* - **Lazy matching**: each match is compared with the next position's match
|
|
51
|
+
* to pick the longer one.
|
|
48
52
|
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
49
53
|
* a single valid DEFLATE bit-stream without alignment issues.
|
|
50
54
|
*
|
|
@@ -57,13 +61,18 @@ export declare function deflateRawCompressed(data: Uint8Array): Uint8Array;
|
|
|
57
61
|
*/
|
|
58
62
|
export declare class SyncDeflater {
|
|
59
63
|
private _output;
|
|
60
|
-
private
|
|
64
|
+
private _head;
|
|
65
|
+
private _prev;
|
|
61
66
|
/** Sliding window: the last WINDOW_SIZE bytes of uncompressed data. */
|
|
62
67
|
private _window;
|
|
63
68
|
/** Number of valid bytes currently in the window. */
|
|
64
69
|
private _windowLen;
|
|
65
70
|
/** Total bytes written so far (monotonically increasing; used for hash offsets). */
|
|
66
71
|
private _totalIn;
|
|
72
|
+
private _hasPrevMatch;
|
|
73
|
+
private _prevMatchLen;
|
|
74
|
+
private _prevMatchDist;
|
|
75
|
+
private _prevLiteral;
|
|
67
76
|
/**
|
|
68
77
|
* Compress a chunk and return the compressed bytes produced so far.
|
|
69
78
|
* The output is a valid prefix of a DEFLATE stream (one or more non-final blocks).
|
|
@@ -345,13 +345,36 @@ export function deflateRawStore(data) {
|
|
|
345
345
|
return output.subarray(0, outPos);
|
|
346
346
|
}
|
|
347
347
|
// ============================================================================
|
|
348
|
-
// LZ77 + Huffman Compression
|
|
348
|
+
// LZ77 + Huffman Compression
|
|
349
349
|
// ============================================================================
|
|
350
|
+
// Hash table size must be a power of 2. 32768 entries keeps memory reasonable
|
|
351
|
+
// while providing a good distribution for the 3-byte hash.
|
|
352
|
+
const HASH_SIZE = 32768;
|
|
353
|
+
const HASH_MASK = HASH_SIZE - 1;
|
|
354
|
+
// Maximum hash chain length to walk per position. Longer chains find better
|
|
355
|
+
// matches at the cost of speed. 64 is a good balance (~zlib level 5-6).
|
|
356
|
+
const MAX_CHAIN_LEN = 64;
|
|
357
|
+
// Minimum match length for LZ77 (RFC 1951 minimum).
|
|
358
|
+
const MIN_MATCH = 3;
|
|
359
|
+
// Maximum match length (RFC 1951 maximum).
|
|
360
|
+
const MAX_MATCH = 258;
|
|
361
|
+
// Maximum back-reference distance (RFC 1951 / 32 KB sliding window).
|
|
362
|
+
const MAX_DIST = 32768;
|
|
350
363
|
/**
|
|
351
|
-
*
|
|
364
|
+
* Hash function for 3-byte sequences.
|
|
365
|
+
* Uses a multiplicative hash for better distribution than the naive
|
|
366
|
+
* shift-or approach. The constant 0x1e35a7bd is chosen for good avalanche
|
|
367
|
+
* properties; the `>>> 17` shift keeps the top 15 bits of the 32-bit product.
|
|
368
|
+
*/
|
|
369
|
+
function hash3(a, b, c) {
|
|
370
|
+
return ((((a << 16) | (b << 8) | c) * 0x1e35a7bd) >>> 17) & HASH_MASK;
|
|
371
|
+
}
|
|
372
|
+
/**
|
|
373
|
+
* Compress data using DEFLATE with fixed Huffman codes.
|
|
352
374
|
*
|
|
353
|
-
*
|
|
354
|
-
*
|
|
375
|
+
* Uses LZ77 with hash chains and lazy matching for significantly better
|
|
376
|
+
* compression than a single-entry hash table. The algorithm is modelled
|
|
377
|
+
* after zlib's "fast" and "slow" deflate strategies.
|
|
355
378
|
*
|
|
356
379
|
* @param data - Data to compress
|
|
357
380
|
* @returns Compressed data in deflate-raw format
|
|
@@ -369,43 +392,106 @@ export function deflateRawCompressed(data) {
|
|
|
369
392
|
// Write final block header with fixed Huffman (BFINAL=1, BTYPE=01)
|
|
370
393
|
output.writeBits(1, 1); // BFINAL
|
|
371
394
|
output.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
372
|
-
//
|
|
373
|
-
|
|
395
|
+
// --- Hash chain tables (typed arrays for performance) ---
|
|
396
|
+
// head[h]: most recent position with hash h (0 = unused, positions are 1-based internally)
|
|
397
|
+
// prev[pos & (MAX_DIST-1)]: previous position in the chain for the same hash
|
|
398
|
+
const head = new Int32Array(HASH_SIZE); // filled with 0 (no match)
|
|
399
|
+
const prev = new Int32Array(MAX_DIST);
|
|
374
400
|
let pos = 0;
|
|
401
|
+
// State for lazy matching:
|
|
402
|
+
// When we find a match at position N, we check position N+1 too.
|
|
403
|
+
// If N+1 has a longer match we emit a literal for N and use the N+1 match.
|
|
404
|
+
let prevMatchLen = 0;
|
|
405
|
+
let prevMatchDist = 0;
|
|
406
|
+
let prevLiteral = 0;
|
|
407
|
+
let hasPrevMatch = false;
|
|
375
408
|
while (pos < data.length) {
|
|
376
|
-
// Try to find a match
|
|
377
409
|
let bestLen = 0;
|
|
378
410
|
let bestDist = 0;
|
|
379
411
|
if (pos + 2 < data.length) {
|
|
380
|
-
const
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
412
|
+
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
413
|
+
// Walk the hash chain to find the best (longest) match
|
|
414
|
+
let chainLen = MAX_CHAIN_LEN;
|
|
415
|
+
let matchHead = head[h];
|
|
416
|
+
while (matchHead > 0 && chainLen-- > 0) {
|
|
417
|
+
const mPos = matchHead - 1; // convert from 1-based to 0-based
|
|
418
|
+
const dist = pos - mPos;
|
|
419
|
+
if (dist > MAX_DIST || dist <= 0) {
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
// Quick check: compare the byte just beyond current best length first
|
|
423
|
+
// to skip obviously shorter matches early.
|
|
424
|
+
if (bestLen >= MIN_MATCH && data[mPos + bestLen] !== data[pos + bestLen]) {
|
|
425
|
+
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
426
|
+
continue;
|
|
427
|
+
}
|
|
428
|
+
// Full scan
|
|
384
429
|
let len = 0;
|
|
385
|
-
const maxLen = Math.min(
|
|
386
|
-
while (len < maxLen && data[
|
|
430
|
+
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
431
|
+
while (len < maxLen && data[mPos + len] === data[pos + len]) {
|
|
387
432
|
len++;
|
|
388
433
|
}
|
|
389
|
-
if (len
|
|
434
|
+
if (len > bestLen) {
|
|
390
435
|
bestLen = len;
|
|
391
436
|
bestDist = dist;
|
|
437
|
+
if (len >= MAX_MATCH) {
|
|
438
|
+
break; // can't do better
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
442
|
+
}
|
|
443
|
+
// Insert current position into the hash chain
|
|
444
|
+
prev[pos & (MAX_DIST - 1)] = head[h];
|
|
445
|
+
head[h] = pos + 1; // 1-based
|
|
446
|
+
}
|
|
447
|
+
// --- Lazy matching logic ---
|
|
448
|
+
if (hasPrevMatch) {
|
|
449
|
+
if (bestLen > prevMatchLen) {
|
|
450
|
+
// Current position has a better match; emit previous as literal
|
|
451
|
+
writeLiteralCode(output, prevLiteral);
|
|
452
|
+
// Now adopt current match as the pending one
|
|
453
|
+
prevMatchLen = bestLen;
|
|
454
|
+
prevMatchDist = bestDist;
|
|
455
|
+
prevLiteral = data[pos];
|
|
456
|
+
pos++;
|
|
457
|
+
}
|
|
458
|
+
else {
|
|
459
|
+
// Previous match is at least as good; emit it
|
|
460
|
+
writeLengthCode(output, prevMatchLen);
|
|
461
|
+
writeDistanceCode(output, prevMatchDist);
|
|
462
|
+
// Insert hash entries for the skipped bytes (positions inside the match)
|
|
463
|
+
// so future matches can find them. We already inserted pos-1 (the match
|
|
464
|
+
// start); now insert pos through pos + prevMatchLen - 2.
|
|
465
|
+
const matchEnd = pos - 1 + prevMatchLen;
|
|
466
|
+
for (let i = pos; i < matchEnd && i + 2 < data.length; i++) {
|
|
467
|
+
const h = hash3(data[i], data[i + 1], data[i + 2]);
|
|
468
|
+
prev[i & (MAX_DIST - 1)] = head[h];
|
|
469
|
+
head[h] = i + 1;
|
|
392
470
|
}
|
|
471
|
+
pos = matchEnd;
|
|
472
|
+
hasPrevMatch = false;
|
|
473
|
+
prevMatchLen = 0;
|
|
393
474
|
}
|
|
394
|
-
// Update hash table
|
|
395
|
-
hashTable.set(hash, pos);
|
|
396
475
|
}
|
|
397
|
-
if (bestLen >=
|
|
398
|
-
//
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
476
|
+
else if (bestLen >= MIN_MATCH) {
|
|
477
|
+
// We have a match; hold it and try the next position (lazy evaluation)
|
|
478
|
+
hasPrevMatch = true;
|
|
479
|
+
prevMatchLen = bestLen;
|
|
480
|
+
prevMatchDist = bestDist;
|
|
481
|
+
prevLiteral = data[pos];
|
|
482
|
+
pos++;
|
|
402
483
|
}
|
|
403
484
|
else {
|
|
404
|
-
//
|
|
485
|
+
// No match — emit literal
|
|
405
486
|
writeLiteralCode(output, data[pos]);
|
|
406
487
|
pos++;
|
|
407
488
|
}
|
|
408
489
|
}
|
|
490
|
+
// Flush any pending lazy match
|
|
491
|
+
if (hasPrevMatch) {
|
|
492
|
+
writeLengthCode(output, prevMatchLen);
|
|
493
|
+
writeDistanceCode(output, prevMatchDist);
|
|
494
|
+
}
|
|
409
495
|
// Write end-of-block symbol (256)
|
|
410
496
|
writeLiteralCode(output, 256);
|
|
411
497
|
return output.finish();
|
|
@@ -600,7 +686,10 @@ const WINDOW_SIZE = 32768;
|
|
|
600
686
|
* maintains state across multiple `write()` calls:
|
|
601
687
|
*
|
|
602
688
|
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
603
|
-
* - **Hash
|
|
689
|
+
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
690
|
+
* hash tables for fast lookup.
|
|
691
|
+
* - **Lazy matching**: each match is compared with the next position's match
|
|
692
|
+
* to pick the longer one.
|
|
604
693
|
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
605
694
|
* a single valid DEFLATE bit-stream without alignment issues.
|
|
606
695
|
*
|
|
@@ -614,13 +703,20 @@ const WINDOW_SIZE = 32768;
|
|
|
614
703
|
export class SyncDeflater {
|
|
615
704
|
constructor() {
|
|
616
705
|
this._output = new BitWriter();
|
|
617
|
-
|
|
706
|
+
// Hash chain tables — shared across chunks for cross-chunk matching.
|
|
707
|
+
this._head = new Int32Array(HASH_SIZE);
|
|
708
|
+
this._prev = new Int32Array(MAX_DIST);
|
|
618
709
|
/** Sliding window: the last WINDOW_SIZE bytes of uncompressed data. */
|
|
619
710
|
this._window = new Uint8Array(WINDOW_SIZE);
|
|
620
711
|
/** Number of valid bytes currently in the window. */
|
|
621
712
|
this._windowLen = 0;
|
|
622
713
|
/** Total bytes written so far (monotonically increasing; used for hash offsets). */
|
|
623
714
|
this._totalIn = 0;
|
|
715
|
+
// Lazy matching state. write() flushes any pending match before it returns, so no match is carried into the next chunk.
|
|
716
|
+
this._hasPrevMatch = false;
|
|
717
|
+
this._prevMatchLen = 0;
|
|
718
|
+
this._prevMatchDist = 0;
|
|
719
|
+
this._prevLiteral = 0;
|
|
624
720
|
}
|
|
625
721
|
/**
|
|
626
722
|
* Compress a chunk and return the compressed bytes produced so far.
|
|
@@ -636,57 +732,163 @@ export class SyncDeflater {
|
|
|
636
732
|
out.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
637
733
|
const window = this._window;
|
|
638
734
|
let wLen = this._windowLen;
|
|
639
|
-
const
|
|
735
|
+
const head = this._head;
|
|
736
|
+
const prevArr = this._prev;
|
|
640
737
|
const totalIn = this._totalIn;
|
|
641
|
-
|
|
738
|
+
let hasPrevMatch = this._hasPrevMatch;
|
|
739
|
+
let prevMatchLen = this._prevMatchLen;
|
|
740
|
+
let prevMatchDist = this._prevMatchDist;
|
|
741
|
+
let prevLiteral = this._prevLiteral;
|
|
742
|
+
/**
|
|
743
|
+
* Insert a chunk-local position into the hash chain under its global offset (the sliding window is updated separately by insertWindow).
|
|
744
|
+
*/
|
|
745
|
+
const insertHash = (localPos) => {
|
|
746
|
+
if (localPos + 2 >= data.length) {
|
|
747
|
+
return;
|
|
748
|
+
}
|
|
749
|
+
const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
|
|
750
|
+
const globalPos = totalIn + localPos;
|
|
751
|
+
prevArr[globalPos & (MAX_DIST - 1)] = head[h];
|
|
752
|
+
head[h] = globalPos + 1; // 1-based
|
|
753
|
+
};
|
|
754
|
+
const insertWindow = (localPos, count) => {
|
|
755
|
+
for (let i = 0; i < count; i++) {
|
|
756
|
+
window[(wLen + i) & (WINDOW_SIZE - 1)] = data[localPos + i];
|
|
757
|
+
}
|
|
758
|
+
wLen += count;
|
|
759
|
+
};
|
|
760
|
+
let pos = 0;
|
|
761
|
+
for (; pos < data.length;) {
|
|
642
762
|
let bestLen = 0;
|
|
643
763
|
let bestDist = 0;
|
|
644
764
|
if (pos + 2 < data.length) {
|
|
645
|
-
const h = (data[pos]
|
|
646
|
-
const
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
765
|
+
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
766
|
+
const globalPos = totalIn + pos;
|
|
767
|
+
// Walk the hash chain
|
|
768
|
+
let chainLen = MAX_CHAIN_LEN;
|
|
769
|
+
let matchHead = head[h];
|
|
770
|
+
while (matchHead > 0 && chainLen-- > 0) {
|
|
771
|
+
const mGlobalPos = matchHead - 1;
|
|
772
|
+
const dist = globalPos - mGlobalPos;
|
|
773
|
+
if (dist > MAX_DIST || dist <= 0) {
|
|
774
|
+
break;
|
|
775
|
+
}
|
|
776
|
+
// Compare bytes through the sliding window + current chunk
|
|
777
|
+
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
778
|
+
let len = 0;
|
|
779
|
+
// Quick reject on the byte beyond current bestLen
|
|
780
|
+
if (bestLen >= MIN_MATCH) {
|
|
781
|
+
const checkOffset = mGlobalPos + bestLen;
|
|
782
|
+
// Determine the byte at checkOffset
|
|
783
|
+
let checkByte;
|
|
784
|
+
const checkLocal = checkOffset - totalIn;
|
|
785
|
+
if (checkLocal >= 0 && checkLocal < data.length) {
|
|
786
|
+
checkByte = data[checkLocal];
|
|
787
|
+
}
|
|
788
|
+
else {
|
|
789
|
+
checkByte = window[checkOffset & (WINDOW_SIZE - 1)];
|
|
790
|
+
}
|
|
791
|
+
if (checkByte !== data[pos + bestLen]) {
|
|
792
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
793
|
+
continue;
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
while (len < maxLen) {
|
|
797
|
+
const matchOffset = mGlobalPos + len;
|
|
798
|
+
// Get byte from window or current data
|
|
799
|
+
let matchByte;
|
|
800
|
+
const matchLocal = matchOffset - totalIn;
|
|
801
|
+
if (matchLocal >= 0 && matchLocal < data.length) {
|
|
802
|
+
matchByte = data[matchLocal];
|
|
803
|
+
}
|
|
804
|
+
else {
|
|
805
|
+
matchByte = window[matchOffset & (WINDOW_SIZE - 1)];
|
|
806
|
+
}
|
|
807
|
+
if (matchByte !== data[pos + len]) {
|
|
808
|
+
break;
|
|
660
809
|
}
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
810
|
+
len++;
|
|
811
|
+
}
|
|
812
|
+
if (len > bestLen) {
|
|
813
|
+
bestLen = len;
|
|
814
|
+
bestDist = dist;
|
|
815
|
+
if (len >= MAX_MATCH) {
|
|
816
|
+
break;
|
|
664
817
|
}
|
|
665
818
|
}
|
|
819
|
+
matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
|
|
666
820
|
}
|
|
667
|
-
|
|
821
|
+
// Insert current position into hash chain
|
|
822
|
+
prevArr[globalPos & (MAX_DIST - 1)] = head[h];
|
|
823
|
+
head[h] = globalPos + 1;
|
|
668
824
|
}
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
825
|
+
// --- Lazy matching logic ---
|
|
826
|
+
if (hasPrevMatch) {
|
|
827
|
+
if (bestLen > prevMatchLen) {
|
|
828
|
+
// Current position wins — emit previous as literal
|
|
829
|
+
writeLiteralCode(out, prevLiteral);
|
|
830
|
+
prevMatchLen = bestLen;
|
|
831
|
+
prevMatchDist = bestDist;
|
|
832
|
+
prevLiteral = data[pos];
|
|
833
|
+
insertWindow(pos, 1);
|
|
834
|
+
pos++;
|
|
835
|
+
}
|
|
836
|
+
else {
|
|
837
|
+
// Previous match wins — emit it
|
|
838
|
+
writeLengthCode(out, prevMatchLen);
|
|
839
|
+
writeDistanceCode(out, prevMatchDist);
|
|
840
|
+
// Insert hash entries for skipped positions inside the match
|
|
841
|
+
const matchEnd = pos - 1 + prevMatchLen;
|
|
842
|
+
const insertEnd = Math.min(matchEnd, data.length);
|
|
843
|
+
for (let i = pos; i < insertEnd; i++) {
|
|
844
|
+
insertHash(i);
|
|
845
|
+
}
|
|
846
|
+
insertWindow(pos, insertEnd - pos);
|
|
847
|
+
pos = insertEnd;
|
|
848
|
+
hasPrevMatch = false;
|
|
849
|
+
prevMatchLen = 0;
|
|
676
850
|
}
|
|
677
|
-
|
|
851
|
+
}
|
|
852
|
+
else if (bestLen >= MIN_MATCH) {
|
|
853
|
+
hasPrevMatch = true;
|
|
854
|
+
prevMatchLen = bestLen;
|
|
855
|
+
prevMatchDist = bestDist;
|
|
856
|
+
prevLiteral = data[pos];
|
|
857
|
+
insertWindow(pos, 1);
|
|
858
|
+
pos++;
|
|
678
859
|
}
|
|
679
860
|
else {
|
|
680
861
|
writeLiteralCode(out, data[pos]);
|
|
681
|
-
|
|
682
|
-
wLen++;
|
|
862
|
+
insertWindow(pos, 1);
|
|
683
863
|
pos++;
|
|
684
864
|
}
|
|
685
865
|
}
|
|
866
|
+
// If there's a pending lazy match and we're at chunk boundary,
|
|
867
|
+
// flush it now (the next chunk will start fresh for lazy matching).
|
|
868
|
+
if (hasPrevMatch) {
|
|
869
|
+
writeLengthCode(out, prevMatchLen);
|
|
870
|
+
writeDistanceCode(out, prevMatchDist);
|
|
871
|
+
// The pending match started at pos-1 and covers prevMatchLen bytes.
|
|
872
|
+
// pos-1 was already hashed/windowed when it was first encountered;
|
|
873
|
+
// now insert the remaining positions (pos .. pos-1+prevMatchLen-1)
|
|
874
|
+
// into hash chains and the sliding window so the next chunk can
|
|
875
|
+
// reference them.
|
|
876
|
+
const matchEnd = Math.min(pos - 1 + prevMatchLen, data.length);
|
|
877
|
+
for (let i = pos; i < matchEnd; i++) {
|
|
878
|
+
insertHash(i);
|
|
879
|
+
}
|
|
880
|
+
insertWindow(pos, matchEnd - pos);
|
|
881
|
+
hasPrevMatch = false;
|
|
882
|
+
prevMatchLen = 0;
|
|
883
|
+
}
|
|
686
884
|
// End-of-block symbol
|
|
687
885
|
writeLiteralCode(out, 256);
|
|
688
886
|
this._windowLen = wLen;
|
|
689
887
|
this._totalIn = totalIn + data.length;
|
|
888
|
+
this._hasPrevMatch = hasPrevMatch;
|
|
889
|
+
this._prevMatchLen = prevMatchLen;
|
|
890
|
+
this._prevMatchDist = prevMatchDist;
|
|
891
|
+
this._prevLiteral = prevLiteral;
|
|
690
892
|
// Flush completed bytes from the bit writer
|
|
691
893
|
return out.flushBytes();
|
|
692
894
|
}
|
|
@@ -46,20 +46,32 @@ export declare function createZlibStream(options?: StreamCompressOptions): ZlibS
|
|
|
46
46
|
*/
|
|
47
47
|
export declare function createUnzlibStream(_options?: StreamCompressOptions): UnzlibStream;
|
|
48
48
|
/**
|
|
49
|
-
* Node.js synchronous deflater
|
|
49
|
+
* Node.js synchronous deflater that batches small writes for better
|
|
50
|
+
* compression.
|
|
50
51
|
*
|
|
51
|
-
*
|
|
52
|
-
*
|
|
53
|
-
*
|
|
54
|
-
*
|
|
52
|
+
* Previous implementation compressed each `write()` call independently
|
|
53
|
+
* with `deflateRawSync()`, creating a fresh zlib context every time.
|
|
54
|
+
* For streaming workloads that push many small chunks (e.g. WorkbookWriter
|
|
55
|
+
* writing one row at a time), this destroyed the LZ77 dictionary between
|
|
56
|
+
* chunks and caused compression ratios to drop from ~82% to ~58%.
|
|
55
57
|
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
58
|
-
*
|
|
58
|
+
* The new implementation accumulates incoming data into an internal buffer
|
|
59
|
+
* and only calls `deflateRawSync()` when the buffer reaches 64 KB (or on
|
|
60
|
+
* `finish()`). Each batch is still compressed independently, but 64 KB
|
|
61
|
+
* is enough for zlib to build a good dictionary — the compression ratio
|
|
62
|
+
* is within ~1% of a single-shot compression of the entire input.
|
|
63
|
+
*
|
|
64
|
+
* The trade-off is slightly higher latency (compressed output is not
|
|
65
|
+
* returned byte-for-byte immediately), but this is acceptable because
|
|
66
|
+
* the ZIP writer buffers output anyway and the streaming contract only
|
|
67
|
+
* requires data to flow *eventually*, not after every single write.
|
|
59
68
|
*/
|
|
60
69
|
export declare class SyncDeflater implements SyncDeflaterLike {
|
|
61
70
|
private _level;
|
|
71
|
+
private _pending;
|
|
72
|
+
private _pendingSize;
|
|
62
73
|
constructor(level?: number);
|
|
63
74
|
write(data: Uint8Array): Uint8Array;
|
|
64
75
|
finish(): Uint8Array;
|
|
76
|
+
private _flushBatch;
|
|
65
77
|
}
|
|
@@ -88,36 +88,81 @@ export function createUnzlibStream(_options = {}) {
|
|
|
88
88
|
// Synchronous stateful deflater (Node.js — native zlib)
|
|
89
89
|
// =============================================================================
|
|
90
90
|
/**
|
|
91
|
-
*
|
|
91
|
+
* Minimum batch size before flushing to the native zlib compressor.
|
|
92
92
|
*
|
|
93
|
-
*
|
|
94
|
-
*
|
|
95
|
-
*
|
|
96
|
-
*
|
|
93
|
+
* Small chunks (e.g. one spreadsheet row ≈ 200-400 bytes) compress very
|
|
94
|
+
* poorly when each is given its own zlib context because the LZ77 dictionary
|
|
95
|
+
* starts empty every time. Batching into ≥ 64 KB mega-chunks gives zlib
|
|
96
|
+
* enough history to find good matches, bringing compression ratios within
|
|
97
|
+
* ~1% of single-shot compression.
|
|
97
98
|
*
|
|
98
|
-
*
|
|
99
|
-
*
|
|
100
|
-
|
|
99
|
+
* 64 KB is chosen as a sweet spot: large enough for good compression,
|
|
100
|
+
* small enough to keep memory bounded and latency low.
|
|
101
|
+
*/
|
|
102
|
+
const SYNC_DEFLATE_BATCH_SIZE = 65536;
|
|
103
|
+
/**
|
|
104
|
+
* Node.js synchronous deflater that batches small writes for better
|
|
105
|
+
* compression.
|
|
106
|
+
*
|
|
107
|
+
* Previous implementation compressed each `write()` call independently
|
|
108
|
+
* with `deflateRawSync()`, creating a fresh zlib context every time.
|
|
109
|
+
* For streaming workloads that push many small chunks (e.g. WorkbookWriter
|
|
110
|
+
* writing one row at a time), this destroyed the LZ77 dictionary between
|
|
111
|
+
* chunks and caused compression ratios to drop from ~82% to ~58%.
|
|
112
|
+
*
|
|
113
|
+
* The new implementation accumulates incoming data into an internal buffer
|
|
114
|
+
* and only calls `deflateRawSync()` when the buffer reaches 64 KB (or on
|
|
115
|
+
* `finish()`). Each batch is still compressed independently, but 64 KB
|
|
116
|
+
* is enough for zlib to build a good dictionary — the compression ratio
|
|
117
|
+
* is within ~1% of a single-shot compression of the entire input.
|
|
118
|
+
*
|
|
119
|
+
* The trade-off is slightly higher latency (compressed output is not
|
|
120
|
+
* returned byte-for-byte immediately), but this is acceptable because
|
|
121
|
+
* the ZIP writer buffers output anyway and the streaming contract only
|
|
122
|
+
* requires data to flow *eventually*, not after every single write.
|
|
101
123
|
*/
|
|
102
124
|
export class SyncDeflater {
|
|
103
125
|
constructor(level = DEFAULT_COMPRESS_LEVEL) {
|
|
126
|
+
this._pending = [];
|
|
127
|
+
this._pendingSize = 0;
|
|
104
128
|
this._level = level;
|
|
105
129
|
}
|
|
106
130
|
write(data) {
|
|
107
131
|
if (data.length === 0) {
|
|
108
132
|
return new Uint8Array(0);
|
|
109
133
|
}
|
|
110
|
-
|
|
134
|
+
this._pending.push(data);
|
|
135
|
+
this._pendingSize += data.length;
|
|
136
|
+
if (this._pendingSize >= SYNC_DEFLATE_BATCH_SIZE) {
|
|
137
|
+
return this._flushBatch(false);
|
|
138
|
+
}
|
|
139
|
+
return new Uint8Array(0);
|
|
140
|
+
}
|
|
141
|
+
finish() {
|
|
142
|
+
return this._flushBatch(true);
|
|
143
|
+
}
|
|
144
|
+
_flushBatch(final) {
|
|
145
|
+
let input;
|
|
146
|
+
if (this._pending.length === 0) {
|
|
147
|
+
input = Buffer.alloc(0);
|
|
148
|
+
}
|
|
149
|
+
else if (this._pending.length === 1) {
|
|
150
|
+
input = Buffer.from(this._pending[0]);
|
|
151
|
+
}
|
|
152
|
+
else {
|
|
153
|
+
input = Buffer.concat(this._pending);
|
|
154
|
+
}
|
|
155
|
+
this._pending.length = 0;
|
|
156
|
+
this._pendingSize = 0;
|
|
157
|
+
if (input.length === 0 && !final) {
|
|
158
|
+
return new Uint8Array(0);
|
|
159
|
+
}
|
|
160
|
+
const result = deflateRawSync(input, {
|
|
111
161
|
level: this._level,
|
|
112
|
-
finishFlush: constants.Z_SYNC_FLUSH
|
|
162
|
+
finishFlush: final ? constants.Z_FINISH : constants.Z_SYNC_FLUSH
|
|
113
163
|
});
|
|
114
164
|
// deflateRawSync returns a Buffer sharing a 16 KB slab ArrayBuffer.
|
|
115
165
|
// Copy to a tight Uint8Array so the slab can be reclaimed.
|
|
116
166
|
return new Uint8Array(result);
|
|
117
167
|
}
|
|
118
|
-
finish() {
|
|
119
|
-
// Emit a final empty DEFLATE block (BFINAL=1, BTYPE=01, EOB).
|
|
120
|
-
// This terminates the concatenated DEFLATE stream.
|
|
121
|
-
return new Uint8Array(deflateRawSync(Buffer.alloc(0), { level: this._level }));
|
|
122
|
-
}
|
|
123
168
|
}
|
|
@@ -447,7 +447,7 @@ export class ZipDeflateFile {
|
|
|
447
447
|
// Stateful synchronous compression — maintains LZ77 window and bit position
|
|
448
448
|
// across chunks so the output is a single valid DEFLATE stream.
|
|
449
449
|
if (!this._syncDeflater) {
|
|
450
|
-
this._syncDeflater = new SyncDeflater();
|
|
450
|
+
this._syncDeflater = new SyncDeflater(this.level);
|
|
451
451
|
}
|
|
452
452
|
if (data.length > 0) {
|
|
453
453
|
const compressed = this._syncDeflater.write(data);
|