@cj-tech-master/excelts 6.1.1 → 6.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -351,13 +351,36 @@ function deflateRawStore(data) {
351
351
  return output.subarray(0, outPos);
352
352
  }
353
353
  // ============================================================================
354
- // LZ77 + Huffman Compression (Basic implementation)
354
+ // LZ77 + Huffman Compression
355
355
  // ============================================================================
356
+ // Hash table size must be a power of 2. 32768 entries keeps memory reasonable
357
+ // while providing a good distribution for the 3-byte hash.
358
+ const HASH_SIZE = 32768;
359
+ const HASH_MASK = HASH_SIZE - 1;
360
+ // Maximum hash chain length to walk per position. Longer chains find better
361
+ // matches at the cost of speed. 64 is a good balance (~zlib level 5-6).
362
+ const MAX_CHAIN_LEN = 64;
363
+ // Minimum match length for LZ77 (RFC 1951 minimum).
364
+ const MIN_MATCH = 3;
365
+ // Maximum match length (RFC 1951 maximum).
366
+ const MAX_MATCH = 258;
367
+ // Maximum back-reference distance (RFC 1951 / 32 KB sliding window).
368
+ const MAX_DIST = 32768;
356
369
  /**
357
- * Compress data using DEFLATE with fixed Huffman codes
370
+ * Hash function for 3-byte sequences.
371
+ * Uses a multiplicative hash for better distribution than the naive
372
+ * shift-or approach. The constant 0x1e35a7bd is chosen for good avalanche
373
+ * properties in the lower bits.
374
+ */
375
+ function hash3(a, b, c) {
376
+ return ((((a << 16) | (b << 8) | c) * 0x1e35a7bd) >>> 17) & HASH_MASK;
377
+ }
378
+ /**
379
+ * Compress data using DEFLATE with fixed Huffman codes.
358
380
  *
359
- * This provides real compression using LZ77 + fixed Huffman codes.
360
- * Not as efficient as full DEFLATE but much better than STORE mode.
381
+ * Uses LZ77 with hash chains and lazy matching for significantly better
382
+ * compression than a single-entry hash table. The algorithm is modelled
383
+ * after zlib's "fast" and "slow" deflate strategies.
361
384
  *
362
385
  * @param data - Data to compress
363
386
  * @returns Compressed data in deflate-raw format
@@ -375,43 +398,106 @@ function deflateRawCompressed(data) {
375
398
  // Write final block header with fixed Huffman (BFINAL=1, BTYPE=01)
376
399
  output.writeBits(1, 1); // BFINAL
377
400
  output.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
378
- // LZ77 compression with hash table
379
- const hashTable = new Map();
401
+ // --- Hash chain tables (typed arrays for performance) ---
402
+ // head[h]: most recent position with hash h (0 = unused, positions are 1-based internally)
403
+ // prev[pos & (MAX_DIST-1)]: previous position in the chain for the same hash
404
+ const head = new Int32Array(HASH_SIZE); // filled with 0 (no match)
405
+ const prev = new Int32Array(MAX_DIST);
380
406
  let pos = 0;
407
+ // State for lazy matching:
408
+ // When we find a match at position N, we check position N+1 too.
409
+ // If N+1 has a longer match we emit a literal for N and use the N+1 match.
410
+ let prevMatchLen = 0;
411
+ let prevMatchDist = 0;
412
+ let prevLiteral = 0;
413
+ let hasPrevMatch = false;
381
414
  while (pos < data.length) {
382
- // Try to find a match
383
415
  let bestLen = 0;
384
416
  let bestDist = 0;
385
417
  if (pos + 2 < data.length) {
386
- const hash = (data[pos] << 16) | (data[pos + 1] << 8) | data[pos + 2];
387
- const matchPos = hashTable.get(hash);
388
- if (matchPos !== undefined && pos - matchPos <= 32768) {
389
- const dist = pos - matchPos;
418
+ const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
419
+ // Walk the hash chain to find the best (longest) match
420
+ let chainLen = MAX_CHAIN_LEN;
421
+ let matchHead = head[h];
422
+ while (matchHead > 0 && chainLen-- > 0) {
423
+ const mPos = matchHead - 1; // convert from 1-based to 0-based
424
+ const dist = pos - mPos;
425
+ if (dist > MAX_DIST || dist <= 0) {
426
+ break;
427
+ }
428
+ // Quick check: compare the byte just beyond current best length first
429
+ // to skip obviously shorter matches early.
430
+ if (bestLen >= MIN_MATCH && data[mPos + bestLen] !== data[pos + bestLen]) {
431
+ matchHead = prev[mPos & (MAX_DIST - 1)];
432
+ continue;
433
+ }
434
+ // Full scan
390
435
  let len = 0;
391
- const maxLen = Math.min(258, data.length - pos);
392
- while (len < maxLen && data[matchPos + len] === data[pos + len]) {
436
+ const maxLen = Math.min(MAX_MATCH, data.length - pos);
437
+ while (len < maxLen && data[mPos + len] === data[pos + len]) {
393
438
  len++;
394
439
  }
395
- if (len >= 3) {
440
+ if (len > bestLen) {
396
441
  bestLen = len;
397
442
  bestDist = dist;
443
+ if (len >= MAX_MATCH) {
444
+ break; // can't do better
445
+ }
446
+ }
447
+ matchHead = prev[mPos & (MAX_DIST - 1)];
448
+ }
449
+ // Insert current position into the hash chain
450
+ prev[pos & (MAX_DIST - 1)] = head[h];
451
+ head[h] = pos + 1; // 1-based
452
+ }
453
+ // --- Lazy matching logic ---
454
+ if (hasPrevMatch) {
455
+ if (bestLen > prevMatchLen) {
456
+ // Current position has a better match; emit previous as literal
457
+ writeLiteralCode(output, prevLiteral);
458
+ // Now adopt current match as the pending one
459
+ prevMatchLen = bestLen;
460
+ prevMatchDist = bestDist;
461
+ prevLiteral = data[pos];
462
+ pos++;
463
+ }
464
+ else {
465
+ // Previous match is at least as good; emit it
466
+ writeLengthCode(output, prevMatchLen);
467
+ writeDistanceCode(output, prevMatchDist);
468
+ // Insert hash entries for the skipped bytes (positions inside the match)
469
+ // so future matches can find them. We already inserted pos-1 (the match
470
+ // start); now insert pos through pos + prevMatchLen - 2.
471
+ const matchEnd = pos - 1 + prevMatchLen;
472
+ for (let i = pos; i < matchEnd && i + 2 < data.length; i++) {
473
+ const h = hash3(data[i], data[i + 1], data[i + 2]);
474
+ prev[i & (MAX_DIST - 1)] = head[h];
475
+ head[h] = i + 1;
398
476
  }
477
+ pos = matchEnd;
478
+ hasPrevMatch = false;
479
+ prevMatchLen = 0;
399
480
  }
400
- // Update hash table
401
- hashTable.set(hash, pos);
402
481
  }
403
- if (bestLen >= 3) {
404
- // Write length/distance pair
405
- writeLengthCode(output, bestLen);
406
- writeDistanceCode(output, bestDist);
407
- pos += bestLen;
482
+ else if (bestLen >= MIN_MATCH) {
483
+ // We have a match; hold it and try the next position (lazy evaluation)
484
+ hasPrevMatch = true;
485
+ prevMatchLen = bestLen;
486
+ prevMatchDist = bestDist;
487
+ prevLiteral = data[pos];
488
+ pos++;
408
489
  }
409
490
  else {
410
- // Write literal
491
+ // No match — emit literal
411
492
  writeLiteralCode(output, data[pos]);
412
493
  pos++;
413
494
  }
414
495
  }
496
+ // Flush any pending lazy match
497
+ if (hasPrevMatch) {
498
+ writeLengthCode(output, prevMatchLen);
499
+ writeDistanceCode(output, prevMatchDist);
500
+ }
415
501
  // Write end-of-block symbol (256)
416
502
  writeLiteralCode(output, 256);
417
503
  return output.finish();
@@ -606,7 +692,10 @@ const WINDOW_SIZE = 32768;
606
692
  * maintains state across multiple `write()` calls:
607
693
  *
608
694
  * - **LZ77 sliding window**: back-references can span across chunks.
609
- * - **Hash table**: match positions persist across chunks.
695
+ * - **Hash chains**: match positions persist across chunks with typed-array
696
+ * hash tables for fast lookup.
697
+ * - **Lazy matching**: each match is compared with the next position's match
698
+ * to pick the longer one.
610
699
  * - **Bit writer**: bit position is preserved, so consecutive blocks form
611
700
  * a single valid DEFLATE bit-stream without alignment issues.
612
701
  *
@@ -620,13 +709,20 @@ const WINDOW_SIZE = 32768;
620
709
  class SyncDeflater {
621
710
  constructor() {
622
711
  this._output = new BitWriter();
623
- this._hashTable = new Map();
712
+ // Hash chain tables — shared across chunks for cross-chunk matching.
713
+ this._head = new Int32Array(HASH_SIZE);
714
+ this._prev = new Int32Array(MAX_DIST);
624
715
  /** Sliding window: the last WINDOW_SIZE bytes of uncompressed data. */
625
716
  this._window = new Uint8Array(WINDOW_SIZE);
626
717
  /** Number of valid bytes currently in the window. */
627
718
  this._windowLen = 0;
628
719
  /** Total bytes written so far (monotonically increasing; used for hash offsets). */
629
720
  this._totalIn = 0;
721
+ // Lazy matching state that may span across chunks.
722
+ this._hasPrevMatch = false;
723
+ this._prevMatchLen = 0;
724
+ this._prevMatchDist = 0;
725
+ this._prevLiteral = 0;
630
726
  }
631
727
  /**
632
728
  * Compress a chunk and return the compressed bytes produced so far.
@@ -642,57 +738,163 @@ class SyncDeflater {
642
738
  out.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
643
739
  const window = this._window;
644
740
  let wLen = this._windowLen;
645
- const hashTable = this._hashTable;
741
+ const head = this._head;
742
+ const prevArr = this._prev;
646
743
  const totalIn = this._totalIn;
647
- for (let pos = 0; pos < data.length;) {
744
+ let hasPrevMatch = this._hasPrevMatch;
745
+ let prevMatchLen = this._prevMatchLen;
746
+ let prevMatchDist = this._prevMatchDist;
747
+ let prevLiteral = this._prevLiteral;
748
+ /**
749
+ * Insert a global position into the hash chain and the sliding window.
750
+ */
751
+ const insertHash = (localPos) => {
752
+ if (localPos + 2 >= data.length) {
753
+ return;
754
+ }
755
+ const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
756
+ const globalPos = totalIn + localPos;
757
+ prevArr[globalPos & (MAX_DIST - 1)] = head[h];
758
+ head[h] = globalPos + 1; // 1-based
759
+ };
760
+ const insertWindow = (localPos, count) => {
761
+ for (let i = 0; i < count; i++) {
762
+ window[(wLen + i) & (WINDOW_SIZE - 1)] = data[localPos + i];
763
+ }
764
+ wLen += count;
765
+ };
766
+ let pos = 0;
767
+ for (; pos < data.length;) {
648
768
  let bestLen = 0;
649
769
  let bestDist = 0;
650
770
  if (pos + 2 < data.length) {
651
- const h = (data[pos] << 16) | (data[pos + 1] << 8) | data[pos + 2];
652
- const matchGlobalPos = hashTable.get(h);
653
- if (matchGlobalPos !== undefined) {
654
- const dist = totalIn + pos - matchGlobalPos;
655
- if (dist > 0 && dist <= WINDOW_SIZE) {
656
- // Match candidate scan in the sliding window
657
- const wStart = (((wLen - dist) % WINDOW_SIZE) + WINDOW_SIZE) % WINDOW_SIZE;
658
- const maxLen = Math.min(258, data.length - pos);
659
- let len = 0;
660
- while (len < maxLen) {
661
- const wByte = window[(wStart + len) % WINDOW_SIZE];
662
- if (wByte !== data[pos + len]) {
663
- break;
664
- }
665
- len++;
771
+ const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
772
+ const globalPos = totalIn + pos;
773
+ // Walk the hash chain
774
+ let chainLen = MAX_CHAIN_LEN;
775
+ let matchHead = head[h];
776
+ while (matchHead > 0 && chainLen-- > 0) {
777
+ const mGlobalPos = matchHead - 1;
778
+ const dist = globalPos - mGlobalPos;
779
+ if (dist > MAX_DIST || dist <= 0) {
780
+ break;
781
+ }
782
+ // Compare bytes through the sliding window + current chunk
783
+ const maxLen = Math.min(MAX_MATCH, data.length - pos);
784
+ let len = 0;
785
+ // Quick reject on the byte beyond current bestLen
786
+ if (bestLen >= MIN_MATCH) {
787
+ const checkOffset = mGlobalPos + bestLen;
788
+ // Determine the byte at checkOffset
789
+ let checkByte;
790
+ const checkLocal = checkOffset - totalIn;
791
+ if (checkLocal >= 0 && checkLocal < data.length) {
792
+ checkByte = data[checkLocal];
793
+ }
794
+ else {
795
+ checkByte = window[checkOffset & (WINDOW_SIZE - 1)];
796
+ }
797
+ if (checkByte !== data[pos + bestLen]) {
798
+ matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
799
+ continue;
800
+ }
801
+ }
802
+ while (len < maxLen) {
803
+ const matchOffset = mGlobalPos + len;
804
+ // Get byte from window or current data
805
+ let matchByte;
806
+ const matchLocal = matchOffset - totalIn;
807
+ if (matchLocal >= 0 && matchLocal < data.length) {
808
+ matchByte = data[matchLocal];
809
+ }
810
+ else {
811
+ matchByte = window[matchOffset & (WINDOW_SIZE - 1)];
812
+ }
813
+ if (matchByte !== data[pos + len]) {
814
+ break;
666
815
  }
667
- if (len >= 3) {
668
- bestLen = len;
669
- bestDist = dist;
816
+ len++;
817
+ }
818
+ if (len > bestLen) {
819
+ bestLen = len;
820
+ bestDist = dist;
821
+ if (len >= MAX_MATCH) {
822
+ break;
670
823
  }
671
824
  }
825
+ matchHead = prevArr[mGlobalPos & (MAX_DIST - 1)];
672
826
  }
673
- hashTable.set(h, totalIn + pos);
827
+ // Insert current position into hash chain
828
+ prevArr[globalPos & (MAX_DIST - 1)] = head[h];
829
+ head[h] = globalPos + 1;
674
830
  }
675
- if (bestLen >= 3) {
676
- writeLengthCode(out, bestLen);
677
- writeDistanceCode(out, bestDist);
678
- // Advance window
679
- for (let i = 0; i < bestLen; i++) {
680
- window[wLen % WINDOW_SIZE] = data[pos + i];
681
- wLen++;
831
+ // --- Lazy matching logic ---
832
+ if (hasPrevMatch) {
833
+ if (bestLen > prevMatchLen) {
834
+ // Current position wins — emit previous as literal
835
+ writeLiteralCode(out, prevLiteral);
836
+ prevMatchLen = bestLen;
837
+ prevMatchDist = bestDist;
838
+ prevLiteral = data[pos];
839
+ insertWindow(pos, 1);
840
+ pos++;
841
+ }
842
+ else {
843
+ // Previous match wins — emit it
844
+ writeLengthCode(out, prevMatchLen);
845
+ writeDistanceCode(out, prevMatchDist);
846
+ // Insert hash entries for skipped positions inside the match
847
+ const matchEnd = pos - 1 + prevMatchLen;
848
+ const insertEnd = Math.min(matchEnd, data.length);
849
+ for (let i = pos; i < insertEnd; i++) {
850
+ insertHash(i);
851
+ }
852
+ insertWindow(pos, insertEnd - pos);
853
+ pos = insertEnd;
854
+ hasPrevMatch = false;
855
+ prevMatchLen = 0;
682
856
  }
683
- pos += bestLen;
857
+ }
858
+ else if (bestLen >= MIN_MATCH) {
859
+ hasPrevMatch = true;
860
+ prevMatchLen = bestLen;
861
+ prevMatchDist = bestDist;
862
+ prevLiteral = data[pos];
863
+ insertWindow(pos, 1);
864
+ pos++;
684
865
  }
685
866
  else {
686
867
  writeLiteralCode(out, data[pos]);
687
- window[wLen % WINDOW_SIZE] = data[pos];
688
- wLen++;
868
+ insertWindow(pos, 1);
689
869
  pos++;
690
870
  }
691
871
  }
872
+ // If there's a pending lazy match and we're at chunk boundary,
873
+ // flush it now (the next chunk will start fresh for lazy matching).
874
+ if (hasPrevMatch) {
875
+ writeLengthCode(out, prevMatchLen);
876
+ writeDistanceCode(out, prevMatchDist);
877
+ // The pending match started at pos-1 and covers prevMatchLen bytes.
878
+ // pos-1 was already hashed/windowed when it was first encountered;
879
+ // now insert the remaining positions (pos .. pos-1+prevMatchLen-1)
880
+ // into hash chains and the sliding window so the next chunk can
881
+ // reference them.
882
+ const matchEnd = Math.min(pos - 1 + prevMatchLen, data.length);
883
+ for (let i = pos; i < matchEnd; i++) {
884
+ insertHash(i);
885
+ }
886
+ insertWindow(pos, matchEnd - pos);
887
+ hasPrevMatch = false;
888
+ prevMatchLen = 0;
889
+ }
692
890
  // End-of-block symbol
693
891
  writeLiteralCode(out, 256);
694
892
  this._windowLen = wLen;
695
893
  this._totalIn = totalIn + data.length;
894
+ this._hasPrevMatch = hasPrevMatch;
895
+ this._prevMatchLen = prevMatchLen;
896
+ this._prevMatchDist = prevMatchDist;
897
+ this._prevLiteral = prevLiteral;
696
898
  // Flush completed bytes from the bit writer
697
899
  return out.flushBytes();
698
900
  }
@@ -98,37 +98,82 @@ function createUnzlibStream(_options = {}) {
98
98
  // Synchronous stateful deflater (Node.js — native zlib)
99
99
  // =============================================================================
100
100
  /**
101
- * Node.js synchronous deflater using `deflateRawSync` with `Z_SYNC_FLUSH`.
101
+ * Minimum batch size before flushing to the native zlib compressor.
102
102
  *
103
- * Each `write()` compresses the chunk independently (no cross-chunk dictionary)
104
- * but uses `Z_SYNC_FLUSH` so the output is byte-aligned and can be concatenated
105
- * into a single valid DEFLATE stream. The final `finish()` emits a proper
106
- * BFINAL=1 block.
103
+ * Small chunks (e.g. one spreadsheet row 200-400 bytes) compress very
104
+ * poorly when each is given its own zlib context because the LZ77 dictionary
105
+ * starts empty every time. Batching into 64 KB mega-chunks gives zlib
106
+ * enough history to find good matches, bringing compression ratios within
107
+ * ~1% of single-shot compression.
107
108
  *
108
- * This is fast (native C zlib) and produces valid output on all Node.js versions
109
- * (20+). The trade-off is ~2% worse compression ratio vs a stateful context,
110
- * which is acceptable for streaming where memory is the priority.
109
+ * 64 KB is chosen as a sweet spot: large enough for good compression,
110
+ * small enough to keep memory bounded and latency low.
111
+ */
112
+ const SYNC_DEFLATE_BATCH_SIZE = 65536;
113
+ /**
114
+ * Node.js synchronous deflater that batches small writes for better
115
+ * compression.
116
+ *
117
+ * Previous implementation compressed each `write()` call independently
118
+ * with `deflateRawSync()`, creating a fresh zlib context every time.
119
+ * For streaming workloads that push many small chunks (e.g. WorkbookWriter
120
+ * writing one row at a time), this destroyed the LZ77 dictionary between
121
+ * chunks and caused compression ratios to drop from ~82% to ~58%.
122
+ *
123
+ * The new implementation accumulates incoming data into an internal buffer
124
+ * and only calls `deflateRawSync()` when the buffer reaches 64 KB (or on
125
+ * `finish()`). Each batch is still compressed independently, but 64 KB
126
+ * is enough for zlib to build a good dictionary — the compression ratio
127
+ * is within ~1% of a single-shot compression of the entire input.
128
+ *
129
+ * The trade-off is slightly higher latency (compressed output is not
130
+ * returned byte-for-byte immediately), but this is acceptable because
131
+ * the ZIP writer buffers output anyway and the streaming contract only
132
+ * requires data to flow *eventually*, not after every single write.
111
133
  */
112
134
  class SyncDeflater {
113
135
  constructor(level = defaults_1.DEFAULT_COMPRESS_LEVEL) {
136
+ this._pending = [];
137
+ this._pendingSize = 0;
114
138
  this._level = level;
115
139
  }
116
140
  write(data) {
117
141
  if (data.length === 0) {
118
142
  return new Uint8Array(0);
119
143
  }
120
- const result = (0, zlib_1.deflateRawSync)(Buffer.from(data), {
144
+ this._pending.push(data);
145
+ this._pendingSize += data.length;
146
+ if (this._pendingSize >= SYNC_DEFLATE_BATCH_SIZE) {
147
+ return this._flushBatch(false);
148
+ }
149
+ return new Uint8Array(0);
150
+ }
151
+ finish() {
152
+ return this._flushBatch(true);
153
+ }
154
+ _flushBatch(final) {
155
+ let input;
156
+ if (this._pending.length === 0) {
157
+ input = Buffer.alloc(0);
158
+ }
159
+ else if (this._pending.length === 1) {
160
+ input = Buffer.from(this._pending[0]);
161
+ }
162
+ else {
163
+ input = Buffer.concat(this._pending);
164
+ }
165
+ this._pending.length = 0;
166
+ this._pendingSize = 0;
167
+ if (input.length === 0 && !final) {
168
+ return new Uint8Array(0);
169
+ }
170
+ const result = (0, zlib_1.deflateRawSync)(input, {
121
171
  level: this._level,
122
- finishFlush: zlib_1.constants.Z_SYNC_FLUSH
172
+ finishFlush: final ? zlib_1.constants.Z_FINISH : zlib_1.constants.Z_SYNC_FLUSH
123
173
  });
124
174
  // deflateRawSync returns a Buffer sharing a 16 KB slab ArrayBuffer.
125
175
  // Copy to a tight Uint8Array so the slab can be reclaimed.
126
176
  return new Uint8Array(result);
127
177
  }
128
- finish() {
129
- // Emit a final empty DEFLATE block (BFINAL=1, BTYPE=01, EOB).
130
- // This terminates the concatenated DEFLATE stream.
131
- return new Uint8Array((0, zlib_1.deflateRawSync)(Buffer.alloc(0), { level: this._level }));
132
- }
133
178
  }
134
179
  exports.SyncDeflater = SyncDeflater;
@@ -450,7 +450,7 @@ class ZipDeflateFile {
450
450
  // Stateful synchronous compression — maintains LZ77 window and bit position
451
451
  // across chunks so the output is a single valid DEFLATE stream.
452
452
  if (!this._syncDeflater) {
453
- this._syncDeflater = new streaming_compress_1.SyncDeflater();
453
+ this._syncDeflater = new streaming_compress_1.SyncDeflater(this.level);
454
454
  }
455
455
  if (data.length > 0) {
456
456
  const compressed = this._syncDeflater.write(data);