@cj-tech-master/excelts 7.6.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +99 -577
- package/README_zh.md +101 -577
- package/dist/browser/index.browser.d.ts +3 -0
- package/dist/browser/index.browser.js +2 -0
- package/dist/browser/index.d.ts +3 -0
- package/dist/browser/index.js +2 -0
- package/dist/browser/modules/archive/compression/compress.browser.js +4 -4
- package/dist/browser/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/browser/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/browser/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/browser/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/browser/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/browser/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/browser/modules/archive/zip/stream.js +27 -3
- package/dist/browser/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/browser/modules/excel/workbook.browser.js +226 -0
- package/dist/browser/modules/excel/workbook.d.ts +32 -1
- package/dist/browser/modules/excel/workbook.js +47 -2
- package/dist/browser/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/browser/modules/markdown/constants.d.ts +30 -0
- package/dist/browser/modules/markdown/constants.js +30 -0
- package/dist/browser/modules/markdown/errors.d.ts +21 -0
- package/dist/browser/modules/markdown/errors.js +23 -0
- package/dist/browser/modules/markdown/format/index.d.ts +54 -0
- package/dist/browser/modules/markdown/format/index.js +307 -0
- package/dist/browser/modules/markdown/index.d.ts +15 -0
- package/dist/browser/modules/markdown/index.js +22 -0
- package/dist/browser/modules/markdown/parse/index.d.ts +70 -0
- package/dist/browser/modules/markdown/parse/index.js +428 -0
- package/dist/browser/modules/markdown/types.d.ts +130 -0
- package/dist/browser/modules/markdown/types.js +6 -0
- package/dist/cjs/index.js +5 -1
- package/dist/cjs/modules/archive/compression/compress.browser.js +4 -4
- package/dist/cjs/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/cjs/modules/archive/compression/streaming-compress.browser.js +15 -2
- package/dist/cjs/modules/archive/compression/streaming-compress.js +8 -0
- package/dist/cjs/modules/archive/zip/stream.js +26 -2
- package/dist/cjs/modules/excel/workbook.browser.js +226 -0
- package/dist/cjs/modules/excel/workbook.js +46 -1
- package/dist/cjs/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/cjs/modules/markdown/constants.js +33 -0
- package/dist/cjs/modules/markdown/errors.js +28 -0
- package/dist/cjs/modules/markdown/format/index.js +310 -0
- package/dist/cjs/modules/markdown/index.js +30 -0
- package/dist/cjs/modules/markdown/parse/index.js +432 -0
- package/dist/cjs/modules/markdown/types.js +7 -0
- package/dist/esm/index.browser.js +2 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/modules/archive/compression/compress.browser.js +4 -4
- package/dist/esm/modules/archive/compression/deflate-fallback.js +664 -360
- package/dist/esm/modules/archive/compression/streaming-compress.browser.js +15 -3
- package/dist/esm/modules/archive/compression/streaming-compress.js +7 -0
- package/dist/esm/modules/archive/zip/stream.js +27 -3
- package/dist/esm/modules/excel/workbook.browser.js +226 -0
- package/dist/esm/modules/excel/workbook.js +47 -2
- package/dist/esm/modules/excel/xlsx/xlsx.browser.js +42 -4
- package/dist/esm/modules/markdown/constants.js +30 -0
- package/dist/esm/modules/markdown/errors.js +23 -0
- package/dist/esm/modules/markdown/format/index.js +307 -0
- package/dist/esm/modules/markdown/index.js +22 -0
- package/dist/esm/modules/markdown/parse/index.js +428 -0
- package/dist/esm/modules/markdown/types.js +6 -0
- package/dist/iife/excelts.iife.js +1342 -283
- package/dist/iife/excelts.iife.js.map +1 -1
- package/dist/iife/excelts.iife.min.js +38 -34
- package/dist/types/index.browser.d.ts +3 -0
- package/dist/types/index.d.ts +3 -0
- package/dist/types/modules/archive/compression/deflate-fallback.d.ts +24 -22
- package/dist/types/modules/archive/compression/streaming-compress.browser.d.ts +7 -0
- package/dist/types/modules/archive/compression/streaming-compress.d.ts +5 -0
- package/dist/types/modules/excel/workbook.browser.d.ts +72 -0
- package/dist/types/modules/excel/workbook.d.ts +32 -1
- package/dist/types/modules/markdown/constants.d.ts +30 -0
- package/dist/types/modules/markdown/errors.d.ts +21 -0
- package/dist/types/modules/markdown/format/index.d.ts +54 -0
- package/dist/types/modules/markdown/index.d.ts +15 -0
- package/dist/types/modules/markdown/parse/index.d.ts +70 -0
- package/dist/types/modules/markdown/types.d.ts +130 -0
- package/package.json +56 -32
|
@@ -351,15 +351,33 @@ export function deflateRawStore(data) {
|
|
|
351
351
|
// while providing a good distribution for the 3-byte hash.
|
|
352
352
|
const HASH_SIZE = 32768;
|
|
353
353
|
const HASH_MASK = HASH_SIZE - 1;
|
|
354
|
-
// Maximum hash chain length to walk per position. Longer chains find better
|
|
355
|
-
// matches at the cost of speed. 64 is a good balance (~zlib level 5-6).
|
|
356
|
-
const MAX_CHAIN_LEN = 64;
|
|
357
354
|
// Minimum match length for LZ77 (RFC 1951 minimum).
|
|
358
355
|
const MIN_MATCH = 3;
|
|
359
356
|
// Maximum match length (RFC 1951 maximum).
|
|
360
357
|
const MAX_MATCH = 258;
|
|
361
358
|
// Maximum back-reference distance (RFC 1951 / 32 KB sliding window).
|
|
362
359
|
const MAX_DIST = 32768;
|
|
360
|
+
/**
|
|
361
|
+
* Get LZ77 configuration for the given compression level (1-9).
|
|
362
|
+
* Modelled after zlib's configuration_table.
|
|
363
|
+
*/
|
|
364
|
+
function getLZ77Config(level) {
|
|
365
|
+
// Level 0 should be handled by the caller (store mode).
|
|
366
|
+
if (level <= 1) {
|
|
367
|
+
return { maxChainLen: 4, goodLen: 4, niceLen: 8, lazy: false };
|
|
368
|
+
}
|
|
369
|
+
if (level <= 3) {
|
|
370
|
+
return { maxChainLen: 8, goodLen: 8, niceLen: 32, lazy: true };
|
|
371
|
+
}
|
|
372
|
+
if (level <= 5) {
|
|
373
|
+
return { maxChainLen: 32, goodLen: 16, niceLen: 128, lazy: true };
|
|
374
|
+
}
|
|
375
|
+
if (level <= 7) {
|
|
376
|
+
return { maxChainLen: 64, goodLen: 32, niceLen: 258, lazy: true };
|
|
377
|
+
}
|
|
378
|
+
// level 8-9
|
|
379
|
+
return { maxChainLen: 128, goodLen: 64, niceLen: 258, lazy: true };
|
|
380
|
+
}
|
|
363
381
|
/**
|
|
364
382
|
* Hash function for 3-byte sequences.
|
|
365
383
|
* Uses a multiplicative hash for better distribution than the naive
|
|
@@ -370,16 +388,16 @@ function hash3(a, b, c) {
|
|
|
370
388
|
return ((((a << 16) | (b << 8) | c) * 0x1e35a7bd) >>> 17) & HASH_MASK;
|
|
371
389
|
}
|
|
372
390
|
/**
|
|
373
|
-
* Compress data using DEFLATE with
|
|
391
|
+
* Compress data using DEFLATE with Dynamic Huffman codes (BTYPE=2).
|
|
374
392
|
*
|
|
375
|
-
* Uses LZ77 with hash chains and lazy matching for
|
|
376
|
-
*
|
|
377
|
-
* after zlib's "fast" and "slow" deflate strategies.
|
|
393
|
+
* Uses LZ77 with hash chains and lazy matching for match finding, then builds
|
|
394
|
+
* optimal Huffman trees from the symbol frequencies for entropy coding.
|
|
378
395
|
*
|
|
379
396
|
* @param data - Data to compress
|
|
397
|
+
* @param level - Compression level (1-9, default 6)
|
|
380
398
|
* @returns Compressed data in deflate-raw format
|
|
381
399
|
*/
|
|
382
|
-
export function deflateRawCompressed(data) {
|
|
400
|
+
export function deflateRawCompressed(data, level = 6) {
|
|
383
401
|
if (data.length === 0) {
|
|
384
402
|
// Empty input: single final block with just end-of-block symbol
|
|
385
403
|
return new Uint8Array([0x03, 0x00]);
|
|
@@ -388,112 +406,12 @@ export function deflateRawCompressed(data) {
|
|
|
388
406
|
if (data.length < 100) {
|
|
389
407
|
return deflateRawStore(data);
|
|
390
408
|
}
|
|
409
|
+
const config = getLZ77Config(level);
|
|
410
|
+
// --- Phase 1: LZ77 match finding → collect symbols ---
|
|
411
|
+
const lz77Symbols = lz77Compress(data, 0, data.length, config, null);
|
|
412
|
+
// --- Phase 2: Encode as a single final DEFLATE block ---
|
|
391
413
|
const output = new BitWriter();
|
|
392
|
-
|
|
393
|
-
output.writeBits(1, 1); // BFINAL
|
|
394
|
-
output.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
395
|
-
// --- Hash chain tables (typed arrays for performance) ---
|
|
396
|
-
// head[h]: most recent position with hash h (0 = unused, positions are 1-based internally)
|
|
397
|
-
// prev[pos & (MAX_DIST-1)]: previous position in the chain for the same hash
|
|
398
|
-
const head = new Int32Array(HASH_SIZE); // filled with 0 (no match)
|
|
399
|
-
const prev = new Int32Array(MAX_DIST);
|
|
400
|
-
let pos = 0;
|
|
401
|
-
// State for lazy matching:
|
|
402
|
-
// When we find a match at position N, we check position N+1 too.
|
|
403
|
-
// If N+1 has a longer match we emit a literal for N and use the N+1 match.
|
|
404
|
-
let prevMatchLen = 0;
|
|
405
|
-
let prevMatchDist = 0;
|
|
406
|
-
let prevLiteral = 0;
|
|
407
|
-
let hasPrevMatch = false;
|
|
408
|
-
while (pos < data.length) {
|
|
409
|
-
let bestLen = 0;
|
|
410
|
-
let bestDist = 0;
|
|
411
|
-
if (pos + 2 < data.length) {
|
|
412
|
-
const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
|
|
413
|
-
// Walk the hash chain to find the best (longest) match
|
|
414
|
-
let chainLen = MAX_CHAIN_LEN;
|
|
415
|
-
let matchHead = head[h];
|
|
416
|
-
while (matchHead > 0 && chainLen-- > 0) {
|
|
417
|
-
const mPos = matchHead - 1; // convert from 1-based to 0-based
|
|
418
|
-
const dist = pos - mPos;
|
|
419
|
-
if (dist > MAX_DIST || dist <= 0) {
|
|
420
|
-
break;
|
|
421
|
-
}
|
|
422
|
-
// Quick check: compare the byte just beyond current best length first
|
|
423
|
-
// to skip obviously shorter matches early.
|
|
424
|
-
if (bestLen >= MIN_MATCH && data[mPos + bestLen] !== data[pos + bestLen]) {
|
|
425
|
-
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
426
|
-
continue;
|
|
427
|
-
}
|
|
428
|
-
// Full scan
|
|
429
|
-
let len = 0;
|
|
430
|
-
const maxLen = Math.min(MAX_MATCH, data.length - pos);
|
|
431
|
-
while (len < maxLen && data[mPos + len] === data[pos + len]) {
|
|
432
|
-
len++;
|
|
433
|
-
}
|
|
434
|
-
if (len > bestLen) {
|
|
435
|
-
bestLen = len;
|
|
436
|
-
bestDist = dist;
|
|
437
|
-
if (len >= MAX_MATCH) {
|
|
438
|
-
break; // can't do better
|
|
439
|
-
}
|
|
440
|
-
}
|
|
441
|
-
matchHead = prev[mPos & (MAX_DIST - 1)];
|
|
442
|
-
}
|
|
443
|
-
// Insert current position into the hash chain
|
|
444
|
-
prev[pos & (MAX_DIST - 1)] = head[h];
|
|
445
|
-
head[h] = pos + 1; // 1-based
|
|
446
|
-
}
|
|
447
|
-
// --- Lazy matching logic ---
|
|
448
|
-
if (hasPrevMatch) {
|
|
449
|
-
if (bestLen > prevMatchLen) {
|
|
450
|
-
// Current position has a better match; emit previous as literal
|
|
451
|
-
writeLiteralCode(output, prevLiteral);
|
|
452
|
-
// Now adopt current match as the pending one
|
|
453
|
-
prevMatchLen = bestLen;
|
|
454
|
-
prevMatchDist = bestDist;
|
|
455
|
-
prevLiteral = data[pos];
|
|
456
|
-
pos++;
|
|
457
|
-
}
|
|
458
|
-
else {
|
|
459
|
-
// Previous match is at least as good; emit it
|
|
460
|
-
writeLengthCode(output, prevMatchLen);
|
|
461
|
-
writeDistanceCode(output, prevMatchDist);
|
|
462
|
-
// Insert hash entries for the skipped bytes (positions inside the match)
|
|
463
|
-
// so future matches can find them. We already inserted pos-1 (the match
|
|
464
|
-
// start); now insert pos through pos + prevMatchLen - 2.
|
|
465
|
-
const matchEnd = pos - 1 + prevMatchLen;
|
|
466
|
-
for (let i = pos; i < matchEnd && i + 2 < data.length; i++) {
|
|
467
|
-
const h = hash3(data[i], data[i + 1], data[i + 2]);
|
|
468
|
-
prev[i & (MAX_DIST - 1)] = head[h];
|
|
469
|
-
head[h] = i + 1;
|
|
470
|
-
}
|
|
471
|
-
pos = matchEnd;
|
|
472
|
-
hasPrevMatch = false;
|
|
473
|
-
prevMatchLen = 0;
|
|
474
|
-
}
|
|
475
|
-
}
|
|
476
|
-
else if (bestLen >= MIN_MATCH) {
|
|
477
|
-
// We have a match; hold it and try the next position (lazy evaluation)
|
|
478
|
-
hasPrevMatch = true;
|
|
479
|
-
prevMatchLen = bestLen;
|
|
480
|
-
prevMatchDist = bestDist;
|
|
481
|
-
prevLiteral = data[pos];
|
|
482
|
-
pos++;
|
|
483
|
-
}
|
|
484
|
-
else {
|
|
485
|
-
// No match — emit literal
|
|
486
|
-
writeLiteralCode(output, data[pos]);
|
|
487
|
-
pos++;
|
|
488
|
-
}
|
|
489
|
-
}
|
|
490
|
-
// Flush any pending lazy match
|
|
491
|
-
if (hasPrevMatch) {
|
|
492
|
-
writeLengthCode(output, prevMatchLen);
|
|
493
|
-
writeDistanceCode(output, prevMatchDist);
|
|
494
|
-
}
|
|
495
|
-
// Write end-of-block symbol (256)
|
|
496
|
-
writeLiteralCode(output, 256);
|
|
414
|
+
emitDynamicBlock(output, lz77Symbols, true);
|
|
497
415
|
return output.finish();
|
|
498
416
|
}
|
|
499
417
|
/**
|
|
@@ -506,6 +424,14 @@ class BitWriter {
|
|
|
506
424
|
this.bitBuf = 0;
|
|
507
425
|
this.bitCount = 0;
|
|
508
426
|
}
|
|
427
|
+
/**
|
|
428
|
+
* Align to the next byte boundary by padding with zero bits.
|
|
429
|
+
*/
|
|
430
|
+
alignToByte() {
|
|
431
|
+
if (this.bitCount > 0) {
|
|
432
|
+
this.writeBits(0, 8 - this.bitCount);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
509
435
|
writeBits(value, count) {
|
|
510
436
|
this.bitBuf |= value << this.bitCount;
|
|
511
437
|
this.bitCount += count;
|
|
@@ -599,258 +525,512 @@ function writeLiteralCode(output, symbol) {
|
|
|
599
525
|
output.writeBitsReverse(code, len);
|
|
600
526
|
}
|
|
601
527
|
/**
|
|
602
|
-
*
|
|
528
|
+
* Compute the DEFLATE length code (257..285) and extra bits for a given
|
|
529
|
+
* match length (3..258).
|
|
603
530
|
*/
|
|
604
|
-
function
|
|
605
|
-
let
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
else if (length <= 18) {
|
|
614
|
-
const base = length - 11;
|
|
615
|
-
code = 265 + Math.floor(base / 2);
|
|
616
|
-
extraBits = 1;
|
|
617
|
-
extraValue = base % 2;
|
|
618
|
-
}
|
|
619
|
-
else if (length <= 34) {
|
|
620
|
-
const base = length - 19;
|
|
621
|
-
code = 269 + Math.floor(base / 4);
|
|
622
|
-
extraBits = 2;
|
|
623
|
-
extraValue = base % 4;
|
|
624
|
-
}
|
|
625
|
-
else if (length <= 66) {
|
|
626
|
-
const base = length - 35;
|
|
627
|
-
code = 273 + Math.floor(base / 8);
|
|
628
|
-
extraBits = 3;
|
|
629
|
-
extraValue = base % 8;
|
|
630
|
-
}
|
|
631
|
-
else if (length <= 130) {
|
|
632
|
-
const base = length - 67;
|
|
633
|
-
code = 277 + Math.floor(base / 16);
|
|
634
|
-
extraBits = 4;
|
|
635
|
-
extraValue = base % 16;
|
|
636
|
-
}
|
|
637
|
-
else if (length <= 257) {
|
|
638
|
-
const base = length - 131;
|
|
639
|
-
code = 281 + Math.floor(base / 32);
|
|
640
|
-
extraBits = 5;
|
|
641
|
-
extraValue = base % 32;
|
|
531
|
+
function getLengthSymbol(length) {
|
|
532
|
+
for (let i = 0; i < LENGTH_BASE.length; i++) {
|
|
533
|
+
if (i === LENGTH_BASE.length - 1 || length < LENGTH_BASE[i + 1]) {
|
|
534
|
+
return {
|
|
535
|
+
code: 257 + i,
|
|
536
|
+
extra: length - LENGTH_BASE[i],
|
|
537
|
+
extraBits: LENGTH_EXTRA[i]
|
|
538
|
+
};
|
|
539
|
+
}
|
|
642
540
|
}
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
541
|
+
return { code: 285, extra: 0, extraBits: 0 };
|
|
542
|
+
}
|
|
543
|
+
/**
|
|
544
|
+
* Compute the DEFLATE distance code (0..29) and extra bits for a given
|
|
545
|
+
* distance (1..32768).
|
|
546
|
+
*/
|
|
547
|
+
function getDistSymbol(distance) {
|
|
548
|
+
for (let i = 0; i < DIST_TABLE.length; i++) {
|
|
549
|
+
const [maxDist, c, extraBitsCount] = DIST_TABLE[i];
|
|
550
|
+
if (distance <= maxDist) {
|
|
551
|
+
const baseVal = i === 0 ? 1 : DIST_TABLE[i - 1][0] + 1;
|
|
552
|
+
return { code: c, extra: distance - baseVal, extraBits: extraBitsCount };
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
// Fallback (should not reach for valid distances)
|
|
556
|
+
return { code: 29, extra: 0, extraBits: 13 };
|
|
557
|
+
}
|
|
558
|
+
/**
|
|
559
|
+
* Build canonical Huffman code lengths from symbol frequencies.
|
|
560
|
+
* Uses a bottom-up approach: build a Huffman tree from a priority queue,
|
|
561
|
+
* then extract depths. Limits maximum code length to maxBits using
|
|
562
|
+
* the algorithm from zlib's build_tree() / gen_bitlen().
|
|
563
|
+
*
|
|
564
|
+
* Returns an array of code lengths indexed by symbol.
|
|
565
|
+
*/
|
|
566
|
+
function buildCodeLengths(freqs, maxBits) {
|
|
567
|
+
const n = freqs.length;
|
|
568
|
+
const codeLens = new Uint8Array(n);
|
|
569
|
+
// Count symbols with non-zero frequency
|
|
570
|
+
const activeSymbols = [];
|
|
571
|
+
for (let i = 0; i < n; i++) {
|
|
572
|
+
if (freqs[i] > 0) {
|
|
573
|
+
activeSymbols.push({ sym: i, freq: freqs[i] });
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
if (activeSymbols.length === 0) {
|
|
577
|
+
return codeLens;
|
|
578
|
+
}
|
|
579
|
+
// RFC 1951 requires a complete prefix code. For a single symbol, we need
|
|
580
|
+
// at least 2 entries to form a valid tree. We assign code length 1 to the
|
|
581
|
+
// symbol — the decoder uses only 1 bit but the tree is valid because
|
|
582
|
+
// DEFLATE decoders handle this as per the spec (the other 1-bit code is
|
|
583
|
+
// simply unused). This matches zlib's behavior.
|
|
584
|
+
if (activeSymbols.length === 1) {
|
|
585
|
+
codeLens[activeSymbols[0].sym] = 1;
|
|
586
|
+
return codeLens;
|
|
587
|
+
}
|
|
588
|
+
// Sort by frequency (ascending), then by symbol (ascending) for stability
|
|
589
|
+
activeSymbols.sort((a, b) => a.freq - b.freq || a.sym - b.sym);
|
|
590
|
+
const nodes = activeSymbols.map(s => ({
|
|
591
|
+
freq: s.freq,
|
|
592
|
+
sym: s.sym,
|
|
593
|
+
left: null,
|
|
594
|
+
right: null
|
|
595
|
+
}));
|
|
596
|
+
let leafIdx = 0;
|
|
597
|
+
let intIdx = 0;
|
|
598
|
+
const intNodes = [];
|
|
599
|
+
function getMin() {
|
|
600
|
+
const hasLeaf = leafIdx < nodes.length;
|
|
601
|
+
const hasInt = intIdx < intNodes.length;
|
|
602
|
+
if (hasLeaf && hasInt) {
|
|
603
|
+
if (nodes[leafIdx].freq <= intNodes[intIdx].freq) {
|
|
604
|
+
return nodes[leafIdx++];
|
|
605
|
+
}
|
|
606
|
+
return intNodes[intIdx++];
|
|
607
|
+
}
|
|
608
|
+
if (hasLeaf) {
|
|
609
|
+
return nodes[leafIdx++];
|
|
610
|
+
}
|
|
611
|
+
return intNodes[intIdx++];
|
|
612
|
+
}
|
|
613
|
+
const totalNodes = activeSymbols.length;
|
|
614
|
+
for (let i = 0; i < totalNodes - 1; i++) {
|
|
615
|
+
const a = getMin();
|
|
616
|
+
const b = getMin();
|
|
617
|
+
const merged = {
|
|
618
|
+
freq: a.freq + b.freq,
|
|
619
|
+
sym: -1,
|
|
620
|
+
left: a,
|
|
621
|
+
right: b
|
|
622
|
+
};
|
|
623
|
+
intNodes.push(merged);
|
|
624
|
+
}
|
|
625
|
+
// Extract depths from the root (last internal node)
|
|
626
|
+
const root = intNodes[intNodes.length - 1];
|
|
627
|
+
function extractDepths(node, depth) {
|
|
628
|
+
if (node.sym >= 0) {
|
|
629
|
+
codeLens[node.sym] = depth;
|
|
630
|
+
return;
|
|
631
|
+
}
|
|
632
|
+
if (node.left) {
|
|
633
|
+
extractDepths(node.left, depth + 1);
|
|
634
|
+
}
|
|
635
|
+
if (node.right) {
|
|
636
|
+
extractDepths(node.right, depth + 1);
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
extractDepths(root, 0);
|
|
640
|
+
// --- Length limiting using the zlib bl_count redistribution algorithm ---
|
|
641
|
+
// Count code lengths at each bit depth
|
|
642
|
+
const blCount = new Uint16Array(maxBits + 1);
|
|
643
|
+
for (let i = 0; i < n; i++) {
|
|
644
|
+
if (codeLens[i] > 0) {
|
|
645
|
+
if (codeLens[i] > maxBits) {
|
|
646
|
+
blCount[maxBits]++;
|
|
647
|
+
codeLens[i] = maxBits;
|
|
648
|
+
}
|
|
649
|
+
else {
|
|
650
|
+
blCount[codeLens[i]]++;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
// Check Kraft inequality: sum of 2^(maxBits - len) must equal 2^maxBits
|
|
655
|
+
let kraft = 0;
|
|
656
|
+
for (let bits = 1; bits <= maxBits; bits++) {
|
|
657
|
+
kraft += blCount[bits] << (maxBits - bits);
|
|
658
|
+
}
|
|
659
|
+
const target = 1 << maxBits;
|
|
660
|
+
if (kraft === target) {
|
|
661
|
+
return codeLens; // Already valid
|
|
662
|
+
}
|
|
663
|
+
// Redistribute to satisfy Kraft's inequality.
|
|
664
|
+
// Strategy: move symbols from shorter lengths to maxBits until balanced.
|
|
665
|
+
// Each symbol moved from length `bits` to `maxBits` reduces kraft by
|
|
666
|
+
// (2^(maxBits-bits) - 1) — we remove a large weight and add a weight of 1.
|
|
667
|
+
while (kraft > target) {
|
|
668
|
+
// Find a code length < maxBits that has symbols we can push down.
|
|
669
|
+
// Start from maxBits-1 to minimize the damage per move.
|
|
670
|
+
let bits = maxBits - 1;
|
|
671
|
+
while (bits > 0 && blCount[bits] === 0) {
|
|
672
|
+
bits--;
|
|
673
|
+
}
|
|
674
|
+
if (bits === 0) {
|
|
675
|
+
break; // Can't redistribute further
|
|
676
|
+
}
|
|
677
|
+
// Move one symbol from length `bits` to length `maxBits`
|
|
678
|
+
blCount[bits]--;
|
|
679
|
+
blCount[maxBits]++;
|
|
680
|
+
// Kraft change: removed 2^(maxBits-bits), added 2^0 = 1
|
|
681
|
+
kraft -= (1 << (maxBits - bits)) - 1;
|
|
647
682
|
}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
683
|
+
// If kraft < target (under-allocated), add dummy codes at maxBits.
|
|
684
|
+
// This can happen when we overshoot during redistribution.
|
|
685
|
+
while (kraft < target) {
|
|
686
|
+
blCount[maxBits]++;
|
|
687
|
+
kraft++;
|
|
651
688
|
}
|
|
689
|
+
// Reassign code lengths to symbols (preserve relative order: longer
|
|
690
|
+
// codes go to less frequent symbols, matching the Huffman property).
|
|
691
|
+
// Sort symbols by their original code length (longest first), then by
|
|
692
|
+
// frequency (rarest first) for same length.
|
|
693
|
+
const symbolsByLen = [];
|
|
694
|
+
for (let i = 0; i < n; i++) {
|
|
695
|
+
if (codeLens[i] > 0) {
|
|
696
|
+
symbolsByLen.push({ sym: i, origLen: codeLens[i], freq: freqs[i] });
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
symbolsByLen.sort((a, b) => b.origLen - a.origLen || a.freq - b.freq);
|
|
700
|
+
// Assign new lengths from the bl_count distribution
|
|
701
|
+
codeLens.fill(0);
|
|
702
|
+
let symIdx = 0;
|
|
703
|
+
for (let bits = maxBits; bits >= 1; bits--) {
|
|
704
|
+
for (let count = blCount[bits]; count > 0; count--) {
|
|
705
|
+
if (symIdx < symbolsByLen.length) {
|
|
706
|
+
codeLens[symbolsByLen[symIdx].sym] = bits;
|
|
707
|
+
symIdx++;
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
return codeLens;
|
|
652
712
|
}
|
|
653
713
|
/**
|
|
654
|
-
*
|
|
714
|
+
* Build canonical Huffman codes from code lengths (RFC 1951 §3.2.2).
|
|
715
|
+
* Returns [code, length] pairs indexed by symbol.
|
|
655
716
|
*/
|
|
656
|
-
function
|
|
657
|
-
|
|
717
|
+
function buildCanonicalCodes(codeLens) {
|
|
718
|
+
const n = codeLens.length;
|
|
719
|
+
const codes = new Array(n);
|
|
720
|
+
const blCount = new Uint16Array(16);
|
|
721
|
+
for (let i = 0; i < n; i++) {
|
|
722
|
+
if (codeLens[i] > 0) {
|
|
723
|
+
blCount[codeLens[i]]++;
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
const nextCode = new Uint16Array(16);
|
|
658
727
|
let code = 0;
|
|
659
|
-
let
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
728
|
+
for (let bits = 1; bits <= 15; bits++) {
|
|
729
|
+
code = (code + blCount[bits - 1]) << 1;
|
|
730
|
+
nextCode[bits] = code;
|
|
731
|
+
}
|
|
732
|
+
for (let i = 0; i < n; i++) {
|
|
733
|
+
const len = codeLens[i];
|
|
734
|
+
if (len > 0) {
|
|
735
|
+
codes[i] = [nextCode[len]++, len];
|
|
736
|
+
}
|
|
737
|
+
else {
|
|
738
|
+
codes[i] = [0, 0];
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
return codes;
|
|
742
|
+
}
|
|
743
|
+
/**
|
|
744
|
+
* Emit a Dynamic Huffman DEFLATE block (BTYPE=2).
|
|
745
|
+
*
|
|
746
|
+
* Takes the LZ77 symbol sequence, builds optimal Huffman trees,
|
|
747
|
+
* encodes the tree descriptions, then encodes the symbols.
|
|
748
|
+
*/
|
|
749
|
+
function emitDynamicBlock(out, symbols, isFinal) {
|
|
750
|
+
// --- Step 1: Collect frequencies ---
|
|
751
|
+
const litLenFreqs = new Uint32Array(286);
|
|
752
|
+
const distFreqs = new Uint32Array(30);
|
|
753
|
+
// Always include EOB
|
|
754
|
+
litLenFreqs[256] = 1;
|
|
755
|
+
for (const sym of symbols) {
|
|
756
|
+
if (sym.dist === 0) {
|
|
757
|
+
litLenFreqs[sym.litOrLen]++;
|
|
758
|
+
}
|
|
759
|
+
else {
|
|
760
|
+
const ls = getLengthSymbol(sym.litOrLen);
|
|
761
|
+
litLenFreqs[ls.code]++;
|
|
762
|
+
const ds = getDistSymbol(sym.dist);
|
|
763
|
+
distFreqs[ds.code]++;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
// --- Step 2: Build Huffman trees ---
|
|
767
|
+
const litLenLens = buildCodeLengths(litLenFreqs, 15);
|
|
768
|
+
let distLens = buildCodeLengths(distFreqs, 15);
|
|
769
|
+
// DEFLATE requires at least 1 distance code even if unused.
|
|
770
|
+
// Assign two codes at length 1 to form a complete prefix code.
|
|
771
|
+
let hasDistCodes = false;
|
|
772
|
+
for (let i = 0; i < distLens.length; i++) {
|
|
773
|
+
if (distLens[i] > 0) {
|
|
774
|
+
hasDistCodes = true;
|
|
666
775
|
break;
|
|
667
776
|
}
|
|
668
|
-
baseDistance = maxDist + 1;
|
|
669
777
|
}
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
778
|
+
if (!hasDistCodes) {
|
|
779
|
+
distLens = new Uint8Array(30);
|
|
780
|
+
distLens[0] = 1;
|
|
781
|
+
distLens[1] = 1;
|
|
782
|
+
}
|
|
783
|
+
const litLenCodes = buildCanonicalCodes(litLenLens);
|
|
784
|
+
const distCodes = buildCanonicalCodes(distLens);
|
|
785
|
+
// --- Step 3: Determine HLIT and HDIST ---
|
|
786
|
+
let hlit = 286;
|
|
787
|
+
while (hlit > 257 && litLenLens[hlit - 1] === 0) {
|
|
788
|
+
hlit--;
|
|
789
|
+
}
|
|
790
|
+
let hdist = 30;
|
|
791
|
+
while (hdist > 1 && distLens[hdist - 1] === 0) {
|
|
792
|
+
hdist--;
|
|
793
|
+
}
|
|
794
|
+
// --- Step 4: Run-length encode the code lengths ---
|
|
795
|
+
const combined = new Uint8Array(hlit + hdist);
|
|
796
|
+
combined.set(litLenLens.subarray(0, hlit));
|
|
797
|
+
combined.set(distLens.subarray(0, hdist), hlit);
|
|
798
|
+
const clSymbols = [];
|
|
799
|
+
const clFreqs = new Uint32Array(19);
|
|
800
|
+
for (let i = 0; i < combined.length;) {
|
|
801
|
+
const val = combined[i];
|
|
802
|
+
if (val === 0) {
|
|
803
|
+
let run = 1;
|
|
804
|
+
while (i + run < combined.length && combined[i + run] === 0) {
|
|
805
|
+
run++;
|
|
806
|
+
}
|
|
807
|
+
while (run > 0) {
|
|
808
|
+
if (run >= 11) {
|
|
809
|
+
const repeat = Math.min(run, 138);
|
|
810
|
+
clSymbols.push({ sym: 18, extra: repeat - 11, extraBits: 7 });
|
|
811
|
+
clFreqs[18]++;
|
|
812
|
+
run -= repeat;
|
|
813
|
+
i += repeat;
|
|
814
|
+
}
|
|
815
|
+
else if (run >= 3) {
|
|
816
|
+
const repeat = Math.min(run, 10);
|
|
817
|
+
clSymbols.push({ sym: 17, extra: repeat - 3, extraBits: 3 });
|
|
818
|
+
clFreqs[17]++;
|
|
819
|
+
run -= repeat;
|
|
820
|
+
i += repeat;
|
|
821
|
+
}
|
|
822
|
+
else {
|
|
823
|
+
clSymbols.push({ sym: 0, extra: 0, extraBits: 0 });
|
|
824
|
+
clFreqs[0]++;
|
|
825
|
+
run--;
|
|
826
|
+
i++;
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
else {
|
|
831
|
+
clSymbols.push({ sym: val, extra: 0, extraBits: 0 });
|
|
832
|
+
clFreqs[val]++;
|
|
833
|
+
i++;
|
|
834
|
+
let run = 0;
|
|
835
|
+
while (i + run < combined.length && combined[i + run] === val) {
|
|
836
|
+
run++;
|
|
837
|
+
}
|
|
838
|
+
while (run >= 3) {
|
|
839
|
+
const repeat = Math.min(run, 6);
|
|
840
|
+
clSymbols.push({ sym: 16, extra: repeat - 3, extraBits: 2 });
|
|
841
|
+
clFreqs[16]++;
|
|
842
|
+
run -= repeat;
|
|
843
|
+
i += repeat;
|
|
844
|
+
}
|
|
845
|
+
while (run > 0) {
|
|
846
|
+
clSymbols.push({ sym: val, extra: 0, extraBits: 0 });
|
|
847
|
+
clFreqs[val]++;
|
|
848
|
+
run--;
|
|
849
|
+
i++;
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
}
|
|
853
|
+
// --- Step 5: Build code-length Huffman tree ---
|
|
854
|
+
const clLens = buildCodeLengths(clFreqs, 7);
|
|
855
|
+
const clCodes = buildCanonicalCodes(clLens);
|
|
856
|
+
let hclen = 19;
|
|
857
|
+
while (hclen > 4 && clLens[CODE_LENGTH_ORDER[hclen - 1]] === 0) {
|
|
858
|
+
hclen--;
|
|
859
|
+
}
|
|
860
|
+
// --- Step 6: Write block header ---
|
|
861
|
+
out.writeBits(isFinal ? 1 : 0, 1); // BFINAL
|
|
862
|
+
out.writeBits(2, 2); // BTYPE = 10 (dynamic Huffman)
|
|
863
|
+
out.writeBits(hlit - 257, 5);
|
|
864
|
+
out.writeBits(hdist - 1, 5);
|
|
865
|
+
out.writeBits(hclen - 4, 4);
|
|
866
|
+
for (let i = 0; i < hclen; i++) {
|
|
867
|
+
out.writeBits(clLens[CODE_LENGTH_ORDER[i]], 3);
|
|
868
|
+
}
|
|
869
|
+
for (const cls of clSymbols) {
|
|
870
|
+
const [clCode, clLen] = clCodes[cls.sym];
|
|
871
|
+
out.writeBitsReverse(clCode, clLen);
|
|
872
|
+
if (cls.extraBits > 0) {
|
|
873
|
+
out.writeBits(cls.extra, cls.extraBits);
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
// --- Step 7: Write compressed data ---
|
|
877
|
+
for (const sym of symbols) {
|
|
878
|
+
if (sym.dist === 0) {
|
|
879
|
+
const [lCode, lLen] = litLenCodes[sym.litOrLen];
|
|
880
|
+
out.writeBitsReverse(lCode, lLen);
|
|
881
|
+
}
|
|
882
|
+
else {
|
|
883
|
+
const ls = getLengthSymbol(sym.litOrLen);
|
|
884
|
+
const [lCode, lLen] = litLenCodes[ls.code];
|
|
885
|
+
out.writeBitsReverse(lCode, lLen);
|
|
886
|
+
if (ls.extraBits > 0) {
|
|
887
|
+
out.writeBits(ls.extra, ls.extraBits);
|
|
888
|
+
}
|
|
889
|
+
const ds = getDistSymbol(sym.dist);
|
|
890
|
+
const [dCode, dLen] = distCodes[ds.code];
|
|
891
|
+
out.writeBitsReverse(dCode, dLen);
|
|
892
|
+
if (ds.extraBits > 0) {
|
|
893
|
+
out.writeBits(ds.extra, ds.extraBits);
|
|
894
|
+
}
|
|
895
|
+
}
|
|
675
896
|
}
|
|
897
|
+
// End of block
|
|
898
|
+
const [eobCode, eobLen] = litLenCodes[256];
|
|
899
|
+
out.writeBitsReverse(eobCode, eobLen);
|
|
676
900
|
}
|
|
677
|
-
// ============================================================================
|
|
678
|
-
// Stateful Streaming Deflater
|
|
679
|
-
// ============================================================================
|
|
680
|
-
/** Maximum LZ77 sliding window size (32 KB per RFC 1951). */
|
|
681
|
-
const WINDOW_SIZE = 32768;
|
|
682
901
|
/**
 * Run LZ77 match-finding on `data[start..end)`.
 *
 * When `state` is null, performs one-shot compression with fresh hash tables;
 * positions are plain indices into `data`.
 * When `state` is provided, its hash chains and sliding window are reused and
 * updated in place, so matches may reference bytes seen in earlier calls.
 *
 * NOTE(review): in stateful mode the global position is `state.totalIn + pos`
 * while the window write cursor is `state.windowLen`; the two only stay in
 * step when `start` is 0 (which is what `SyncDeflater.write` passes) — confirm
 * before calling with a non-zero `start` and a state object.
 *
 * @param data - Input bytes (indexable, e.g. a Uint8Array).
 * @param start - First index of `data` to process (inclusive).
 * @param end - Index at which processing stops (exclusive).
 * @param config - Search tuning: `maxChainLen`, `goodLen`, `niceLen`, `lazy`.
 * @param state - Null for one-shot use, or an object with `head`, `prev`,
 *   `window`, `windowLen`, `totalIn`, `hasPrevMatch`, `prevMatchLen`,
 *   `prevMatchDist` and `prevLiteral`; the scalar fields are written back
 *   before returning.
 * @returns An array of LZ77 symbols. `{ litOrLen, dist: 0 }` is a literal byte;
 *   `{ litOrLen, dist > 0 }` is a match of length `litOrLen` at distance `dist`.
 */
function lz77Compress(data, start, end, config, state) {
    const symbols = [];
    const { maxChainLen, goodLen, niceLen } = config;
    const useLazy = config.lazy;
    const mask = MAX_DIST - 1;
    const head = state ? state.head : new Int32Array(HASH_SIZE);
    const prevArr = state ? state.prev : new Int32Array(MAX_DIST);
    const window = state ? state.window : null;
    // Offset that turns a chunk-local index into a stream-global position.
    const totalIn = state ? state.totalIn : 0;
    let wLen = state ? state.windowLen : 0;
    let hasPrevMatch = state ? state.hasPrevMatch : false;
    let prevMatchLen = state ? state.prevMatchLen : 0;
    let prevMatchDist = state ? state.prevMatchDist : 0;
    let prevLiteral = state ? state.prevLiteral : 0;
    // Read the byte at a global position: from the current chunk when it lies
    // inside [start, end), otherwise from the circular window of older bytes.
    const getByte = state
        ? (globalPos) => {
            const localPos = globalPos - totalIn;
            if (localPos >= start && localPos < end) {
                return data[localPos];
            }
            return window[globalPos & mask];
        }
        : (globalPos) => data[globalPos];
    // Link a chunk-local position into its hash chain. Heads are stored as
    // position + 1 so that 0 can mean "empty chain".
    const insertHash = (localPos) => {
        if (localPos + 2 >= end) {
            return;
        }
        const h = hash3(data[localPos], data[localPos + 1], data[localPos + 2]);
        const gp = totalIn + localPos;
        prevArr[gp & mask] = head[h];
        head[h] = gp + 1;
    };
    // Copy consumed bytes into the circular window (stateful mode only).
    const insertWindow = (localPos, count) => {
        if (!state) {
            return;
        }
        for (let i = 0; i < count; i++) {
            window[(wLen + i) & mask] = data[localPos + i];
        }
        wLen += count;
    };
    let pos = start;
    while (pos < end) {
        let bestLen = 0;
        let bestDist = 0;
        if (pos + 2 < end) {
            const h = hash3(data[pos], data[pos + 1], data[pos + 2]);
            const globalPos = totalIn + pos;
            // When we already have a good match from a previous lazy evaluation,
            // reduce the chain search length (matching zlib's good_length behavior).
            let chainRemaining = useLazy && hasPrevMatch && prevMatchLen >= goodLen ? maxChainLen >> 2 : maxChainLen;
            let matchHead = head[h];
            while (matchHead > 0 && chainRemaining-- > 0) {
                const mGlobalPos = matchHead - 1;
                const dist = globalPos - mGlobalPos;
                if (dist > MAX_DIST || dist <= 0) {
                    break;
                }
                if (bestLen >= MIN_MATCH) {
                    // Cheap rejection: a candidate can only beat the current best
                    // if it also agrees at offset bestLen.
                    if (getByte(mGlobalPos + bestLen) !== data[pos + bestLen]) {
                        matchHead = prevArr[mGlobalPos & mask];
                        continue;
                    }
                }
                const maxLen = Math.min(MAX_MATCH, end - pos);
                let len = 0;
                while (len < maxLen && getByte(mGlobalPos + len) === data[pos + len]) {
                    len++;
                }
                if (len > bestLen) {
                    bestLen = len;
                    bestDist = dist;
                    if (len >= niceLen) {
                        break;
                    }
                }
                matchHead = prevArr[mGlobalPos & mask];
            }
            // Register the current position only after searching, so it can
            // never be returned as its own match candidate.
            prevArr[globalPos & mask] = head[h];
            head[h] = globalPos + 1;
        }
        if (useLazy && hasPrevMatch) {
            if (bestLen > prevMatchLen) {
                // The current position has the longer match: the deferred byte
                // goes out as a literal and the new match is deferred instead.
                symbols.push({ litOrLen: prevLiteral, dist: 0 });
                prevMatchLen = bestLen;
                prevMatchDist = bestDist;
                prevLiteral = data[pos];
                insertWindow(pos, 1);
                pos++;
            }
            else {
                // The deferred match (which began at pos - 1) wins.
                symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
                const matchEnd = Math.min(pos - 1 + prevMatchLen, end);
                // `pos` was already linked into its chain above; inserting it a
                // second time would make its chain entry point at itself and
                // cut off every older candidate, so start at pos + 1.
                for (let i = pos + 1; i < matchEnd; i++) {
                    insertHash(i);
                }
                insertWindow(pos, matchEnd - pos);
                pos = matchEnd;
                hasPrevMatch = false;
                prevMatchLen = 0;
            }
        }
        else if (bestLen >= MIN_MATCH) {
            if (useLazy) {
                // Defer the decision by one byte to see whether pos + 1 does better.
                hasPrevMatch = true;
                prevMatchLen = bestLen;
                prevMatchDist = bestDist;
                prevLiteral = data[pos];
                insertWindow(pos, 1);
                pos++;
            }
            else {
                symbols.push({ litOrLen: bestLen, dist: bestDist });
                const matchEnd = Math.min(pos + bestLen, end);
                for (let i = pos + 1; i < matchEnd; i++) {
                    insertHash(i);
                }
                insertWindow(pos, matchEnd - pos);
                pos = matchEnd;
            }
        }
        else {
            if (hasPrevMatch) {
                // Non-lazy mode shouldn't reach here, but handle gracefully
                symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
                hasPrevMatch = false;
                prevMatchLen = 0;
            }
            symbols.push({ litOrLen: data[pos], dist: 0 });
            insertWindow(pos, 1);
            pos++;
        }
    }
    // Flush pending lazy match (defensive: a deferred match always leaves at
    // least two more bytes to scan, so the loop normally resolves it itself).
    if (hasPrevMatch) {
        symbols.push({ litOrLen: prevMatchLen, dist: prevMatchDist });
        const matchEnd = Math.min(pos - 1 + prevMatchLen, end);
        for (let i = pos; i < matchEnd; i++) {
            insertHash(i);
        }
        insertWindow(pos, matchEnd - pos);
        hasPrevMatch = false;
        prevMatchLen = 0;
    }
    if (state) {
        state.windowLen = wLen;
        state.totalIn = totalIn + (end - start);
        state.hasPrevMatch = hasPrevMatch;
        state.prevMatchLen = prevMatchLen;
        state.prevMatchDist = prevMatchDist;
        state.prevLiteral = prevLiteral;
    }
    return symbols;
}
|
|
1104
|
+
// ============================================================================
|
|
1105
|
+
// Stateful Streaming Deflater
|
|
1106
|
+
// ============================================================================
|
|
1107
|
+
/**
|
|
1108
|
+
* Stateful synchronous DEFLATE compressor with Dynamic Huffman encoding.
|
|
1109
|
+
*
|
|
1110
|
+
* Unlike `deflateRawCompressed` (which is a one-shot function), this class
|
|
1111
|
+
* maintains state across multiple `write()` calls:
|
|
1112
|
+
*
|
|
1113
|
+
* - **LZ77 sliding window**: back-references can span across chunks.
|
|
1114
|
+
* - **Hash chains**: match positions persist across chunks with typed-array
|
|
1115
|
+
* hash tables for fast lookup.
|
|
1116
|
+
* - **Lazy matching**: configurable per compression level.
|
|
1117
|
+
* - **Dynamic Huffman**: each block builds optimal Huffman trees from
|
|
1118
|
+
* actual symbol frequencies (BTYPE=2), producing significantly smaller
|
|
1119
|
+
* output than fixed Huffman (BTYPE=1).
|
|
1120
|
+
* - **Bit writer**: bit position is preserved, so consecutive blocks form
|
|
1121
|
+
* a single valid DEFLATE bit-stream without alignment issues.
|
|
1122
|
+
*
|
|
1123
|
+
* Each `write()` emits one non-final Dynamic Huffman block (BFINAL=0).
|
|
1124
|
+
* `finish()` emits a final empty fixed-Huffman block (BFINAL=1).
|
|
1125
|
+
*
|
|
1126
|
+
* This is the pure-JS equivalent of Node.js `zlib.deflateRawSync` with
|
|
1127
|
+
* `Z_SYNC_FLUSH`, used by the streaming ZIP writer (`pushSync`) to achieve
|
|
1128
|
+
* constant-memory streaming in both Node.js and browsers.
|
|
1129
|
+
*
|
|
1130
|
+
* @param level - Compression level (0-9). Level 0 emits STORE blocks.
|
|
1131
|
+
* Default: 6 (matching zlib default).
|
|
1132
|
+
*/
|
|
1133
|
+
export class SyncDeflater {
|
|
1134
|
+
constructor(level = 6) {
|
|
1135
|
+
this._output = new BitWriter();
|
|
1136
|
+
this._state = {
|
|
1137
|
+
head: new Int32Array(HASH_SIZE),
|
|
1138
|
+
prev: new Int32Array(MAX_DIST),
|
|
1139
|
+
window: new Uint8Array(MAX_DIST),
|
|
1140
|
+
windowLen: 0,
|
|
1141
|
+
totalIn: 0,
|
|
1142
|
+
hasPrevMatch: false,
|
|
1143
|
+
prevMatchLen: 0,
|
|
1144
|
+
prevMatchDist: 0,
|
|
1145
|
+
prevLiteral: 0
|
|
1146
|
+
};
|
|
1147
|
+
this._level = Math.max(0, Math.min(9, level));
|
|
1148
|
+
this._config = getLZ77Config(this._level);
|
|
1149
|
+
}
|
|
1150
|
+
/**
|
|
1151
|
+
* Compress a chunk and return the compressed bytes produced so far.
|
|
1152
|
+
* The output is a valid prefix of a DEFLATE stream (one or more non-final blocks).
|
|
1153
|
+
*/
|
|
1154
|
+
write(data) {
|
|
1155
|
+
if (data.length === 0) {
|
|
1156
|
+
return new Uint8Array(0);
|
|
1157
|
+
}
|
|
1158
|
+
const out = this._output;
|
|
1159
|
+
if (this._level === 0) {
|
|
1160
|
+
// Store mode: emit uncompressed block(s)
|
|
1161
|
+
this._writeStore(data);
|
|
1162
|
+
return out.flushBytes();
|
|
1163
|
+
}
|
|
1164
|
+
// LZ77 + Dynamic Huffman
|
|
1165
|
+
const symbols = lz77Compress(data, 0, data.length, this._config, this._state);
|
|
1166
|
+
emitDynamicBlock(out, symbols, false);
|
|
893
1167
|
return out.flushBytes();
|
|
894
1168
|
}
|
|
895
1169
|
/**
|
|
@@ -900,8 +1174,38 @@ export class SyncDeflater {
|
|
|
900
1174
|
const out = this._output;
|
|
901
1175
|
// Final block: BFINAL=1, BTYPE=01, immediately followed by EOB (symbol 256)
|
|
902
1176
|
out.writeBits(1, 1); // BFINAL = 1
|
|
903
|
-
out.writeBits(1, 2); // BTYPE = 01
|
|
1177
|
+
out.writeBits(1, 2); // BTYPE = 01 (fixed Huffman)
|
|
904
1178
|
writeLiteralCode(out, 256);
|
|
905
1179
|
return out.finish();
|
|
906
1180
|
}
|
|
1181
|
+
/**
|
|
1182
|
+
* Write STORE (uncompressed) blocks for level=0.
|
|
1183
|
+
* Each block is non-final (BFINAL=0); the final block is emitted by finish().
|
|
1184
|
+
*/
|
|
1185
|
+
_writeStore(data) {
|
|
1186
|
+
const out = this._output;
|
|
1187
|
+
const MAX_BLOCK_SIZE = 65535;
|
|
1188
|
+
let offset = 0;
|
|
1189
|
+
while (offset < data.length) {
|
|
1190
|
+
const remaining = data.length - offset;
|
|
1191
|
+
const blockSize = Math.min(MAX_BLOCK_SIZE, remaining);
|
|
1192
|
+
// Align to byte boundary before stored block header
|
|
1193
|
+
out.alignToByte();
|
|
1194
|
+
out.writeBits(0, 1); // BFINAL = 0 (never final; finish() handles that)
|
|
1195
|
+
out.writeBits(0, 2); // BTYPE = 00 (stored)
|
|
1196
|
+
// Align to byte boundary after block header (3 bits → pad to 8)
|
|
1197
|
+
out.alignToByte();
|
|
1198
|
+
// LEN
|
|
1199
|
+
out.writeBits(blockSize & 0xff, 8);
|
|
1200
|
+
out.writeBits((blockSize >> 8) & 0xff, 8);
|
|
1201
|
+
// NLEN
|
|
1202
|
+
out.writeBits(~blockSize & 0xff, 8);
|
|
1203
|
+
out.writeBits((~blockSize >> 8) & 0xff, 8);
|
|
1204
|
+
// Data
|
|
1205
|
+
for (let i = 0; i < blockSize; i++) {
|
|
1206
|
+
out.writeBits(data[offset + i], 8);
|
|
1207
|
+
}
|
|
1208
|
+
offset += blockSize;
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
907
1211
|
}
|