zstdify 1.1.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -2
- package/dist/bitstream/bitReaderReverse.d.ts +6 -0
- package/dist/bitstream/bitReaderReverse.js +51 -1
- package/dist/bitstream/bitReaderReverse.js.map +1 -1
- package/dist/bitstream/index.d.ts +1 -0
- package/dist/bitstream/index.js +1 -0
- package/dist/bitstream/index.js.map +1 -1
- package/dist/bitstream/reverseBitWriter.d.ts +1 -0
- package/dist/bitstream/reverseBitWriter.js +66 -0
- package/dist/bitstream/reverseBitWriter.js.map +1 -0
- package/dist/compress.js +47 -7
- package/dist/compress.js.map +1 -1
- package/dist/decode/debugTrace.d.ts +31 -0
- package/dist/decode/debugTrace.js +2 -0
- package/dist/decode/debugTrace.js.map +1 -0
- package/dist/decode/decompressFrame.d.ts +3 -1
- package/dist/decode/decompressFrame.js +153 -59
- package/dist/decode/decompressFrame.js.map +1 -1
- package/dist/decode/fusedSequences.d.ts +9 -0
- package/dist/decode/fusedSequences.js +26 -0
- package/dist/decode/fusedSequences.js.map +1 -0
- package/dist/decode/literals.js +164 -111
- package/dist/decode/literals.js.map +1 -1
- package/dist/decode/reconstruct.d.ts +33 -1
- package/dist/decode/reconstruct.js +591 -24
- package/dist/decode/reconstruct.js.map +1 -1
- package/dist/decode/sequences.d.ts +19 -7
- package/dist/decode/sequences.js +225 -133
- package/dist/decode/sequences.js.map +1 -1
- package/dist/decompress.d.ts +10 -0
- package/dist/decompress.js +5 -3
- package/dist/decompress.js.map +1 -1
- package/dist/encode/blockWriter.js +8 -2
- package/dist/encode/blockWriter.js.map +1 -1
- package/dist/encode/compressedBlock.d.ts +27 -1
- package/dist/encode/compressedBlock.js +594 -339
- package/dist/encode/compressedBlock.js.map +1 -1
- package/dist/encode/fastMatcher.d.ts +7 -0
- package/dist/encode/fastMatcher.js +13 -0
- package/dist/encode/fastMatcher.js.map +1 -0
- package/dist/encode/greedySequences.d.ts +9 -6
- package/dist/encode/greedySequences.js +21 -77
- package/dist/encode/greedySequences.js.map +1 -1
- package/dist/encode/lazyMatcher.d.ts +7 -0
- package/dist/encode/lazyMatcher.js +13 -0
- package/dist/encode/lazyMatcher.js.map +1 -0
- package/dist/encode/literalsEncoder.d.ts +14 -0
- package/dist/encode/literalsEncoder.js +343 -0
- package/dist/encode/literalsEncoder.js.map +1 -0
- package/dist/encode/optimalParser.d.ts +7 -0
- package/dist/encode/optimalParser.js +13 -0
- package/dist/encode/optimalParser.js.map +1 -0
- package/dist/encode/sequencePlanner.d.ts +23 -0
- package/dist/encode/sequencePlanner.js +280 -0
- package/dist/encode/sequencePlanner.js.map +1 -0
- package/dist/entropy/fse.d.ts +13 -6
- package/dist/entropy/fse.js +176 -13
- package/dist/entropy/fse.js.map +1 -1
- package/dist/entropy/huffman.d.ts +7 -5
- package/dist/entropy/huffman.js +18 -7
- package/dist/entropy/huffman.js.map +1 -1
- package/dist/entropy/index.d.ts +2 -2
- package/dist/entropy/weights.js +20 -14
- package/dist/entropy/weights.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,7 +1,19 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { buildHuffmanDecodeTable, weightsToNumBits } from '../entropy/huffman.js';
|
|
1
|
+
import { buildFSEDecodeTable, normalizeCountsForTable, writeNCount } from '../entropy/fse.js';
|
|
2
|
+
import { encodeReverseBitstream } from '../bitstream/reverseBitWriter.js';
|
|
4
3
|
import { LITERALS_LENGTH_DEFAULT_DISTRIBUTION, LITERALS_LENGTH_TABLE_LOG, MATCH_LENGTH_DEFAULT_DISTRIBUTION, MATCH_LENGTH_TABLE_LOG, OFFSET_CODE_DEFAULT_DISTRIBUTION, OFFSET_CODE_TABLE_LOG, } from '../entropy/predefined.js';
|
|
4
|
+
import { buildGeneralCompressedLiteralsForBench, encodeLiteralsSection, } from './literalsEncoder.js';
|
|
5
|
+
// Predefined FSE tables built once and reused for sequence encoding.
|
|
6
|
+
let cachedLLTable = null;
|
|
7
|
+
let cachedOFTable = null;
|
|
8
|
+
let cachedMLTable = null;
|
|
9
|
+
/**
 * Returns the predefined FSE decode tables for literal lengths (`ll`),
 * offset codes (`of`) and match lengths (`ml`), building them on first use
 * and reusing the cached instances afterwards.
 *
 * All three tables are built into locals before any cache slot is assigned.
 * Previously only the literal-length slot was checked, so an exception thrown
 * while building a later table left the cache half-populated and every
 * subsequent call returned `null` for the missing tables.
 *
 * @returns {{ ll: object, of: object, ml: object }} The cached tables.
 * @throws Whatever `buildFSEDecodeTable` throws; the cache is left untouched.
 */
function getPredefinedFSETables() {
    if (!cachedLLTable || !cachedOFTable || !cachedMLTable) {
        const llTable = buildFSEDecodeTable(LITERALS_LENGTH_DEFAULT_DISTRIBUTION, LITERALS_LENGTH_TABLE_LOG);
        const ofTable = buildFSEDecodeTable(OFFSET_CODE_DEFAULT_DISTRIBUTION, OFFSET_CODE_TABLE_LOG);
        const mlTable = buildFSEDecodeTable(MATCH_LENGTH_DEFAULT_DISTRIBUTION, MATCH_LENGTH_TABLE_LOG);
        // Publish only once every build has succeeded.
        cachedLLTable = llTable;
        cachedOFTable = ofTable;
        cachedMLTable = mlTable;
    }
    return { ll: cachedLLTable, of: cachedOFTable, ml: cachedMLTable };
}
|
|
5
17
|
const LL_BASELINE = [
|
|
6
18
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 128, 256, 512, 1024, 2048,
|
|
7
19
|
4096, 8192, 16384, 32768, 65536,
|
|
@@ -22,32 +34,168 @@ function writeU24LE(arr, offset, value) {
|
|
|
22
34
|
arr[offset + 1] = (value >> 8) & 0xff;
|
|
23
35
|
arr[offset + 2] = (value >> 16) & 0xff;
|
|
24
36
|
}
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
37
|
+
// A 32-bit word with every bit set, as an unsigned number.
const U32_ALL_BITS = 0xffffffff;
// Derived per-table lookup data, keyed by the table object itself
// (filled in by getPrecomputedPathTable).
const pathTableCache = new WeakMap();
// Scratch buffers handed out by the get*Scratch helpers below; each stays
// null until its helper is first called and is replaced when a larger
// buffer is requested.
let pathMasksScratch = null;
let pathNextChoiceScratch = null;
let sequenceReadCountsScratch = null;
let sequenceReadValuesScratch = null;
|
|
43
|
+
/**
 * Builds an unsigned 32-bit mask whose bits `startBit`..`endBit`
 * (both inclusive, 0 = least significant) are set.
 *
 * @param {number} startBit Lowest bit to include (0-31).
 * @param {number} endBit Highest bit to include (0-31).
 * @returns {number} The mask as a non-negative number.
 */
function rangeMask(startBit, endBit) {
    const coversWholeWord = startBit === 0 && endBit === 31;
    if (coversWholeWord) {
        return U32_ALL_BITS;
    }
    // Everything at or above startBit.
    const fromStart = (U32_ALL_BITS << startBit) >>> 0;
    // Everything at or below endBit; bit 31 is special-cased because
    // `1 << 32` wraps around in JavaScript.
    let upToEnd = U32_ALL_BITS;
    if (endBit !== 31) {
        upToEnd = ((1 << (endBit + 1)) - 1) >>> 0;
    }
    return (fromStart & upToEnd) >>> 0;
}
|
|
50
|
+
/**
 * Sets bit number `bit` in a bitset stored as 32-bit words inside `mask`,
 * where the bitset's first word lives at index `maskOffset`.
 *
 * @param {Uint32Array} mask Backing word array (mutated in place).
 * @param {number} maskOffset Index of the bitset's first word.
 * @param {number} bit Bit index within the bitset.
 */
function setMaskBit(mask, maskOffset, bit) {
    const slot = maskOffset + (bit >>> 5);
    const flag = 1 << (bit & 31);
    mask[slot] = (mask[slot] | flag) >>> 0;
}
|
|
54
|
+
/**
 * Reports whether a bitset of `wordCount` words starting at `maskOffset`
 * has no bits set. Missing entries are treated as zero.
 *
 * @param {ArrayLike<number>} mask Backing word array.
 * @param {number} maskOffset Index of the bitset's first word.
 * @param {number} wordCount Number of 32-bit words in the bitset.
 * @returns {boolean} `true` when every word is zero.
 */
function isMaskEmpty(mask, maskOffset, wordCount) {
    let remaining = wordCount;
    let cursor = maskOffset;
    while (remaining > 0) {
        if ((mask[cursor] ?? 0) !== 0) {
            return false;
        }
        cursor++;
        remaining--;
    }
    return true;
}
|
|
61
|
+
/**
 * Returns the index (0-31) of the lowest set bit of a 32-bit word,
 * or -1 when the word is zero.
 *
 * @param {number} word Value interpreted as an unsigned 32-bit integer.
 * @returns {number} Bit index of the least significant 1 bit, or -1.
 */
function firstBitInWord(word) {
    const unsigned = word >>> 0;
    // `x & -x` keeps only the lowest set bit; clz32 then locates it
    // (clz32(0) is 32, which yields -1 for an empty word).
    const lowestOnly = (unsigned & -unsigned) >>> 0;
    const leadingZeros = Math.clz32(lowestOnly);
    return 31 - leadingZeros;
}
|
|
66
|
+
/**
 * Finds the lowest set bit in a bitset of `wordCount` 32-bit words that
 * starts at index `maskOffset` of `mask`.
 *
 * @param {ArrayLike<number>} mask Backing word array.
 * @param {number} maskOffset Index of the bitset's first word.
 * @param {number} wordCount Number of words to scan.
 * @returns {number} Bit index relative to the bitset start, or -1 if empty.
 */
function findFirstSetBit(mask, maskOffset, wordCount) {
    let wordIndex = 0;
    while (wordIndex < wordCount) {
        const bits = mask[maskOffset + wordIndex] ?? 0;
        if (bits !== 0) {
            return (wordIndex << 5) + firstBitInWord(bits);
        }
        wordIndex++;
    }
    return -1;
}
|
|
75
|
+
/**
 * Checks whether every entry of `codes` is the same value.
 *
 * @param {ArrayLike<number>} codes Sequence of symbol codes.
 * @returns {number} The shared code when all entries match; -1 when the
 *   sequence is empty or contains differing values. Missing entries are
 *   read as -1.
 */
function codesAreUniform(codes) {
    const total = codes.length;
    if (total === 0) {
        return -1;
    }
    const expected = codes[0] ?? -1;
    let idx = 1;
    while (idx < total) {
        const current = codes[idx] ?? -1;
        if (current !== expected) {
            return -1;
        }
        idx++;
    }
    return expected;
}
|
|
85
|
+
/**
 * Finds the lowest set bit whose index lies in `[minState, maxState]`
 * (inclusive) within a bitset of `wordCount` 32-bit words starting at
 * `maskOffset`. The range is clamped to the bitset's extent first.
 *
 * @param {ArrayLike<number>} mask Backing word array.
 * @param {number} maskOffset Index of the bitset's first word.
 * @param {number} wordCount Number of words in the bitset.
 * @param {number} minState Lowest bit index to consider.
 * @param {number} maxState Highest bit index to consider.
 * @returns {number} The matching bit index, or -1 if none is set in range.
 */
function findFirstSetBitInRange(mask, maskOffset, wordCount, minState, maxState) {
    if (wordCount <= 0) {
        return -1;
    }
    const highestBit = (wordCount << 5) - 1;
    const lo = minState < 0 ? 0 : minState;
    const hi = maxState > highestBit ? highestBit : maxState;
    if (lo > hi) {
        return -1;
    }
    const firstWord = lo >>> 5;
    const lastWord = hi >>> 5;
    const firstWordBits = mask[maskOffset + firstWord] ?? 0;
    if (firstWord === lastWord) {
        // Whole range sits inside one word: trim both ends at once.
        const inside = (firstWordBits & rangeMask(lo & 31, hi & 31)) >>> 0;
        return inside === 0 ? -1 : (firstWord << 5) + firstBitInWord(inside);
    }
    // Leading partial word.
    const head = (firstWordBits & rangeMask(lo & 31, 31)) >>> 0;
    if (head !== 0) {
        return (firstWord << 5) + firstBitInWord(head);
    }
    // Full words strictly between the first and last.
    let wordIndex = firstWord + 1;
    while (wordIndex < lastWord) {
        const bits = mask[maskOffset + wordIndex] ?? 0;
        if (bits !== 0) {
            return (wordIndex << 5) + firstBitInWord(bits);
        }
        wordIndex++;
    }
    // Trailing partial word.
    const tail = ((mask[maskOffset + lastWord] ?? 0) & rangeMask(0, hi & 31)) >>> 0;
    return tail === 0 ? -1 : (lastWord << 5) + firstBitInWord(tail);
}
|
|
119
|
+
/**
 * Derives (and caches per table object) the lookup structures used when
 * searching for state paths through an FSE table.
 *
 * `table` is read through `table.length` and the per-state arrays
 * `table.symbol`, `table.numBits` and `table.baseline`.
 *
 * The result contains:
 * - `tableSize`, `wordCount` (32-bit words needed for one bit per state);
 * - `statesBySymbol[sym]`: ascending list of states whose symbol is `sym`;
 * - `symbolMasks[sym]`: the same set as a bitset;
 * - `avgBitsBySymbol[sym]`: mean `numBits` over those states, or
 *   `Infinity` when the symbol has no state;
 * - `baselineByState`, `minNextByState`, `maxNextByState`: for each state,
 *   its baseline and the range `baseline .. baseline + 2^numBits - 1`
 *   clamped to `[0, tableSize - 1]`.
 *
 * @param {object} table FSE table in the layout described above.
 * @returns {object} The precomputed structures (same object on repeat calls).
 */
function getPrecomputedPathTable(table) {
    const known = pathTableCache.get(table);
    if (known) {
        return known;
    }
    const tableSize = table.length;
    const wordCount = Math.max(1, Math.ceil(tableSize / 32));
    const baselineByState = new Int32Array(tableSize);
    const minNextByState = new Int32Array(tableSize);
    const maxNextByState = new Int32Array(tableSize);
    let maxSymbol = -1;
    for (let state = 0; state < tableSize; state++) {
        const base = table.baseline[state];
        const nbBits = table.numBits[state];
        const span = nbBits > 0 ? 1 << nbBits : 1;
        const upper = base + span - 1;
        baselineByState[state] = base;
        minNextByState[state] = base < 0 ? 0 : base;
        maxNextByState[state] = upper >= tableSize ? tableSize - 1 : upper;
        const sym = table.symbol[state];
        if (sym > maxSymbol) {
            maxSymbol = sym;
        }
    }
    const symbolCount = maxSymbol + 1;
    const statesBySymbol = Array.from({ length: symbolCount }, () => []);
    const symbolMasks = Array.from({ length: symbolCount }, () => new Uint32Array(wordCount));
    const bitTotals = new Float64Array(symbolCount);
    const stateCounts = new Uint32Array(symbolCount);
    for (let state = 0; state < tableSize; state++) {
        const sym = table.symbol[state];
        const members = statesBySymbol[sym];
        const memberMask = symbolMasks[sym];
        if (members && memberMask) {
            members.push(state);
            const slot = state >>> 5;
            memberMask[slot] = (memberMask[slot] | (1 << (state & 31))) >>> 0;
            bitTotals[sym] += table.numBits[state] ?? 0;
            stateCounts[sym] += 1;
        }
    }
    // Symbols without any state get an infinite cost so estimates that
    // need them can be recognised as impossible.
    const avgBitsBySymbol = Float64Array.from(stateCounts, (count, sym) => (count > 0 ? bitTotals[sym] / count : Number.POSITIVE_INFINITY));
    const precomputed = {
        tableSize,
        wordCount,
        statesBySymbol,
        symbolMasks,
        avgBitsBySymbol,
        baselineByState,
        minNextByState,
        maxNextByState,
    };
    pathTableCache.set(table, precomputed);
    return precomputed;
}
|
|
175
|
+
/**
 * Returns the shared Uint32Array mask scratch buffer with its first
 * `requiredLength` words zeroed, allocating a new buffer of exactly that
 * length when none exists or the current one is too short.
 *
 * The returned buffer may be longer than requested; words past
 * `requiredLength` keep whatever an earlier caller left there.
 *
 * @param {number} requiredLength Minimum number of words needed.
 * @returns {Uint32Array} The module-wide scratch buffer.
 */
function getPathMasksScratch(requiredLength) {
    let buffer = pathMasksScratch;
    if (!buffer || buffer.length < requiredLength) {
        buffer = new Uint32Array(requiredLength);
        pathMasksScratch = buffer;
    }
    buffer.fill(0, 0, requiredLength);
    return buffer;
}
|
|
182
|
+
/**
 * Returns the shared Int32Array "next choice" scratch buffer, allocating a
 * new one of exactly `requiredLength` entries when none exists or the
 * current one is too short. Existing contents are NOT cleared on reuse.
 *
 * @param {number} requiredLength Minimum number of entries needed.
 * @returns {Int32Array} The module-wide scratch buffer.
 */
function getPathNextChoiceScratch(requiredLength) {
    const current = pathNextChoiceScratch;
    const needsAllocation = !current || current.length < requiredLength;
    if (needsAllocation) {
        pathNextChoiceScratch = new Int32Array(requiredLength);
    }
    return pathNextChoiceScratch;
}
|
|
188
|
+
/**
 * Returns the shared Uint8Array scratch buffer for sequence read counts,
 * allocating a new one of exactly `requiredLength` entries when none exists
 * or the current one is too short. Existing contents are NOT cleared.
 *
 * @param {number} requiredLength Minimum number of entries needed.
 * @returns {Uint8Array} The module-wide scratch buffer.
 */
function getSequenceReadCountsScratch(requiredLength) {
    const current = sequenceReadCountsScratch;
    const needsAllocation = !current || current.length < requiredLength;
    if (needsAllocation) {
        sequenceReadCountsScratch = new Uint8Array(requiredLength);
    }
    return sequenceReadCountsScratch;
}
|
|
194
|
+
/**
 * Returns the shared Uint32Array scratch buffer for sequence read values,
 * allocating a new one of exactly `requiredLength` entries when none exists
 * or the current one is too short. Existing contents are NOT cleared.
 *
 * @param {number} requiredLength Minimum number of entries needed.
 * @returns {Uint32Array} The module-wide scratch buffer.
 */
function getSequenceReadValuesScratch(requiredLength) {
    const current = sequenceReadValuesScratch;
    const needsAllocation = !current || current.length < requiredLength;
    if (needsAllocation) {
        sequenceReadValuesScratch = new Uint32Array(requiredLength);
    }
    return sequenceReadValuesScratch;
}
|
|
52
200
|
function findLengthCode(value, baseline, extraBits, directMax, directBias) {
|
|
53
201
|
if (value <= directMax) {
|
|
@@ -65,189 +213,6 @@ function findLengthCode(value, baseline, extraBits, directMax, directBias) {
|
|
|
65
213
|
}
|
|
66
214
|
return null;
|
|
67
215
|
}
|
|
68
|
-
function buildSingleSymbolCompressedLiterals(literals) {
|
|
69
|
-
if (literals.length === 0 || literals.length > 1023)
|
|
70
|
-
return null;
|
|
71
|
-
const sym = literals[0] ?? 0;
|
|
72
|
-
for (let i = 1; i < literals.length; i++) {
|
|
73
|
-
if ((literals[i] ?? 0) !== sym)
|
|
74
|
-
return null;
|
|
75
|
-
}
|
|
76
|
-
if (sym > 127)
|
|
77
|
-
return null;
|
|
78
|
-
const numWeights = sym + 1;
|
|
79
|
-
if (numWeights < 1 || numWeights > 128)
|
|
80
|
-
return null;
|
|
81
|
-
const weights = new Array(numWeights).fill(0);
|
|
82
|
-
weights[sym] = 1;
|
|
83
|
-
let partialSum = 0;
|
|
84
|
-
for (const w of weights) {
|
|
85
|
-
if (w > 0)
|
|
86
|
-
partialSum += 1 << (w - 1);
|
|
87
|
-
}
|
|
88
|
-
if (partialSum === 0)
|
|
89
|
-
return null;
|
|
90
|
-
const maxNumBits = 32 - Math.clz32(partialSum);
|
|
91
|
-
const total = 1 << maxNumBits;
|
|
92
|
-
const remainder = total - partialSum;
|
|
93
|
-
if (remainder <= 0 || (remainder & (remainder - 1)) !== 0)
|
|
94
|
-
return null;
|
|
95
|
-
const lastWeight = 32 - Math.clz32(remainder);
|
|
96
|
-
const fullWeights = [...weights, lastWeight];
|
|
97
|
-
while (fullWeights.length < 256)
|
|
98
|
-
fullWeights.push(0);
|
|
99
|
-
const numBits = weightsToNumBits(fullWeights, maxNumBits);
|
|
100
|
-
const table = buildHuffmanDecodeTable(numBits, maxNumBits);
|
|
101
|
-
const symbolCode = table.findIndex((row) => row?.symbol === sym);
|
|
102
|
-
if (symbolCode < 0)
|
|
103
|
-
return null;
|
|
104
|
-
const stream = encodeReverseBitstream(new Array(literals.length).fill(0).map(() => ({ n: maxNumBits, value: symbolCode })));
|
|
105
|
-
const directHeader = 127 + numWeights;
|
|
106
|
-
const weightWriter = new BitWriter();
|
|
107
|
-
for (let i = 0; i < weights.length; i += 2) {
|
|
108
|
-
const hi = weights[i] ?? 0;
|
|
109
|
-
const lo = weights[i + 1] ?? 0;
|
|
110
|
-
weightWriter.writeBits(8, ((hi & 0xf) << 4) | (lo & 0xf));
|
|
111
|
-
}
|
|
112
|
-
const weightBytes = weightWriter.flush();
|
|
113
|
-
const compressedSize = 1 + weightBytes.length + stream.length;
|
|
114
|
-
if (compressedSize > 1023)
|
|
115
|
-
return null;
|
|
116
|
-
const b0 = (2 | (0 << 2) | ((literals.length & 0x0f) << 4)) & 0xff;
|
|
117
|
-
const b1 = (((literals.length >> 4) & 0x3f) | ((compressedSize & 0x03) << 6)) & 0xff;
|
|
118
|
-
const b2 = (compressedSize >> 2) & 0xff;
|
|
119
|
-
const out = new Uint8Array(3 + 1 + weightBytes.length + stream.length);
|
|
120
|
-
out[0] = b0;
|
|
121
|
-
out[1] = b1;
|
|
122
|
-
out[2] = b2;
|
|
123
|
-
out[3] = directHeader & 0xff;
|
|
124
|
-
out.set(weightBytes, 4);
|
|
125
|
-
out.set(stream, 4 + weightBytes.length);
|
|
126
|
-
return out;
|
|
127
|
-
}
|
|
128
|
-
function splitPowerTerms(targetSum, count) {
|
|
129
|
-
if (count < 1 || count > targetSum)
|
|
130
|
-
return null;
|
|
131
|
-
const terms = [];
|
|
132
|
-
for (let bit = 31; bit >= 0; bit--) {
|
|
133
|
-
const value = 1 << bit;
|
|
134
|
-
if ((targetSum & value) !== 0) {
|
|
135
|
-
terms.push(value);
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
while (terms.length < count) {
|
|
139
|
-
let splitIndex = -1;
|
|
140
|
-
let largest = 0;
|
|
141
|
-
for (let i = 0; i < terms.length; i++) {
|
|
142
|
-
const term = terms[i] ?? 0;
|
|
143
|
-
if (term > largest) {
|
|
144
|
-
largest = term;
|
|
145
|
-
splitIndex = i;
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
if (splitIndex < 0 || largest <= 1) {
|
|
149
|
-
return null;
|
|
150
|
-
}
|
|
151
|
-
const half = largest >>> 1;
|
|
152
|
-
terms.splice(splitIndex, 1, half, half);
|
|
153
|
-
}
|
|
154
|
-
return terms;
|
|
155
|
-
}
|
|
156
|
-
function buildGeneralCompressedLiterals(literals) {
|
|
157
|
-
if (literals.length === 0 || literals.length > 1023)
|
|
158
|
-
return null;
|
|
159
|
-
const symbols = new Set();
|
|
160
|
-
for (const byte of literals) {
|
|
161
|
-
symbols.add(byte);
|
|
162
|
-
}
|
|
163
|
-
if (symbols.size === 0 || symbols.size > 128)
|
|
164
|
-
return null;
|
|
165
|
-
const sortedSymbols = [...symbols].sort((a, b) => a - b);
|
|
166
|
-
const maxSymbol = sortedSymbols[sortedSymbols.length - 1] ?? 0;
|
|
167
|
-
if (maxSymbol > 127)
|
|
168
|
-
return null;
|
|
169
|
-
// Construct a valid direct-weight table over symbols <= 127.
|
|
170
|
-
const partialTarget = 128; // maxNumBits=8 => total 256, remainder is 128 (power of two).
|
|
171
|
-
const contributions = splitPowerTerms(partialTarget, sortedSymbols.length);
|
|
172
|
-
if (!contributions)
|
|
173
|
-
return null;
|
|
174
|
-
contributions.sort((a, b) => b - a);
|
|
175
|
-
const weights = new Array(maxSymbol + 1).fill(0);
|
|
176
|
-
for (let i = 0; i < sortedSymbols.length; i++) {
|
|
177
|
-
const symbol = sortedSymbols[i] ?? 0;
|
|
178
|
-
const contribution = contributions[i] ?? 1;
|
|
179
|
-
const weight = 32 - Math.clz32(contribution);
|
|
180
|
-
if (weight < 1 || weight > 15)
|
|
181
|
-
return null;
|
|
182
|
-
weights[symbol] = weight;
|
|
183
|
-
}
|
|
184
|
-
const fullWeights = [...weights, 8];
|
|
185
|
-
while (fullWeights.length < 256)
|
|
186
|
-
fullWeights.push(0);
|
|
187
|
-
const numBits = weightsToNumBits(fullWeights, 8);
|
|
188
|
-
const table = buildHuffmanDecodeTable(numBits, 8);
|
|
189
|
-
const symbolCode = new Map();
|
|
190
|
-
for (const symbol of sortedSymbols) {
|
|
191
|
-
const code = table.findIndex((row) => row?.symbol === symbol);
|
|
192
|
-
if (code < 0)
|
|
193
|
-
return null;
|
|
194
|
-
symbolCode.set(symbol, code);
|
|
195
|
-
}
|
|
196
|
-
const stream = encodeReverseBitstream(new Array(literals.length).fill(0).map((_, i) => ({
|
|
197
|
-
n: 8,
|
|
198
|
-
value: symbolCode.get(literals[i] ?? 0) ?? 0,
|
|
199
|
-
})));
|
|
200
|
-
const numWeights = weights.length;
|
|
201
|
-
if (numWeights < 1 || numWeights > 128)
|
|
202
|
-
return null;
|
|
203
|
-
const directHeader = 127 + numWeights;
|
|
204
|
-
const weightWriter = new BitWriter();
|
|
205
|
-
for (let i = 0; i < weights.length; i += 2) {
|
|
206
|
-
const hi = weights[i] ?? 0;
|
|
207
|
-
const lo = weights[i + 1] ?? 0;
|
|
208
|
-
weightWriter.writeBits(8, ((hi & 0xf) << 4) | (lo & 0xf));
|
|
209
|
-
}
|
|
210
|
-
const weightBytes = weightWriter.flush();
|
|
211
|
-
const compressedSize = 1 + weightBytes.length + stream.length;
|
|
212
|
-
if (compressedSize > 1023)
|
|
213
|
-
return null;
|
|
214
|
-
const b0 = (2 | (0 << 2) | ((literals.length & 0x0f) << 4)) & 0xff;
|
|
215
|
-
const b1 = (((literals.length >> 4) & 0x3f) | ((compressedSize & 0x03) << 6)) & 0xff;
|
|
216
|
-
const b2 = (compressedSize >> 2) & 0xff;
|
|
217
|
-
const out = new Uint8Array(3 + 1 + weightBytes.length + stream.length);
|
|
218
|
-
out[0] = b0;
|
|
219
|
-
out[1] = b1;
|
|
220
|
-
out[2] = b2;
|
|
221
|
-
out[3] = directHeader & 0xff;
|
|
222
|
-
out.set(weightBytes, 4);
|
|
223
|
-
out.set(stream, 4 + weightBytes.length);
|
|
224
|
-
return out;
|
|
225
|
-
}
|
|
226
|
-
function buildRawLiteralsSection(literals) {
|
|
227
|
-
const size = literals.length;
|
|
228
|
-
if (size <= 31) {
|
|
229
|
-
const out = new Uint8Array(1 + size);
|
|
230
|
-
out[0] = (size << 3) | 0;
|
|
231
|
-
out.set(literals, 1);
|
|
232
|
-
return out;
|
|
233
|
-
}
|
|
234
|
-
if (size <= 0x0fff) {
|
|
235
|
-
const out = new Uint8Array(2 + size);
|
|
236
|
-
out[0] = ((size & 0x0f) << 4) | (1 << 2);
|
|
237
|
-
out[1] = (size >>> 4) & 0xff;
|
|
238
|
-
out.set(literals, 2);
|
|
239
|
-
return out;
|
|
240
|
-
}
|
|
241
|
-
if (size <= 0x0f_ffff) {
|
|
242
|
-
const out = new Uint8Array(3 + size);
|
|
243
|
-
out[0] = ((size & 0x0f) << 4) | (3 << 2);
|
|
244
|
-
out[1] = (size >>> 4) & 0xff;
|
|
245
|
-
out[2] = (size >>> 12) & 0xff;
|
|
246
|
-
out.set(literals, 3);
|
|
247
|
-
return out;
|
|
248
|
-
}
|
|
249
|
-
return null;
|
|
250
|
-
}
|
|
251
216
|
function encodeNumSequences(numSequences) {
|
|
252
217
|
if (numSequences < 0 || numSequences > 0xffff + 0x7f00)
|
|
253
218
|
return null;
|
|
@@ -265,100 +230,270 @@ function encodeNumSequences(numSequences) {
|
|
|
265
230
|
/**
 * Chooses one table state per entry of `codes` so that state `i` carries
 * symbol `codes[i]` and state `i + 1` lies inside the range reachable from
 * state `i` (`minNextByState .. maxNextByState`, as precomputed for the
 * table).
 *
 * Strategy:
 * 1. A single code simply takes the first state carrying that symbol.
 * 2. When every code is identical, each candidate start state is tried in
 *    turn with a greedy "lowest reachable state" walk.
 * 3. Otherwise (or if every greedy walk failed) a backward pass marks, row
 *    by row, which states can still reach the end and records the successor
 *    chosen for each; a forward pass then follows those choices from the
 *    lowest feasible first state.
 *
 * @param {ArrayLike<number>} codes Symbol codes, one per row.
 * @param {object} table FSE table accepted by getPrecomputedPathTable.
 * @returns {{ states: number[], updateBits: number[] } | null} The chosen
 *   states and, for each transition, `nextState - baseline(currentState)`;
 *   `null` when no valid path exists.
 */
function buildStatePath(codes, table) {
    const rowCount = codes.length;
    if (rowCount === 0) {
        return { states: [], updateBits: [] };
    }
    const {
        tableSize,
        wordCount,
        statesBySymbol,
        symbolMasks,
        minNextByState,
        maxNextByState,
        baselineByState,
    } = getPrecomputedPathTable(table);
    if (tableSize <= 0) {
        return null;
    }

    // --- Case 1: a single code needs no transitions at all. ---
    if (rowCount === 1) {
        const soleCode = codes[0] ?? -1;
        if (soleCode < 0 || soleCode >= statesBySymbol.length) {
            return null;
        }
        const soleStates = statesBySymbol[soleCode];
        if (!soleStates || soleStates.length === 0) {
            return null;
        }
        const [entryState] = soleStates;
        if (entryState === undefined) {
            return null;
        }
        return { states: [entryState], updateBits: [] };
    }

    const lastRow = rowCount - 1;

    // --- Case 2: all codes equal, so try a greedy walk from each start. ---
    const sharedCode = codesAreUniform(codes);
    if (sharedCode >= 0 && sharedCode < statesBySymbol.length) {
        const pool = statesBySymbol[sharedCode];
        const poolMask = symbolMasks[sharedCode];
        if (!pool || pool.length === 0 || !poolMask) {
            return null;
        }
        for (const entryState of pool) {
            if (entryState === undefined) {
                continue;
            }
            const states = new Array(rowCount);
            const updateBits = new Array(lastRow);
            states[0] = entryState;
            let current = entryState;
            let row = 0;
            while (row < lastRow) {
                const following = findFirstSetBitInRange(poolMask, 0, wordCount, minNextByState[current], maxNextByState[current]);
                if (following < 0) {
                    break;
                }
                states[row + 1] = following;
                updateBits[row] = following - baselineByState[current];
                current = following;
                row++;
            }
            // The walk only reaches lastRow when no step failed.
            if (row === lastRow) {
                return { states, updateBits };
            }
        }
    }

    // --- Case 3: general backward feasibility pass + forward readout. ---
    const masks = getPathMasksScratch(rowCount * wordCount);
    const nextChoice = getPathNextChoiceScratch(lastRow * tableSize);
    const finalCode = codes[lastRow] ?? -1;
    if (finalCode < 0 || finalCode >= symbolMasks.length) {
        return null;
    }
    const finalMask = symbolMasks[finalCode];
    if (!finalMask) {
        return null;
    }
    // Every state carrying the final symbol is a valid end point.
    const finalMaskBase = lastRow * wordCount;
    masks.set(finalMask, finalMaskBase);
    if (isMaskEmpty(masks, finalMaskBase, wordCount)) {
        return null;
    }
    for (let row = lastRow - 1; row >= 0; row--) {
        const code = codes[row] ?? -1;
        if (code < 0 || code >= statesBySymbol.length) {
            return null;
        }
        const candidates = statesBySymbol[code];
        if (!candidates || candidates.length === 0) {
            return null;
        }
        const rowMaskBase = row * wordCount;
        const nextMaskBase = rowMaskBase + wordCount;
        const rowChoiceBase = row * tableSize;
        for (const candidate of candidates) {
            if (candidate === undefined) {
                continue;
            }
            const successor = findFirstSetBitInRange(masks, nextMaskBase, wordCount, minNextByState[candidate], maxNextByState[candidate]);
            if (successor < 0) {
                continue;
            }
            // Feasible: remember it and the successor it would take.
            setMaskBit(masks, rowMaskBase, candidate);
            nextChoice[rowChoiceBase + candidate] = successor;
        }
        if (isMaskEmpty(masks, rowMaskBase, wordCount)) {
            return null;
        }
    }
    let current = findFirstSetBit(masks, 0, wordCount);
    if (current < 0) {
        return null;
    }
    const states = new Array(rowCount);
    const updateBits = new Array(lastRow);
    states[0] = current;
    for (let row = 0; row < lastRow; row++) {
        const following = nextChoice[row * tableSize + current];
        states[row + 1] = following;
        updateBits[row] = following - baselineByState[current];
        current = following;
    }
    return { states, updateBits };
}
|
|
349
|
-
|
|
332
|
+
// Reusable per-sequence code / extra-bit arrays; null until first requested.
let symbolizedScratch = null;
/**
 * Returns a set of typed arrays (codes, extra-bit counts and extra-bit
 * values for LL, ML and OF) each holding at least `minLength` entries.
 *
 * The current set is reused when it is large enough. Otherwise a new set is
 * allocated whose capacity is the current capacity (or 32 when there is
 * none) doubled until it reaches `minLength`; old contents are not copied.
 *
 * @param {number} minLength Minimum number of entries required per array.
 * @returns {object} The module-wide scratch object.
 */
function ensureSymbolizedScratch(minLength) {
    const current = symbolizedScratch;
    if (current && current.llCodes.length >= minLength) {
        return current;
    }
    let capacity = current ? current.llCodes.length : 0;
    if (capacity === 0) {
        capacity = 32;
    }
    for (; capacity < minLength; capacity *= 2) {
        // keep doubling
    }
    const bytes = () => new Uint8Array(capacity);
    const words = () => new Uint32Array(capacity);
    symbolizedScratch = {
        llCodes: bytes(),
        llExtraN: bytes(),
        llExtraValue: words(),
        mlCodes: bytes(),
        mlExtraN: bytes(),
        mlExtraValue: words(),
        ofCodes: bytes(),
        ofExtraN: bytes(),
        ofExtraValue: words(),
    };
    return symbolizedScratch;
}
|
|
356
|
+
/**
 * Returns the alphabet size implied by `codes`: the largest code plus one.
 * An empty sequence yields 1; missing entries count as 0.
 *
 * @param {ArrayLike<number>} codes Symbol codes.
 * @returns {number} `max(codes) + 1`, never less than 1.
 */
function symbolRange(codes) {
    let highest = 0;
    let idx = 0;
    while (idx < codes.length) {
        const code = codes[idx] ?? 0;
        if (code > highest) {
            highest = code;
        }
        idx++;
    }
    return highest + 1;
}
|
|
365
|
+
/**
 * Counts how often each code in `[0, alphabetSize)` occurs in `codes`.
 * Codes outside that range are ignored; missing entries count as code 0.
 *
 * @param {ArrayLike<number>} codes Symbol codes.
 * @param {number} alphabetSize Number of histogram bins.
 * @returns {Uint32Array} Occurrence count per code.
 */
function buildHistogram(codes, alphabetSize) {
    const counts = new Uint32Array(alphabetSize);
    const total = codes.length;
    for (let idx = 0; idx < total; idx++) {
        const code = codes[idx] ?? 0;
        if (code < 0 || code >= alphabetSize) {
            continue;
        }
        counts[code] = (counts[code] ?? 0) + 1;
    }
    return counts;
}
|
|
375
|
+
/**
 * Computes the bit cost of a state path: `tableLog` plus `table.numBits`
 * of every state except the last one.
 *
 * @param {{ states: ArrayLike<number> }} path Path whose states are scored.
 * @param {object} table Table providing `length` and `numBits`.
 * @param {number} tableLog Fixed cost added once for a non-empty path.
 * @returns {number} 0 for an empty path; `Infinity` when a scored state is
 *   negative, missing, or not below `table.length`; otherwise the bit total.
 */
function scorePath(path, table, tableLog) {
    const { states } = path;
    const stateCount = states.length;
    if (stateCount === 0) {
        return 0;
    }
    let total = tableLog;
    // The final state is deliberately left out of the sum.
    for (let idx = 0; idx + 1 < stateCount; idx++) {
        const state = states[idx] ?? -1;
        if (state < 0 || state >= table.length) {
            return Number.POSITIVE_INFINITY;
        }
        total += table.numBits[state];
    }
    return total;
}
|
|
387
|
+
/**
 * Estimates the bits needed to code a symbol histogram with `table`, using
 * the table's precomputed average `numBits` per symbol:
 * `tableLog + extraHeaderBits + sum(freq[sym] * avgBits[sym])`.
 *
 * @param {ArrayLike<number>} histogram Occurrence count per symbol.
 * @param {object} table FSE table accepted by getPrecomputedPathTable.
 * @param {number} tableLog Fixed cost added once.
 * @param {number} extraHeaderBits Additional fixed cost added once.
 * @returns {number} The estimate, or `Infinity` when a symbol that occurs
 *   has no finite average cost in the table.
 */
function estimatePathBitsFromHistogram(histogram, table, tableLog, extraHeaderBits) {
    const { avgBitsBySymbol } = getPrecomputedPathTable(table);
    const symbolCount = histogram.length;
    let total = tableLog + extraHeaderBits;
    for (let sym = 0; sym < symbolCount; sym++) {
        const occurrences = histogram[sym] ?? 0;
        if (occurrences <= 0) {
            continue;
        }
        const perSymbol = avgBitsBySymbol[sym] ?? Number.POSITIVE_INFINITY;
        if (!Number.isFinite(perSymbol)) {
            return Number.POSITIVE_INFINITY;
        }
        total += perSymbol * occurrences;
    }
    return total;
}
|
|
403
|
+
const normalizedTableCache = new Map();
|
|
404
|
+
/**
 * Hashes a histogram to an unsigned 32-bit number by XOR-ing each count
 * into the accumulator and multiplying by 16777619, starting from
 * 2166136261 (the 32-bit FNV-1a constants, applied per count rather than
 * per byte).
 *
 * @param {ArrayLike<number>} histogram Occurrence counts.
 * @returns {number} Hash in the range `[0, 2^32)`.
 */
function hashHistogram(histogram) {
    const OFFSET_BASIS = 2166136261;
    const PRIME = 16777619;
    let acc = OFFSET_BASIS >>> 0;
    let idx = 0;
    while (idx < histogram.length) {
        acc = Math.imul(acc ^ (histogram[idx] ?? 0), PRIME) >>> 0;
        idx++;
    }
    return acc >>> 0;
}
|
|
412
|
+
/**
 * Compares two histograms element by element.
 *
 * @param {ArrayLike<number>} a First histogram.
 * @param {ArrayLike<number>} b Second histogram.
 * @returns {boolean} `true` when both have the same length and equal counts
 *   at every index (missing entries compare as 0).
 */
function histogramsEqual(a, b) {
    const size = a.length;
    if (size !== b.length) {
        return false;
    }
    let idx = 0;
    while (idx < size) {
        const left = a[idx] ?? 0;
        const right = b[idx] ?? 0;
        if (left !== right) {
            return false;
        }
        idx++;
    }
    return true;
}
|
|
421
|
+
/**
 * Builds candidate FSE tables for `codes`, one per table log in a range
 * derived from the number of distinct symbols and the number of samples,
 * reusing tables cached for an identical histogram.
 *
 * Each candidate is `{ histogram, table, tableLog, header }`, where
 * `header` is the output of `writeNCount` and `table` the output of
 * `buildFSEDecodeTable` for the normalized counts. Table logs whose
 * normalization or table construction throws are skipped.
 *
 * The module-level cache used to grow without limit (one bucket per distinct
 * histogram and table log), which leaks memory in long-running processes
 * that compress varied data. It is now emptied whenever it already holds
 * `MAX_CACHED_BUCKETS` buckets before a new one is inserted. Dropping
 * entries only costs a rebuild; candidates already returned stay valid.
 *
 * @param {ArrayLike<number>} codes Symbol codes to model.
 * @param {number} maxTableLog Largest table log to consider.
 * @returns {Array<{ histogram: Uint32Array, table: object, tableLog: number, header: * }>}
 *   Candidates in ascending table-log order; empty when `codes` has at most
 *   one distinct symbol or the symbols cannot fit within `maxTableLog`.
 */
function getNormalizedTableCandidates(codes, maxTableLog) {
    // Upper bound on cache buckets kept alive at once.
    const MAX_CACHED_BUCKETS = 4096;
    const alphabetSize = symbolRange(codes);
    if (alphabetSize <= 0) {
        return [];
    }
    const histogram = buildHistogram(codes, alphabetSize);
    let distinct = 0;
    for (let i = 0; i < histogram.length; i++) {
        if ((histogram[i] ?? 0) > 0) {
            distinct++;
        }
    }
    // A single distinct symbol gets no candidate tables from this function.
    if (distinct <= 1) {
        return [];
    }
    // Smallest table log tried is 5 (presumably the format's minimum
    // accuracy log; confirm against the Zstandard specification). Raise it
    // until the table has at least one slot per distinct symbol.
    let minTableLog = 5;
    while (1 << minTableLog < distinct && minTableLog < maxTableLog) {
        minTableLog++;
    }
    if (1 << minTableLog < distinct) {
        return [];
    }
    // Cap the largest table log by the sample count (floor(log2(n - 1)) + 1).
    const maxLogFromSamples = codes.length > 1 ? 31 - Math.clz32(codes.length - 1) : 5;
    const limit = Math.max(minTableLog, Math.min(maxTableLog, maxLogFromSamples + 1));
    const results = [];
    const histogramHash = hashHistogram(histogram);
    for (let tableLog = minTableLog; tableLog <= limit; tableLog++) {
        const key = `${tableLog}:${alphabetSize}:${histogramHash}`;
        const cachedBucket = normalizedTableCache.get(key);
        if (cachedBucket) {
            // A bucket can hold several entries when different histograms
            // collide on the hash, so confirm with a full comparison.
            const cached = cachedBucket.find((entry) => histogramsEqual(entry.histogram, histogram));
            if (cached) {
                results.push(cached);
                continue;
            }
        }
        try {
            const { normalizedCounter, maxSymbolValue } = normalizeCountsForTable(Array.from(histogram), tableLog);
            const header = writeNCount(normalizedCounter, maxSymbolValue, tableLog);
            const table = buildFSEDecodeTable(normalizedCounter, tableLog);
            const out = {
                histogram: histogram.slice(0),
                table,
                tableLog,
                header,
            };
            if (cachedBucket) {
                cachedBucket.push(out);
            }
            else {
                if (normalizedTableCache.size >= MAX_CACHED_BUCKETS) {
                    normalizedTableCache.clear();
                }
                normalizedTableCache.set(key, [out]);
            }
            results.push(out);
        }
        catch {
            // Skip invalid normalizations for this table log.
        }
    }
    return results;
}
|
|
481
|
+
function symbolizedSequences(sequences) {
|
|
350
482
|
if (sequences.length === 0)
|
|
351
483
|
return null;
|
|
352
|
-
const
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
const
|
|
356
|
-
const
|
|
357
|
-
const mlCodes =
|
|
358
|
-
const
|
|
359
|
-
const
|
|
360
|
-
const
|
|
361
|
-
|
|
484
|
+
const numSequences = sequences.length;
|
|
485
|
+
const scratch = ensureSymbolizedScratch(numSequences);
|
|
486
|
+
const llCodes = scratch.llCodes.subarray(0, numSequences);
|
|
487
|
+
const llExtraN = scratch.llExtraN.subarray(0, numSequences);
|
|
488
|
+
const llExtraValue = scratch.llExtraValue.subarray(0, numSequences);
|
|
489
|
+
const mlCodes = scratch.mlCodes.subarray(0, numSequences);
|
|
490
|
+
const mlExtraN = scratch.mlExtraN.subarray(0, numSequences);
|
|
491
|
+
const mlExtraValue = scratch.mlExtraValue.subarray(0, numSequences);
|
|
492
|
+
const ofCodes = scratch.ofCodes.subarray(0, numSequences);
|
|
493
|
+
const ofExtraN = scratch.ofExtraN.subarray(0, numSequences);
|
|
494
|
+
const ofExtraValue = scratch.ofExtraValue.subarray(0, numSequences);
|
|
495
|
+
for (let i = 0; i < numSequences; i++) {
|
|
496
|
+
const sequence = sequences[i];
|
|
362
497
|
const ll = findLengthCode(sequence.literalsLength, LL_BASELINE, LL_NUMBITS, 15, 0);
|
|
363
498
|
const ml = findLengthCode(sequence.matchLength, ML_BASELINE, ML_NUMBITS, 34, 3);
|
|
364
499
|
if (!ll || !ml)
|
|
@@ -370,71 +505,184 @@ function buildPredefinedSequenceSection(sequences) {
|
|
|
370
505
|
if (ofCode < 0 || ofCode > 28)
|
|
371
506
|
return null;
|
|
372
507
|
const ofEx = offsetValue - (1 << ofCode);
|
|
373
|
-
llCodes
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
508
|
+
llCodes[i] = ll.code;
|
|
509
|
+
llExtraN[i] = ll.extraN;
|
|
510
|
+
llExtraValue[i] = ll.extra;
|
|
511
|
+
mlCodes[i] = ml.code;
|
|
512
|
+
mlExtraN[i] = ml.extraN;
|
|
513
|
+
mlExtraValue[i] = ml.extra;
|
|
514
|
+
ofCodes[i] = ofCode;
|
|
515
|
+
ofExtraN[i] = ofCode;
|
|
516
|
+
ofExtraValue[i] = ofEx;
|
|
517
|
+
}
|
|
518
|
+
return { llCodes, llExtraN, llExtraValue, mlCodes, mlExtraN, mlExtraValue, ofCodes, ofExtraN, ofExtraValue };
|
|
519
|
+
}
|
|
520
|
+
/**
 * Picks the cheapest encoding mode for one sequence symbol stream.
 *
 * Three options are compared by scored bit cost:
 *   - mode 0: the predefined table (always the baseline),
 *   - mode 3: the previous block's table, when one is supplied,
 *   - mode 2: a freshly normalized table, whose cost includes its header.
 * A cheap histogram-based estimate is used as a pre-filter: an option is only
 * scored exactly when its estimate is within 16 bits of the current best. At
 * most the two best-estimated mode-2 candidates are scored.
 *
 * @param {ArrayLike<number>} codes - symbol codes of the stream.
 * @param {*} predefinedTable - predefined decode table for this stream.
 * @param {number} predefinedTableLog - table log of predefinedTable.
 * @param {number} maxTableLog - largest table log allowed for a new table.
 * @param {*} prevTable - table used by the previous block, or null.
 * @param {number|null} prevTableLog - table log of prevTable, or null.
 * @returns {{mode: number, table: *, tableLog: number, path: *, tableHeader: Uint8Array}|null}
 *   the chosen option, or null when no state path exists for the predefined table.
 */
function chooseStreamMode(codes, predefinedTable, predefinedTableLog, maxTableLog, prevTable, prevTableLog) {
    const range = symbolRange(codes);
    const counts = range > 0 ? buildHistogram(codes, range) : new Uint32Array(0);
    const basePath = buildStatePath(codes, predefinedTable);
    if (!basePath) {
        return null;
    }
    let winner = {
        mode: 0,
        table: predefinedTable,
        tableLog: predefinedTableLog,
        path: basePath,
        tableHeader: new Uint8Array(0),
    };
    let winnerBits = scorePath(basePath, predefinedTable, predefinedTableLog);

    // Repeat mode: reuse the previous table; it needs no header.
    if (prevTable && prevTableLog !== null) {
        const repeatGuess = estimatePathBitsFromHistogram(counts, prevTable, prevTableLog, 0);
        if (repeatGuess < winnerBits + 16) {
            const repeatPath = buildStatePath(codes, prevTable);
            if (repeatPath) {
                const repeatBits = scorePath(repeatPath, prevTable, prevTableLog);
                if (repeatBits < winnerBits) {
                    winner = { mode: 3, table: prevTable, tableLog: prevTableLog, path: repeatPath, tableHeader: new Uint8Array(0) };
                    winnerBits = repeatBits;
                }
            }
        }
    }

    // Compressed mode: rank the fresh tables by estimate and score the top two.
    const shortlist = getNormalizedTableCandidates(codes, maxTableLog)
        .map((compressed) => ({
            compressed,
            estimate: estimatePathBitsFromHistogram(counts, compressed.table, compressed.tableLog, compressed.header.length * 8),
        }))
        .sort((left, right) => left.estimate - right.estimate)
        .slice(0, 2);
    for (const { compressed, estimate } of shortlist) {
        // The threshold follows winnerBits, so an earlier win tightens the filter.
        if (estimate >= winnerBits + 16) {
            continue;
        }
        const freshPath = buildStatePath(codes, compressed.table);
        if (!freshPath) {
            continue;
        }
        const freshBits = scorePath(freshPath, compressed.table, compressed.tableLog) + compressed.header.length * 8;
        if (freshBits < winnerBits) {
            winner = {
                mode: 2,
                table: compressed.table,
                tableLog: compressed.tableLog,
                path: freshPath,
                tableHeader: compressed.header,
            };
            winnerBits = freshBits;
        }
    }
    return winner;
}
|
|
418
|
-
|
|
419
|
-
const
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
if (
|
|
579
|
+
/**
 * Serializes the sequences section of a compressed block.
 *
 * Layout of the returned section: sequence-count bytes, one mode byte
 * (literal-length mode in bits 7-6, offset mode in bits 5-4, match-length mode
 * in bits 3-2), the literal-length / offset / match-length table headers in
 * that order, then the reverse-encoded bitstream.
 *
 * The bitstream is described as a flat list of (bit count, value) pairs held
 * in scratch buffers: the three initial states (LL, OF, ML), then per sequence
 * the OF, ML and LL extra bits, followed by the LL, ML and OF state updates
 * for every sequence except the last.
 *
 * @param {Array} sequences - sequences to encode.
 * @param {*} [context] - optional encoder context; its prevTables, when
 *   present, are offered to chooseStreamMode as the previous block's tables.
 * @returns {{section: Uint8Array, tables: object}|null} the encoded section
 *   plus the tables actually used, or null when any step cannot encode.
 */
function buildSequenceSection(sequences, context) {
    const encoded = symbolizedSequences(sequences);
    if (!encoded) {
        return null;
    }
    const numSequences = sequences.length;
    const numSequencesBytes = encodeNumSequences(numSequences);
    if (!numSequencesBytes) {
        return null;
    }
    const predefined = getPredefinedFSETables();
    const previous = context?.prevTables;
    const llChoice = chooseStreamMode(encoded.llCodes, predefined.ll, LITERALS_LENGTH_TABLE_LOG, 9, previous?.llTable ?? null, previous?.llTableLog ?? null);
    const ofChoice = chooseStreamMode(encoded.ofCodes, predefined.of, OFFSET_CODE_TABLE_LOG, 8, previous?.ofTable ?? null, previous?.ofTableLog ?? null);
    const mlChoice = chooseStreamMode(encoded.mlCodes, predefined.ml, MATCH_LENGTH_TABLE_LOG, 9, previous?.mlTable ?? null, previous?.mlTableLog ?? null);
    if (!llChoice || !ofChoice || !mlChoice) {
        return null;
    }

    // 3 initial states + 3 extra-bit fields per sequence + 3 state updates for
    // all but the last sequence = 6 * numSequences entries.
    const chunkCount = numSequences * 6;
    const readCounts = getSequenceReadCountsScratch(chunkCount).subarray(0, chunkCount);
    const readValues = getSequenceReadValuesScratch(chunkCount).subarray(0, chunkCount);
    const { states: llStates, updateBits: llUpdates } = llChoice.path;
    const { states: ofStates, updateBits: ofUpdates } = ofChoice.path;
    const { states: mlStates, updateBits: mlUpdates } = mlChoice.path;
    const { ofExtraN, ofExtraValue, mlExtraN, mlExtraValue, llExtraN, llExtraValue } = encoded;
    const isOutOfRange = (state, table) => state < 0 || state >= table.length;

    let cursor = 0;
    readCounts[cursor] = llChoice.tableLog;
    readValues[cursor] = llStates[0];
    cursor += 1;
    readCounts[cursor] = ofChoice.tableLog;
    readValues[cursor] = ofStates[0];
    cursor += 1;
    readCounts[cursor] = mlChoice.tableLog;
    readValues[cursor] = mlStates[0];
    cursor += 1;

    const lastIndex = numSequences - 1;
    for (let seq = 0; seq < numSequences; seq += 1) {
        readCounts[cursor] = ofExtraN[seq];
        readValues[cursor] = ofExtraValue[seq];
        cursor += 1;
        readCounts[cursor] = mlExtraN[seq];
        readValues[cursor] = mlExtraValue[seq];
        cursor += 1;
        readCounts[cursor] = llExtraN[seq];
        readValues[cursor] = llExtraValue[seq];
        cursor += 1;
        if (seq === lastIndex) {
            // No state update follows the final sequence.
            continue;
        }
        const llState = llStates[seq];
        const mlState = mlStates[seq];
        const ofState = ofStates[seq];
        if (isOutOfRange(llState, llChoice.table) ||
            isOutOfRange(mlState, mlChoice.table) ||
            isOutOfRange(ofState, ofChoice.table)) {
            return null;
        }
        readCounts[cursor] = llChoice.table.numBits[llState];
        readValues[cursor] = llUpdates[seq];
        cursor += 1;
        readCounts[cursor] = mlChoice.table.numBits[mlState];
        readValues[cursor] = mlUpdates[seq];
        cursor += 1;
        readCounts[cursor] = ofChoice.table.numBits[ofState];
        readValues[cursor] = ofUpdates[seq];
        cursor += 1;
    }

    const bitstream = encodeReverseBitstream(readCounts, readValues);
    const modeByte = ((llChoice.mode << 6) | (ofChoice.mode << 4) | (mlChoice.mode << 2)) & 0xff;
    const parts = [
        numSequencesBytes,
        Uint8Array.of(modeByte),
        llChoice.tableHeader,
        ofChoice.tableHeader,
        mlChoice.tableHeader,
        bitstream,
    ];
    let totalLength = 0;
    for (const part of parts) {
        totalLength += part.length;
    }
    const section = new Uint8Array(totalLength);
    let offset = 0;
    for (const part of parts) {
        section.set(part, offset);
        offset += part.length;
    }
    return {
        section,
        tables: {
            llTable: llChoice.table,
            llTableLog: llChoice.tableLog,
            ofTable: ofChoice.table,
            ofTableLog: ofChoice.tableLog,
            mlTable: mlChoice.table,
            mlTableLog: mlChoice.tableLog,
        },
    };
}
|
|
668
|
+
/**
 * Builds the payload of a compressed block: the encoded literals section
 * immediately followed by the encoded sequences section.
 *
 * When a context object is supplied and both sections encode successfully,
 * the context is updated with the sequence tables and literals table that
 * were just used. On failure the context is left untouched.
 *
 * @param {*} literals - literal bytes for the block.
 * @param {Array} sequences - sequences for the block.
 * @param {*} [context] - optional mutable encoder context (reads
 *   prevLiteralsTable / prevTables, writes both on success).
 * @returns {Uint8Array|null} the concatenated payload, or null when either
 *   section cannot be encoded.
 */
export function buildCompressedBlockPayload(literals, sequences, context) {
    const encodedLiterals = encodeLiteralsSection(literals, {
        prevTable: context?.prevLiteralsTable ?? null,
    });
    if (!encodedLiterals) {
        return null;
    }
    const literalBytes = encodedLiterals.section;
    const seqSection = buildSequenceSection(sequences, context);
    if (!seqSection) {
        return null;
    }
    const sequenceBytes = seqSection.section;
    const payload = new Uint8Array(literalBytes.length + sequenceBytes.length);
    payload.set(literalBytes, 0);
    payload.set(sequenceBytes, literalBytes.length);
    if (context) {
        // Remember what this block used so later blocks can consider reusing it.
        context.prevTables = seqSection.tables;
        context.prevLiteralsTable = encodedLiterals.table;
    }
    return payload;
}
|
|
440
688
|
export function writeCompressedBlock(payload, last) {
|
|
@@ -446,4 +694,11 @@ export function writeCompressedBlock(payload, last) {
|
|
|
446
694
|
out.set(payload, 3);
|
|
447
695
|
return out;
|
|
448
696
|
}
|
|
697
|
+
// Benchmark hooks: internal hot-path functions exported for profiling.
export const __benchInternals = {
    encodeReverseBitstream,
    buildGeneralCompressedLiterals: buildGeneralCompressedLiteralsForBench,
    // Same entry point as buildSequenceSection, called without a context and
    // reduced to the encoded bytes (null when encoding fails).
    buildPredefinedSequenceSection: (sequences) => {
        const built = buildSequenceSection(sequences);
        return built?.section ?? null;
    },
    buildSequenceSection,
};
|
|
449
704
|
//# sourceMappingURL=compressedBlock.js.map
|