zstdify 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -5
- package/dist/bitstream/bitReaderReverse.d.ts +6 -0
- package/dist/bitstream/bitReaderReverse.js +36 -12
- package/dist/bitstream/bitReaderReverse.js.map +1 -1
- package/dist/bitstream/index.d.ts +1 -0
- package/dist/bitstream/index.js +1 -0
- package/dist/bitstream/index.js.map +1 -1
- package/dist/bitstream/reverseBitWriter.d.ts +1 -0
- package/dist/bitstream/reverseBitWriter.js +66 -0
- package/dist/bitstream/reverseBitWriter.js.map +1 -0
- package/dist/compress.js +46 -5
- package/dist/compress.js.map +1 -1
- package/dist/decode/debugTrace.d.ts +31 -0
- package/dist/decode/debugTrace.js +2 -0
- package/dist/decode/debugTrace.js.map +1 -0
- package/dist/decode/decompressFrame.d.ts +2 -1
- package/dist/decode/decompressFrame.js +96 -18
- package/dist/decode/decompressFrame.js.map +1 -1
- package/dist/decode/fusedSequences.d.ts +9 -0
- package/dist/decode/fusedSequences.js +26 -0
- package/dist/decode/fusedSequences.js.map +1 -0
- package/dist/decode/literals.js +129 -79
- package/dist/decode/literals.js.map +1 -1
- package/dist/decode/reconstruct.d.ts +14 -2
- package/dist/decode/reconstruct.js +378 -21
- package/dist/decode/reconstruct.js.map +1 -1
- package/dist/decode/sequences.d.ts +19 -7
- package/dist/decode/sequences.js +127 -48
- package/dist/decode/sequences.js.map +1 -1
- package/dist/decompress.d.ts +3 -0
- package/dist/decompress.js +1 -1
- package/dist/decompress.js.map +1 -1
- package/dist/encode/compressedBlock.d.ts +27 -1
- package/dist/encode/compressedBlock.js +567 -367
- package/dist/encode/compressedBlock.js.map +1 -1
- package/dist/encode/fastMatcher.d.ts +7 -0
- package/dist/encode/fastMatcher.js +13 -0
- package/dist/encode/fastMatcher.js.map +1 -0
- package/dist/encode/greedySequences.d.ts +9 -6
- package/dist/encode/greedySequences.js +22 -101
- package/dist/encode/greedySequences.js.map +1 -1
- package/dist/encode/lazyMatcher.d.ts +7 -0
- package/dist/encode/lazyMatcher.js +13 -0
- package/dist/encode/lazyMatcher.js.map +1 -0
- package/dist/encode/literalsEncoder.d.ts +14 -0
- package/dist/encode/literalsEncoder.js +343 -0
- package/dist/encode/literalsEncoder.js.map +1 -0
- package/dist/encode/optimalParser.d.ts +7 -0
- package/dist/encode/optimalParser.js +13 -0
- package/dist/encode/optimalParser.js.map +1 -0
- package/dist/encode/sequencePlanner.d.ts +23 -0
- package/dist/encode/sequencePlanner.js +280 -0
- package/dist/encode/sequencePlanner.js.map +1 -0
- package/dist/entropy/fse.d.ts +13 -6
- package/dist/entropy/fse.js +175 -8
- package/dist/entropy/fse.js.map +1 -1
- package/dist/entropy/huffman.d.ts +7 -5
- package/dist/entropy/huffman.js +18 -7
- package/dist/entropy/huffman.js.map +1 -1
- package/dist/entropy/index.d.ts +2 -2
- package/dist/entropy/weights.js +20 -14
- package/dist/entropy/weights.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { buildHuffmanDecodeTable, weightsToNumBits } from '../entropy/huffman.js';
|
|
1
|
+
import { buildFSEDecodeTable, normalizeCountsForTable, writeNCount } from '../entropy/fse.js';
|
|
2
|
+
import { encodeReverseBitstream } from '../bitstream/reverseBitWriter.js';
|
|
4
3
|
import { LITERALS_LENGTH_DEFAULT_DISTRIBUTION, LITERALS_LENGTH_TABLE_LOG, MATCH_LENGTH_DEFAULT_DISTRIBUTION, MATCH_LENGTH_TABLE_LOG, OFFSET_CODE_DEFAULT_DISTRIBUTION, OFFSET_CODE_TABLE_LOG, } from '../entropy/predefined.js';
|
|
4
|
+
import { buildGeneralCompressedLiteralsForBench, encodeLiteralsSection, } from './literalsEncoder.js';
|
|
5
5
|
// Predefined FSE tables built once and reused for sequence encoding.
|
|
6
6
|
let cachedLLTable = null;
|
|
7
7
|
let cachedOFTable = null;
|
|
@@ -34,32 +34,168 @@ function writeU24LE(arr, offset, value) {
|
|
|
34
34
|
arr[offset + 1] = (value >> 8) & 0xff;
|
|
35
35
|
arr[offset + 2] = (value >> 16) & 0xff;
|
|
36
36
|
}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
const U32_ALL_BITS = 0xffff_ffff >>> 0;
|
|
38
|
+
const pathTableCache = new WeakMap();
|
|
39
|
+
let pathMasksScratch = null;
|
|
40
|
+
let pathNextChoiceScratch = null;
|
|
41
|
+
let sequenceReadCountsScratch = null;
|
|
42
|
+
let sequenceReadValuesScratch = null;
|
|
43
|
+
function rangeMask(startBit, endBit) {
|
|
44
|
+
if (startBit === 0 && endBit === 31)
|
|
45
|
+
return U32_ALL_BITS;
|
|
46
|
+
const startMask = (U32_ALL_BITS << startBit) >>> 0;
|
|
47
|
+
const endMask = endBit === 31 ? U32_ALL_BITS : ((1 << (endBit + 1)) - 1) >>> 0;
|
|
48
|
+
return (startMask & endMask) >>> 0;
|
|
49
|
+
}
|
|
50
|
+
function setMaskBit(mask, maskOffset, bit) {
|
|
51
|
+
const word = bit >>> 5;
|
|
52
|
+
mask[maskOffset + word] = (mask[maskOffset + word] | (1 << (bit & 31))) >>> 0;
|
|
53
|
+
}
|
|
54
|
+
function isMaskEmpty(mask, maskOffset, wordCount) {
|
|
55
|
+
for (let i = 0; i < wordCount; i++) {
|
|
56
|
+
if ((mask[maskOffset + i] ?? 0) !== 0)
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
return true;
|
|
60
|
+
}
|
|
61
|
+
function firstBitInWord(word) {
|
|
62
|
+
const normalized = word >>> 0;
|
|
63
|
+
const lsb = (normalized & -normalized) >>> 0;
|
|
64
|
+
return 31 - Math.clz32(lsb);
|
|
65
|
+
}
|
|
66
|
+
function findFirstSetBit(mask, maskOffset, wordCount) {
|
|
67
|
+
for (let wi = 0; wi < wordCount; wi++) {
|
|
68
|
+
const word = mask[maskOffset + wi] ?? 0;
|
|
69
|
+
if (word !== 0) {
|
|
70
|
+
return (wi << 5) + firstBitInWord(word);
|
|
42
71
|
}
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
72
|
+
}
|
|
73
|
+
return -1;
|
|
74
|
+
}
|
|
75
|
+
function codesAreUniform(codes) {
|
|
76
|
+
if (codes.length === 0)
|
|
77
|
+
return -1;
|
|
78
|
+
const first = codes[0] ?? -1;
|
|
79
|
+
for (let i = 1; i < codes.length; i++) {
|
|
80
|
+
if ((codes[i] ?? -1) !== first)
|
|
81
|
+
return -1;
|
|
82
|
+
}
|
|
83
|
+
return first;
|
|
84
|
+
}
|
|
85
|
+
function findFirstSetBitInRange(mask, maskOffset, wordCount, minState, maxState) {
|
|
86
|
+
if (wordCount <= 0)
|
|
87
|
+
return -1;
|
|
88
|
+
let min = minState;
|
|
89
|
+
let max = maxState;
|
|
90
|
+
const maxBit = (wordCount << 5) - 1;
|
|
91
|
+
if (min < 0)
|
|
92
|
+
min = 0;
|
|
93
|
+
if (max > maxBit)
|
|
94
|
+
max = maxBit;
|
|
95
|
+
if (min > max)
|
|
96
|
+
return -1;
|
|
97
|
+
const startWord = min >>> 5;
|
|
98
|
+
const endWord = max >>> 5;
|
|
99
|
+
if (startWord === endWord) {
|
|
100
|
+
const masked = ((mask[maskOffset + startWord] ?? 0) & rangeMask(min & 31, max & 31)) >>> 0;
|
|
101
|
+
if (masked === 0)
|
|
102
|
+
return -1;
|
|
103
|
+
return (startWord << 5) + firstBitInWord(masked);
|
|
104
|
+
}
|
|
105
|
+
const firstMasked = ((mask[maskOffset + startWord] ?? 0) & rangeMask(min & 31, 31)) >>> 0;
|
|
106
|
+
if (firstMasked !== 0) {
|
|
107
|
+
return (startWord << 5) + firstBitInWord(firstMasked);
|
|
108
|
+
}
|
|
109
|
+
for (let wi = startWord + 1; wi < endWord; wi++) {
|
|
110
|
+
const word = mask[maskOffset + wi] ?? 0;
|
|
111
|
+
if (word !== 0)
|
|
112
|
+
return (wi << 5) + firstBitInWord(word);
|
|
113
|
+
}
|
|
114
|
+
const lastMasked = ((mask[maskOffset + endWord] ?? 0) & rangeMask(0, max & 31)) >>> 0;
|
|
115
|
+
if (lastMasked === 0)
|
|
116
|
+
return -1;
|
|
117
|
+
return (endWord << 5) + firstBitInWord(lastMasked);
|
|
118
|
+
}
|
|
119
|
+
function getPrecomputedPathTable(table) {
|
|
120
|
+
const cached = pathTableCache.get(table);
|
|
121
|
+
if (cached)
|
|
122
|
+
return cached;
|
|
123
|
+
const tableSize = table.length;
|
|
124
|
+
const wordCount = Math.max(1, Math.ceil(tableSize / 32));
|
|
125
|
+
const baselineByState = new Int32Array(tableSize);
|
|
126
|
+
const minNextByState = new Int32Array(tableSize);
|
|
127
|
+
const maxNextByState = new Int32Array(tableSize);
|
|
128
|
+
let maxSymbol = -1;
|
|
129
|
+
for (let s = 0; s < tableSize; s++) {
|
|
130
|
+
const baseline = table.baseline[s];
|
|
131
|
+
const bits = table.numBits[s];
|
|
132
|
+
baselineByState[s] = baseline;
|
|
133
|
+
const width = bits > 0 ? 1 << bits : 1;
|
|
134
|
+
const minNext = baseline;
|
|
135
|
+
const maxNext = baseline + width - 1;
|
|
136
|
+
minNextByState[s] = minNext < 0 ? 0 : minNext;
|
|
137
|
+
maxNextByState[s] = maxNext >= tableSize ? tableSize - 1 : maxNext;
|
|
138
|
+
const symbol = table.symbol[s];
|
|
139
|
+
if (symbol > maxSymbol)
|
|
140
|
+
maxSymbol = symbol;
|
|
141
|
+
}
|
|
142
|
+
const statesBySymbol = Array.from({ length: maxSymbol + 1 }, () => []);
|
|
143
|
+
const symbolMasks = Array.from({ length: maxSymbol + 1 }, () => new Uint32Array(wordCount));
|
|
144
|
+
const bitTotalsBySymbol = new Float64Array(maxSymbol + 1);
|
|
145
|
+
const stateCountsBySymbol = new Uint32Array(maxSymbol + 1);
|
|
146
|
+
for (let s = 0; s < tableSize; s++) {
|
|
147
|
+
const sym = table.symbol[s];
|
|
148
|
+
const stateList = statesBySymbol[sym];
|
|
149
|
+
const stateMask = symbolMasks[sym];
|
|
150
|
+
if (!stateList || !stateMask)
|
|
47
151
|
continue;
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
while ((bits.length & 7) !== 0) {
|
|
53
|
-
bits.push(0);
|
|
54
|
-
}
|
|
55
|
-
const out = new Uint8Array(Math.ceil(bits.length / 8));
|
|
56
|
-
for (let i = 0; i < bits.length; i++) {
|
|
57
|
-
if ((bits[i] ?? 0) !== 0) {
|
|
58
|
-
const idx = i >>> 3;
|
|
59
|
-
out[idx] = ((out[idx] ?? 0) | (1 << (i & 7))) & 0xff;
|
|
60
|
-
}
|
|
152
|
+
stateList.push(s);
|
|
153
|
+
stateMask[s >>> 5] = (stateMask[s >>> 5] | (1 << (s & 31))) >>> 0;
|
|
154
|
+
bitTotalsBySymbol[sym] = (bitTotalsBySymbol[sym] ?? 0) + (table.numBits[s] ?? 0);
|
|
155
|
+
stateCountsBySymbol[sym] = (stateCountsBySymbol[sym] ?? 0) + 1;
|
|
61
156
|
}
|
|
62
|
-
|
|
157
|
+
const avgBitsBySymbol = new Float64Array(maxSymbol + 1);
|
|
158
|
+
for (let sym = 0; sym < avgBitsBySymbol.length; sym++) {
|
|
159
|
+
const count = stateCountsBySymbol[sym] ?? 0;
|
|
160
|
+
avgBitsBySymbol[sym] = count > 0 ? (bitTotalsBySymbol[sym] ?? 0) / count : Number.POSITIVE_INFINITY;
|
|
161
|
+
}
|
|
162
|
+
const precomputed = {
|
|
163
|
+
tableSize,
|
|
164
|
+
wordCount,
|
|
165
|
+
statesBySymbol,
|
|
166
|
+
symbolMasks,
|
|
167
|
+
avgBitsBySymbol,
|
|
168
|
+
baselineByState,
|
|
169
|
+
minNextByState,
|
|
170
|
+
maxNextByState,
|
|
171
|
+
};
|
|
172
|
+
pathTableCache.set(table, precomputed);
|
|
173
|
+
return precomputed;
|
|
174
|
+
}
|
|
175
|
+
function getPathMasksScratch(requiredLength) {
|
|
176
|
+
if (!pathMasksScratch || pathMasksScratch.length < requiredLength) {
|
|
177
|
+
pathMasksScratch = new Uint32Array(requiredLength);
|
|
178
|
+
}
|
|
179
|
+
pathMasksScratch.fill(0, 0, requiredLength);
|
|
180
|
+
return pathMasksScratch;
|
|
181
|
+
}
|
|
182
|
+
function getPathNextChoiceScratch(requiredLength) {
|
|
183
|
+
if (!pathNextChoiceScratch || pathNextChoiceScratch.length < requiredLength) {
|
|
184
|
+
pathNextChoiceScratch = new Int32Array(requiredLength);
|
|
185
|
+
}
|
|
186
|
+
return pathNextChoiceScratch;
|
|
187
|
+
}
|
|
188
|
+
function getSequenceReadCountsScratch(requiredLength) {
|
|
189
|
+
if (!sequenceReadCountsScratch || sequenceReadCountsScratch.length < requiredLength) {
|
|
190
|
+
sequenceReadCountsScratch = new Uint8Array(requiredLength);
|
|
191
|
+
}
|
|
192
|
+
return sequenceReadCountsScratch;
|
|
193
|
+
}
|
|
194
|
+
function getSequenceReadValuesScratch(requiredLength) {
|
|
195
|
+
if (!sequenceReadValuesScratch || sequenceReadValuesScratch.length < requiredLength) {
|
|
196
|
+
sequenceReadValuesScratch = new Uint32Array(requiredLength);
|
|
197
|
+
}
|
|
198
|
+
return sequenceReadValuesScratch;
|
|
63
199
|
}
|
|
64
200
|
function findLengthCode(value, baseline, extraBits, directMax, directBias) {
|
|
65
201
|
if (value <= directMax) {
|
|
@@ -77,211 +213,6 @@ function findLengthCode(value, baseline, extraBits, directMax, directBias) {
|
|
|
77
213
|
}
|
|
78
214
|
return null;
|
|
79
215
|
}
|
|
80
|
-
function buildSingleSymbolCompressedLiterals(literals) {
|
|
81
|
-
if (literals.length === 0 || literals.length > 1023)
|
|
82
|
-
return null;
|
|
83
|
-
const sym = literals[0] ?? 0;
|
|
84
|
-
for (let i = 1; i < literals.length; i++) {
|
|
85
|
-
if ((literals[i] ?? 0) !== sym)
|
|
86
|
-
return null;
|
|
87
|
-
}
|
|
88
|
-
if (sym > 127)
|
|
89
|
-
return null;
|
|
90
|
-
const numWeights = sym + 1;
|
|
91
|
-
if (numWeights < 1 || numWeights > 128)
|
|
92
|
-
return null;
|
|
93
|
-
const weights = new Array(numWeights).fill(0);
|
|
94
|
-
weights[sym] = 1;
|
|
95
|
-
let partialSum = 0;
|
|
96
|
-
for (const w of weights) {
|
|
97
|
-
if (w > 0)
|
|
98
|
-
partialSum += 1 << (w - 1);
|
|
99
|
-
}
|
|
100
|
-
if (partialSum === 0)
|
|
101
|
-
return null;
|
|
102
|
-
const maxNumBits = 32 - Math.clz32(partialSum);
|
|
103
|
-
const total = 1 << maxNumBits;
|
|
104
|
-
const remainder = total - partialSum;
|
|
105
|
-
if (remainder <= 0 || (remainder & (remainder - 1)) !== 0)
|
|
106
|
-
return null;
|
|
107
|
-
const lastWeight = 32 - Math.clz32(remainder);
|
|
108
|
-
const fullWeights = [...weights, lastWeight];
|
|
109
|
-
while (fullWeights.length < 256)
|
|
110
|
-
fullWeights.push(0);
|
|
111
|
-
const numBits = weightsToNumBits(fullWeights, maxNumBits);
|
|
112
|
-
const table = buildHuffmanDecodeTable(numBits, maxNumBits);
|
|
113
|
-
const symbolCode = table.findIndex((row) => row?.symbol === sym);
|
|
114
|
-
if (symbolCode < 0)
|
|
115
|
-
return null;
|
|
116
|
-
const bitCounts = new Uint8Array(literals.length);
|
|
117
|
-
bitCounts.fill(maxNumBits);
|
|
118
|
-
const bitValues = new Uint16Array(literals.length);
|
|
119
|
-
bitValues.fill(symbolCode);
|
|
120
|
-
const stream = encodeReverseBitstream(bitCounts, bitValues);
|
|
121
|
-
const directHeader = 127 + numWeights;
|
|
122
|
-
const weightWriter = new BitWriter();
|
|
123
|
-
for (let i = 0; i < weights.length; i += 2) {
|
|
124
|
-
const hi = weights[i] ?? 0;
|
|
125
|
-
const lo = weights[i + 1] ?? 0;
|
|
126
|
-
weightWriter.writeBits(8, ((hi & 0xf) << 4) | (lo & 0xf));
|
|
127
|
-
}
|
|
128
|
-
const weightBytes = weightWriter.flush();
|
|
129
|
-
const compressedSize = 1 + weightBytes.length + stream.length;
|
|
130
|
-
if (compressedSize > 1023)
|
|
131
|
-
return null;
|
|
132
|
-
const b0 = (2 | (0 << 2) | ((literals.length & 0x0f) << 4)) & 0xff;
|
|
133
|
-
const b1 = (((literals.length >> 4) & 0x3f) | ((compressedSize & 0x03) << 6)) & 0xff;
|
|
134
|
-
const b2 = (compressedSize >> 2) & 0xff;
|
|
135
|
-
const out = new Uint8Array(3 + 1 + weightBytes.length + stream.length);
|
|
136
|
-
out[0] = b0;
|
|
137
|
-
out[1] = b1;
|
|
138
|
-
out[2] = b2;
|
|
139
|
-
out[3] = directHeader & 0xff;
|
|
140
|
-
out.set(weightBytes, 4);
|
|
141
|
-
out.set(stream, 4 + weightBytes.length);
|
|
142
|
-
return out;
|
|
143
|
-
}
|
|
144
|
-
function splitPowerTerms(targetSum, count) {
|
|
145
|
-
if (count < 1 || count > targetSum)
|
|
146
|
-
return null;
|
|
147
|
-
const terms = [];
|
|
148
|
-
for (let bit = 31; bit >= 0; bit--) {
|
|
149
|
-
const value = 1 << bit;
|
|
150
|
-
if ((targetSum & value) !== 0) {
|
|
151
|
-
terms.push(value);
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
while (terms.length < count) {
|
|
155
|
-
let splitIndex = -1;
|
|
156
|
-
let largest = 0;
|
|
157
|
-
for (let i = 0; i < terms.length; i++) {
|
|
158
|
-
const term = terms[i] ?? 0;
|
|
159
|
-
if (term > largest) {
|
|
160
|
-
largest = term;
|
|
161
|
-
splitIndex = i;
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
if (splitIndex < 0 || largest <= 1) {
|
|
165
|
-
return null;
|
|
166
|
-
}
|
|
167
|
-
const half = largest >>> 1;
|
|
168
|
-
terms.splice(splitIndex, 1, half, half);
|
|
169
|
-
}
|
|
170
|
-
return terms;
|
|
171
|
-
}
|
|
172
|
-
function buildGeneralCompressedLiterals(literals) {
|
|
173
|
-
if (literals.length === 0 || literals.length > 1023)
|
|
174
|
-
return null;
|
|
175
|
-
const seen = new Uint8Array(256);
|
|
176
|
-
let numSymbols = 0;
|
|
177
|
-
let maxSymbol = 0;
|
|
178
|
-
for (let i = 0; i < literals.length; i++) {
|
|
179
|
-
const b = literals[i];
|
|
180
|
-
if (seen[b] === 0) {
|
|
181
|
-
seen[b] = 1;
|
|
182
|
-
numSymbols++;
|
|
183
|
-
if (b > maxSymbol)
|
|
184
|
-
maxSymbol = b;
|
|
185
|
-
}
|
|
186
|
-
}
|
|
187
|
-
if (numSymbols === 0 || numSymbols > 128)
|
|
188
|
-
return null;
|
|
189
|
-
if (maxSymbol > 127)
|
|
190
|
-
return null;
|
|
191
|
-
const sortedSymbols = [];
|
|
192
|
-
for (let s = 0; s <= maxSymbol; s++) {
|
|
193
|
-
if (seen[s] !== 0)
|
|
194
|
-
sortedSymbols.push(s);
|
|
195
|
-
}
|
|
196
|
-
// Construct a valid direct-weight table over symbols <= 127.
|
|
197
|
-
const partialTarget = 128; // maxNumBits=8 => total 256, remainder is 128 (power of two).
|
|
198
|
-
const contributions = splitPowerTerms(partialTarget, sortedSymbols.length);
|
|
199
|
-
if (!contributions)
|
|
200
|
-
return null;
|
|
201
|
-
contributions.sort((a, b) => b - a);
|
|
202
|
-
const weights = new Array(maxSymbol + 1).fill(0);
|
|
203
|
-
for (let i = 0; i < sortedSymbols.length; i++) {
|
|
204
|
-
const symbol = sortedSymbols[i] ?? 0;
|
|
205
|
-
const contribution = contributions[i] ?? 1;
|
|
206
|
-
const weight = 32 - Math.clz32(contribution);
|
|
207
|
-
if (weight < 1 || weight > 15)
|
|
208
|
-
return null;
|
|
209
|
-
weights[symbol] = weight;
|
|
210
|
-
}
|
|
211
|
-
const fullWeights = [...weights, 8];
|
|
212
|
-
while (fullWeights.length < 256)
|
|
213
|
-
fullWeights.push(0);
|
|
214
|
-
const numBits = weightsToNumBits(fullWeights, 8);
|
|
215
|
-
const table = buildHuffmanDecodeTable(numBits, 8);
|
|
216
|
-
const codeBySymbol = new Int32Array(256).fill(-1);
|
|
217
|
-
for (let i = 0; i < sortedSymbols.length; i++) {
|
|
218
|
-
const symbol = sortedSymbols[i];
|
|
219
|
-
const code = table.findIndex((row) => row?.symbol === symbol);
|
|
220
|
-
if (code < 0)
|
|
221
|
-
return null;
|
|
222
|
-
codeBySymbol[symbol] = code;
|
|
223
|
-
}
|
|
224
|
-
const readCounts = new Uint8Array(literals.length);
|
|
225
|
-
const readValues = new Uint16Array(literals.length);
|
|
226
|
-
for (let i = 0; i < literals.length; i++) {
|
|
227
|
-
const code = codeBySymbol[literals[i]];
|
|
228
|
-
if (code < 0)
|
|
229
|
-
return null;
|
|
230
|
-
readCounts[i] = 8;
|
|
231
|
-
readValues[i] = code;
|
|
232
|
-
}
|
|
233
|
-
const stream = encodeReverseBitstream(readCounts, readValues);
|
|
234
|
-
const numWeights = weights.length;
|
|
235
|
-
if (numWeights < 1 || numWeights > 128)
|
|
236
|
-
return null;
|
|
237
|
-
const directHeader = 127 + numWeights;
|
|
238
|
-
const weightWriter = new BitWriter();
|
|
239
|
-
for (let i = 0; i < weights.length; i += 2) {
|
|
240
|
-
const hi = weights[i] ?? 0;
|
|
241
|
-
const lo = weights[i + 1] ?? 0;
|
|
242
|
-
weightWriter.writeBits(8, ((hi & 0xf) << 4) | (lo & 0xf));
|
|
243
|
-
}
|
|
244
|
-
const weightBytes = weightWriter.flush();
|
|
245
|
-
const compressedSize = 1 + weightBytes.length + stream.length;
|
|
246
|
-
if (compressedSize > 1023)
|
|
247
|
-
return null;
|
|
248
|
-
const b0 = (2 | (0 << 2) | ((literals.length & 0x0f) << 4)) & 0xff;
|
|
249
|
-
const b1 = (((literals.length >> 4) & 0x3f) | ((compressedSize & 0x03) << 6)) & 0xff;
|
|
250
|
-
const b2 = (compressedSize >> 2) & 0xff;
|
|
251
|
-
const out = new Uint8Array(3 + 1 + weightBytes.length + stream.length);
|
|
252
|
-
out[0] = b0;
|
|
253
|
-
out[1] = b1;
|
|
254
|
-
out[2] = b2;
|
|
255
|
-
out[3] = directHeader & 0xff;
|
|
256
|
-
out.set(weightBytes, 4);
|
|
257
|
-
out.set(stream, 4 + weightBytes.length);
|
|
258
|
-
return out;
|
|
259
|
-
}
|
|
260
|
-
function buildRawLiteralsSection(literals) {
|
|
261
|
-
const size = literals.length;
|
|
262
|
-
if (size <= 31) {
|
|
263
|
-
const out = new Uint8Array(1 + size);
|
|
264
|
-
out[0] = (size << 3) | 0;
|
|
265
|
-
out.set(literals, 1);
|
|
266
|
-
return out;
|
|
267
|
-
}
|
|
268
|
-
if (size <= 0x0fff) {
|
|
269
|
-
const out = new Uint8Array(2 + size);
|
|
270
|
-
out[0] = ((size & 0x0f) << 4) | (1 << 2);
|
|
271
|
-
out[1] = (size >>> 4) & 0xff;
|
|
272
|
-
out.set(literals, 2);
|
|
273
|
-
return out;
|
|
274
|
-
}
|
|
275
|
-
if (size <= 0x0f_ffff) {
|
|
276
|
-
const out = new Uint8Array(3 + size);
|
|
277
|
-
out[0] = ((size & 0x0f) << 4) | (3 << 2);
|
|
278
|
-
out[1] = (size >>> 4) & 0xff;
|
|
279
|
-
out[2] = (size >>> 12) & 0xff;
|
|
280
|
-
out.set(literals, 3);
|
|
281
|
-
return out;
|
|
282
|
-
}
|
|
283
|
-
return null;
|
|
284
|
-
}
|
|
285
216
|
function encodeNumSequences(numSequences) {
|
|
286
217
|
if (numSequences < 0 || numSequences > 0xffff + 0x7f00)
|
|
287
218
|
return null;
|
|
@@ -299,103 +230,268 @@ function encodeNumSequences(numSequences) {
|
|
|
299
230
|
function buildStatePath(codes, table) {
|
|
300
231
|
if (codes.length === 0)
|
|
301
232
|
return { states: [], updateBits: [] };
|
|
302
|
-
const
|
|
303
|
-
const
|
|
304
|
-
|
|
305
|
-
const row = table[s];
|
|
306
|
-
if (!row)
|
|
307
|
-
continue;
|
|
308
|
-
const sym = row.symbol;
|
|
309
|
-
if (!statesByCode[sym])
|
|
310
|
-
statesByCode[sym] = [];
|
|
311
|
-
statesByCode[sym].push(s);
|
|
312
|
-
}
|
|
313
|
-
const possible = Array.from({ length: codes.length }, () => []);
|
|
314
|
-
const nextChoice = Array.from({ length: codes.length }, () => new Int32Array(tableSize).fill(-1));
|
|
315
|
-
const lastCode = codes[codes.length - 1] ?? -1;
|
|
316
|
-
const lastCandidates = statesByCode[lastCode] ?? [];
|
|
317
|
-
const lastArr = possible[codes.length - 1];
|
|
318
|
-
if (!lastArr)
|
|
233
|
+
const pre = getPrecomputedPathTable(table);
|
|
234
|
+
const { tableSize, wordCount, statesBySymbol, symbolMasks, minNextByState, maxNextByState, baselineByState } = pre;
|
|
235
|
+
if (tableSize <= 0)
|
|
319
236
|
return null;
|
|
320
|
-
|
|
321
|
-
|
|
237
|
+
const rowCount = codes.length;
|
|
238
|
+
if (rowCount === 1) {
|
|
239
|
+
const onlyCode = codes[0] ?? -1;
|
|
240
|
+
if (onlyCode < 0 || onlyCode >= statesBySymbol.length)
|
|
241
|
+
return null;
|
|
242
|
+
const onlyStates = statesBySymbol[onlyCode];
|
|
243
|
+
if (!onlyStates || onlyStates.length === 0)
|
|
244
|
+
return null;
|
|
245
|
+
const firstState = onlyStates[0];
|
|
246
|
+
if (firstState === undefined)
|
|
247
|
+
return null;
|
|
248
|
+
return { states: [firstState], updateBits: [] };
|
|
322
249
|
}
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const curNext = nextChoice[i];
|
|
333
|
-
const nextPresent = new Uint8Array(tableSize);
|
|
334
|
-
for (let j = 0; j < nextArr.length; j++) {
|
|
335
|
-
nextPresent[nextArr[j]] = 1;
|
|
336
|
-
}
|
|
337
|
-
const nextFrom = new Int32Array(tableSize + 1);
|
|
338
|
-
nextFrom[tableSize] = -1;
|
|
339
|
-
for (let s = tableSize - 1; s >= 0; s--) {
|
|
340
|
-
nextFrom[s] = nextPresent[s] !== 0 ? s : nextFrom[s + 1];
|
|
341
|
-
}
|
|
342
|
-
for (let si = 0; si < candidates.length; si++) {
|
|
343
|
-
const s = candidates[si];
|
|
344
|
-
const row = table[s];
|
|
345
|
-
if (!row)
|
|
346
|
-
continue;
|
|
347
|
-
const width = row.numBits > 0 ? 1 << row.numBits : 1;
|
|
348
|
-
const minNext = row.baseline;
|
|
349
|
-
const maxNext = row.baseline + width - 1;
|
|
350
|
-
if (maxNext < 0 || minNext >= tableSize)
|
|
250
|
+
const uniformCode = codesAreUniform(codes);
|
|
251
|
+
if (uniformCode >= 0 && uniformCode < statesBySymbol.length) {
|
|
252
|
+
const candidateStates = statesBySymbol[uniformCode];
|
|
253
|
+
const candidateMask = symbolMasks[uniformCode];
|
|
254
|
+
if (!candidateStates || candidateStates.length === 0 || !candidateMask)
|
|
255
|
+
return null;
|
|
256
|
+
for (let startIndex = 0; startIndex < candidateStates.length; startIndex++) {
|
|
257
|
+
const startState = candidateStates[startIndex];
|
|
258
|
+
if (startState === undefined)
|
|
351
259
|
continue;
|
|
352
|
-
const
|
|
353
|
-
const
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
260
|
+
const states = new Array(rowCount);
|
|
261
|
+
const updateBits = new Array(rowCount - 1);
|
|
262
|
+
states[0] = startState;
|
|
263
|
+
let state = startState;
|
|
264
|
+
let valid = true;
|
|
265
|
+
for (let row = 0; row < rowCount - 1; row++) {
|
|
266
|
+
const nextState = findFirstSetBitInRange(candidateMask, 0, wordCount, minNextByState[state], maxNextByState[state]);
|
|
267
|
+
if (nextState < 0) {
|
|
268
|
+
valid = false;
|
|
269
|
+
break;
|
|
358
270
|
}
|
|
271
|
+
states[row + 1] = nextState;
|
|
272
|
+
updateBits[row] = nextState - baselineByState[state];
|
|
273
|
+
state = nextState;
|
|
359
274
|
}
|
|
275
|
+
if (valid)
|
|
276
|
+
return { states, updateBits };
|
|
360
277
|
}
|
|
361
|
-
if (curArr.length === 0)
|
|
362
|
-
return null;
|
|
363
278
|
}
|
|
364
|
-
const
|
|
365
|
-
const
|
|
366
|
-
const
|
|
367
|
-
|
|
279
|
+
const masks = getPathMasksScratch(rowCount * wordCount);
|
|
280
|
+
const nextChoice = getPathNextChoiceScratch(Math.max(0, rowCount - 1) * tableSize);
|
|
281
|
+
const maskOffset = (rowIndex) => rowIndex * wordCount;
|
|
282
|
+
const nextChoiceOffset = (rowIndex) => rowIndex * tableSize;
|
|
283
|
+
const lastCode = codes[rowCount - 1] ?? -1;
|
|
284
|
+
if (lastCode < 0 || lastCode >= symbolMasks.length)
|
|
285
|
+
return null;
|
|
286
|
+
const lastMask = symbolMasks[lastCode];
|
|
287
|
+
if (!lastMask)
|
|
288
|
+
return null;
|
|
289
|
+
const lastMaskOffset = maskOffset(rowCount - 1);
|
|
290
|
+
for (let wi = 0; wi < wordCount; wi++) {
|
|
291
|
+
masks[lastMaskOffset + wi] = lastMask[wi];
|
|
292
|
+
}
|
|
293
|
+
if (isMaskEmpty(masks, lastMaskOffset, wordCount))
|
|
368
294
|
return null;
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
const next = nextChoice[i][cur];
|
|
373
|
-
if (next === undefined || next < 0)
|
|
295
|
+
for (let i = rowCount - 2; i >= 0; i--) {
|
|
296
|
+
const code = codes[i] ?? -1;
|
|
297
|
+
if (code < 0 || code >= statesBySymbol.length)
|
|
374
298
|
return null;
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
299
|
+
const candidates = statesBySymbol[code];
|
|
300
|
+
if (!candidates || candidates.length === 0)
|
|
301
|
+
return null;
|
|
302
|
+
const curMaskOffset = maskOffset(i);
|
|
303
|
+
const nextMaskOffset = maskOffset(i + 1);
|
|
304
|
+
const curNextOffset = nextChoiceOffset(i);
|
|
305
|
+
for (let ci = 0; ci < candidates.length; ci++) {
|
|
306
|
+
const state = candidates[ci];
|
|
307
|
+
if (state === undefined)
|
|
308
|
+
continue;
|
|
309
|
+
const chosenNext = findFirstSetBitInRange(masks, nextMaskOffset, wordCount, minNextByState[state], maxNextByState[state]);
|
|
310
|
+
if (chosenNext >= 0) {
|
|
311
|
+
setMaskBit(masks, curMaskOffset, state);
|
|
312
|
+
nextChoice[curNextOffset + state] = chosenNext;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
if (isMaskEmpty(masks, curMaskOffset, wordCount))
|
|
378
316
|
return null;
|
|
379
|
-
|
|
317
|
+
}
|
|
318
|
+
const states = new Array(rowCount);
|
|
319
|
+
const updateBits = new Array(Math.max(0, rowCount - 1));
|
|
320
|
+
let state = findFirstSetBit(masks, maskOffset(0), wordCount);
|
|
321
|
+
if (state < 0)
|
|
322
|
+
return null;
|
|
323
|
+
states[0] = state;
|
|
324
|
+
for (let i = 0; i < rowCount - 1; i++) {
|
|
325
|
+
const nextState = nextChoice[nextChoiceOffset(i) + state];
|
|
326
|
+
states[i + 1] = nextState;
|
|
327
|
+
updateBits[i] = nextState - baselineByState[state];
|
|
328
|
+
state = nextState;
|
|
380
329
|
}
|
|
381
330
|
return { states, updateBits };
|
|
382
331
|
}
|
|
383
|
-
|
|
332
|
+
let symbolizedScratch = null;
|
|
333
|
+
function ensureSymbolizedScratch(minLength) {
|
|
334
|
+
const existing = symbolizedScratch;
|
|
335
|
+
if (existing && existing.llCodes.length >= minLength) {
|
|
336
|
+
return existing;
|
|
337
|
+
}
|
|
338
|
+
let capacity = existing?.llCodes.length ?? 0;
|
|
339
|
+
if (capacity === 0)
|
|
340
|
+
capacity = 32;
|
|
341
|
+
while (capacity < minLength)
|
|
342
|
+
capacity *= 2;
|
|
343
|
+
symbolizedScratch = {
|
|
344
|
+
llCodes: new Uint8Array(capacity),
|
|
345
|
+
llExtraN: new Uint8Array(capacity),
|
|
346
|
+
llExtraValue: new Uint32Array(capacity),
|
|
347
|
+
mlCodes: new Uint8Array(capacity),
|
|
348
|
+
mlExtraN: new Uint8Array(capacity),
|
|
349
|
+
mlExtraValue: new Uint32Array(capacity),
|
|
350
|
+
ofCodes: new Uint8Array(capacity),
|
|
351
|
+
ofExtraN: new Uint8Array(capacity),
|
|
352
|
+
ofExtraValue: new Uint32Array(capacity),
|
|
353
|
+
};
|
|
354
|
+
return symbolizedScratch;
|
|
355
|
+
}
|
|
356
|
+
function symbolRange(codes) {
|
|
357
|
+
let max = 0;
|
|
358
|
+
for (let i = 0; i < codes.length; i++) {
|
|
359
|
+
const value = codes[i] ?? 0;
|
|
360
|
+
if (value > max)
|
|
361
|
+
max = value;
|
|
362
|
+
}
|
|
363
|
+
return max + 1;
|
|
364
|
+
}
|
|
365
|
+
function buildHistogram(codes, alphabetSize) {
|
|
366
|
+
const out = new Uint32Array(alphabetSize);
|
|
367
|
+
for (let i = 0; i < codes.length; i++) {
|
|
368
|
+
const c = codes[i] ?? 0;
|
|
369
|
+
if (c < 0 || c >= alphabetSize)
|
|
370
|
+
continue;
|
|
371
|
+
out[c] = (out[c] ?? 0) + 1;
|
|
372
|
+
}
|
|
373
|
+
return out;
|
|
374
|
+
}
|
|
375
|
+
function scorePath(path, table, tableLog) {
|
|
376
|
+
if (path.states.length === 0)
|
|
377
|
+
return 0;
|
|
378
|
+
let bits = tableLog;
|
|
379
|
+
for (let i = 0; i < path.states.length - 1; i++) {
|
|
380
|
+
const state = path.states[i] ?? -1;
|
|
381
|
+
if (state < 0 || state >= table.length)
|
|
382
|
+
return Number.POSITIVE_INFINITY;
|
|
383
|
+
bits += table.numBits[state];
|
|
384
|
+
}
|
|
385
|
+
return bits;
|
|
386
|
+
}
|
|
387
|
+
function estimatePathBitsFromHistogram(histogram, table, tableLog, extraHeaderBits) {
|
|
388
|
+
const pre = getPrecomputedPathTable(table);
|
|
389
|
+
const avgBits = pre.avgBitsBySymbol;
|
|
390
|
+
let bits = tableLog + extraHeaderBits;
|
|
391
|
+
for (let sym = 0; sym < histogram.length; sym++) {
|
|
392
|
+
const freq = histogram[sym] ?? 0;
|
|
393
|
+
if (freq <= 0)
|
|
394
|
+
continue;
|
|
395
|
+
const avg = avgBits[sym] ?? Number.POSITIVE_INFINITY;
|
|
396
|
+
if (!Number.isFinite(avg)) {
|
|
397
|
+
return Number.POSITIVE_INFINITY;
|
|
398
|
+
}
|
|
399
|
+
bits += avg * freq;
|
|
400
|
+
}
|
|
401
|
+
return bits;
|
|
402
|
+
}
|
|
403
|
+
const normalizedTableCache = new Map();
|
|
404
|
+
function hashHistogram(histogram) {
|
|
405
|
+
let hash = 2166136261 >>> 0;
|
|
406
|
+
for (let i = 0; i < histogram.length; i++) {
|
|
407
|
+
hash ^= histogram[i] ?? 0;
|
|
408
|
+
hash = Math.imul(hash, 16777619) >>> 0;
|
|
409
|
+
}
|
|
410
|
+
return hash >>> 0;
|
|
411
|
+
}
|
|
412
|
+
function histogramsEqual(a, b) {
|
|
413
|
+
if (a.length !== b.length)
|
|
414
|
+
return false;
|
|
415
|
+
for (let i = 0; i < a.length; i++) {
|
|
416
|
+
if ((a[i] ?? 0) !== (b[i] ?? 0))
|
|
417
|
+
return false;
|
|
418
|
+
}
|
|
419
|
+
return true;
|
|
420
|
+
}
|
|
421
|
+
/**
 * Builds candidate FSE tables for a code stream, one per viable table log.
 *
 * For every table log from the smallest that can hold the distinct symbols
 * (never below 5) up to a limit derived from `maxTableLog` and the number of
 * samples, the histogram of `codes` is normalized, its NCount header is
 * serialized and a decode table is built. Results are memoized in
 * `normalizedTableCache`, keyed by table log, alphabet size and histogram
 * hash; a full histogram comparison guards against hash collisions.
 *
 * @param {ArrayLike<number>} codes - Symbol codes of one stream.
 * @param {number} maxTableLog - Largest table log allowed for this stream.
 * @returns {Array<{histogram: *, table: *, tableLog: number, header: *}>}
 *   Candidates in increasing table-log order. Empty when the stream has at
 *   most one distinct symbol or the symbols cannot fit in `maxTableLog`.
 *   Table logs whose normalization/serialization throws are skipped.
 */
function getNormalizedTableCandidates(codes, maxTableLog) {
    // Upper bound on the number of distinct cache keys. The cache lives at
    // module scope and every new histogram adds keys, so without a bound a
    // long-running process would retain tables indefinitely. Clearing the
    // whole map at the cap only costs recomputation; results are unaffected.
    const MAX_CACHED_KEYS = 1024;
    const alphabetSize = symbolRange(codes);
    if (alphabetSize <= 0)
        return [];
    const histogram = buildHistogram(codes, alphabetSize);
    let distinct = 0;
    for (let i = 0; i < histogram.length; i++) {
        if ((histogram[i] ?? 0) > 0)
            distinct++;
    }
    // With zero or one distinct symbol no candidate table is produced.
    if (distinct <= 1)
        return [];
    // Smallest table log whose table has at least one slot per distinct symbol.
    let minTableLog = 5;
    while ((1 << minTableLog) < distinct && minTableLog < maxTableLog)
        minTableLog++;
    if ((1 << minTableLog) < distinct)
        return [];
    // floor(log2(codes.length - 1)) for streams with more than one code.
    const maxLogFromSamples = codes.length > 1 ? 31 - Math.clz32(codes.length - 1) : 5;
    const limit = Math.max(minTableLog, Math.min(maxTableLog, maxLogFromSamples + 1));
    const results = [];
    const histogramHash = hashHistogram(histogram);
    for (let tableLog = minTableLog; tableLog <= limit; tableLog++) {
        const key = `${tableLog}:${alphabetSize}:${histogramHash}`;
        const cachedBucket = normalizedTableCache.get(key);
        if (cachedBucket) {
            // Same key does not imply same histogram: confirm with a full compare.
            const hit = cachedBucket.find((cached) => histogramsEqual(cached.histogram, histogram));
            if (hit) {
                results.push(hit);
                continue;
            }
        }
        try {
            const { normalizedCounter, maxSymbolValue } = normalizeCountsForTable(Array.from(histogram), tableLog);
            const header = writeNCount(normalizedCounter, maxSymbolValue, tableLog);
            const table = buildFSEDecodeTable(normalizedCounter, tableLog);
            const out = {
                // Private copy so the cached entry does not alias the caller's histogram.
                histogram: histogram.slice(0),
                table,
                tableLog,
                header,
            };
            if (!cachedBucket) {
                if (normalizedTableCache.size >= MAX_CACHED_KEYS) {
                    normalizedTableCache.clear();
                }
                normalizedTableCache.set(key, [out]);
            }
            else {
                cachedBucket.push(out);
            }
            results.push(out);
        }
        catch {
            // Skip invalid normalizations for this table log.
        }
    }
    return results;
}
|
|
481
|
+
function symbolizedSequences(sequences) {
|
|
384
482
|
if (sequences.length === 0)
|
|
385
483
|
return null;
|
|
386
484
|
const numSequences = sequences.length;
|
|
387
|
-
const
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
const
|
|
391
|
-
const
|
|
392
|
-
const
|
|
393
|
-
const
|
|
394
|
-
const
|
|
395
|
-
const
|
|
396
|
-
const
|
|
397
|
-
const ofExtraN = new Uint8Array(numSequences);
|
|
398
|
-
const ofExtraValue = new Uint32Array(numSequences);
|
|
485
|
+
const scratch = ensureSymbolizedScratch(numSequences);
|
|
486
|
+
const llCodes = scratch.llCodes.subarray(0, numSequences);
|
|
487
|
+
const llExtraN = scratch.llExtraN.subarray(0, numSequences);
|
|
488
|
+
const llExtraValue = scratch.llExtraValue.subarray(0, numSequences);
|
|
489
|
+
const mlCodes = scratch.mlCodes.subarray(0, numSequences);
|
|
490
|
+
const mlExtraN = scratch.mlExtraN.subarray(0, numSequences);
|
|
491
|
+
const mlExtraValue = scratch.mlExtraValue.subarray(0, numSequences);
|
|
492
|
+
const ofCodes = scratch.ofCodes.subarray(0, numSequences);
|
|
493
|
+
const ofExtraN = scratch.ofExtraN.subarray(0, numSequences);
|
|
494
|
+
const ofExtraValue = scratch.ofExtraValue.subarray(0, numSequences);
|
|
399
495
|
for (let i = 0; i < numSequences; i++) {
|
|
400
496
|
const sequence = sequences[i];
|
|
401
497
|
const ll = findLengthCode(sequence.literalsLength, LL_BASELINE, LL_NUMBITS, 15, 0);
|
|
@@ -419,77 +515,174 @@ function buildPredefinedSequenceSection(sequences) {
|
|
|
419
515
|
ofExtraN[i] = ofCode;
|
|
420
516
|
ofExtraValue[i] = ofEx;
|
|
421
517
|
}
|
|
518
|
+
return { llCodes, llExtraN, llExtraValue, mlCodes, mlExtraN, mlExtraValue, ofCodes, ofExtraN, ofExtraValue };
|
|
519
|
+
}
|
|
520
|
+
/**
 * Picks the cheapest encoding for one sequence code stream.
 *
 * Three options are compared by their scored bit cost:
 *   - mode 0: the predefined table (always the baseline),
 *   - mode 3: the table from `prevTable`/`prevTableLog`, when provided,
 *   - mode 2: a freshly normalized table, whose serialized header is
 *     charged to its score.
 * A cheap histogram-based estimate is used to skip candidates whose
 * estimate is not within 16 bits of the current best before paying for a
 * full state path.
 *
 * @param {ArrayLike<number>} codes - Symbol codes of the stream.
 * @param {*} predefinedTable - Predefined FSE table for this stream.
 * @param {number} predefinedTableLog - Table log of `predefinedTable`.
 * @param {number} maxTableLog - Largest table log allowed for new tables.
 * @param {*} prevTable - Previously used table, or null.
 * @param {?number} prevTableLog - Table log of `prevTable`, or null.
 * @returns {?{mode: number, table: *, tableLog: number, path: *, tableHeader: Uint8Array}}
 *   The winning choice, or null when no state path can be built with the
 *   predefined table.
 */
function chooseStreamMode(codes, predefinedTable, predefinedTableLog, maxTableLog, prevTable, prevTableLog) {
    const ESTIMATE_SLACK_BITS = 16;
    const range = symbolRange(codes);
    const counts = range > 0 ? buildHistogram(codes, range) : new Uint32Array(0);
    const basePath = buildStatePath(codes, predefinedTable);
    if (!basePath) {
        return null;
    }
    let winner = {
        mode: 0,
        table: predefinedTable,
        tableLog: predefinedTableLog,
        path: basePath,
        tableHeader: new Uint8Array(0),
    };
    let winnerBits = scorePath(basePath, predefinedTable, predefinedTableLog);

    // Option: reuse the previous table (no header bytes needed).
    if (prevTable && prevTableLog !== null) {
        const repeatEstimate = estimatePathBitsFromHistogram(counts, prevTable, prevTableLog, 0);
        if (repeatEstimate < winnerBits + ESTIMATE_SLACK_BITS) {
            const repeatPath = buildStatePath(codes, prevTable);
            if (repeatPath) {
                const repeatBits = scorePath(repeatPath, prevTable, prevTableLog);
                if (repeatBits < winnerBits) {
                    winner = {
                        mode: 3,
                        table: prevTable,
                        tableLog: prevTableLog,
                        path: repeatPath,
                        tableHeader: new Uint8Array(0),
                    };
                    winnerBits = repeatBits;
                }
            }
        }
    }

    // Option: a new table. Rank all candidates by estimate, then fully
    // evaluate only the two most promising ones.
    const ranked = [];
    for (const compressed of getNormalizedTableCandidates(codes, maxTableLog)) {
        const headerBits = compressed.header.length * 8;
        ranked.push({
            compressed,
            estimate: estimatePathBitsFromHistogram(counts, compressed.table, compressed.tableLog, headerBits),
        });
    }
    ranked.sort((a, b) => a.estimate - b.estimate);
    for (const { compressed, estimate } of ranked.slice(0, 2)) {
        if (estimate >= winnerBits + ESTIMATE_SLACK_BITS) {
            continue;
        }
        const compressedPath = buildStatePath(codes, compressed.table);
        if (!compressedPath) {
            continue;
        }
        const compressedBits = scorePath(compressedPath, compressed.table, compressed.tableLog) +
            compressed.header.length * 8;
        if (compressedBits < winnerBits) {
            winner = {
                mode: 2,
                table: compressed.table,
                tableLog: compressed.tableLog,
                path: compressedPath,
                tableHeader: compressed.header,
            };
            winnerBits = compressedBits;
        }
    }
    return winner;
}
|
|
579
|
+
/**
 * Serializes the sequences section of a compressed block.
 *
 * Output layout: the encoded sequence count, one mode byte (literal-length
 * mode in bits 7-6, offset mode in bits 5-4, match-length mode in bits 3-2),
 * the table headers for literal lengths, offsets and match lengths in that
 * order (empty for modes that carry no header), then the bitstream.
 *
 * @param {ArrayLike<*>} sequences - Sequences to encode.
 * @param {*} [context] - Optional encoder context; `context.prevTables`, if
 *   present, supplies the tables offered to each stream for reuse.
 * @returns {?{section: Uint8Array, tables: Object}} The section bytes plus
 *   the tables chosen for each stream, or null when the sequences cannot be
 *   encoded (no sequences, unencodable count, no usable stream mode, or an
 *   out-of-range state).
 */
function buildSequenceSection(sequences, context) {
    const encoded = symbolizedSequences(sequences);
    if (!encoded) {
        return null;
    }
    const numSequences = sequences.length;
    const countPrefix = encodeNumSequences(numSequences);
    if (!countPrefix) {
        return null;
    }
    const { ll: llTable, of: ofTable, ml: mlTable } = getPredefinedFSETables();
    const previous = context?.prevTables;
    const llChoice = chooseStreamMode(encoded.llCodes, llTable, LITERALS_LENGTH_TABLE_LOG, 9, previous?.llTable ?? null, previous?.llTableLog ?? null);
    const ofChoice = chooseStreamMode(encoded.ofCodes, ofTable, OFFSET_CODE_TABLE_LOG, 8, previous?.ofTable ?? null, previous?.ofTableLog ?? null);
    const mlChoice = chooseStreamMode(encoded.mlCodes, mlTable, MATCH_LENGTH_TABLE_LOG, 9, previous?.mlTable ?? null, previous?.mlTableLog ?? null);
    if (!llChoice || !ofChoice || !mlChoice) {
        return null;
    }

    // 3 initial states + 3 extra-bit fields per sequence + 3 state updates
    // for every sequence but the last = 6 * numSequences chunks in total.
    const slotCount = numSequences * 6;
    const readCounts = getSequenceReadCountsScratch(slotCount).subarray(0, slotCount);
    const readValues = getSequenceReadValuesScratch(slotCount).subarray(0, slotCount);
    const { states: llStates, updateBits: llUpdates } = llChoice.path;
    const { states: ofStates, updateBits: ofUpdates } = ofChoice.path;
    const { states: mlStates, updateBits: mlUpdates } = mlChoice.path;
    const { ofExtraN, ofExtraValue, mlExtraN, mlExtraValue, llExtraN, llExtraValue } = encoded;

    let cursor = 0;
    // Initial states, in the order literal length, offset, match length.
    readCounts[cursor] = llChoice.tableLog;
    readValues[cursor] = llStates[0];
    cursor += 1;
    readCounts[cursor] = ofChoice.tableLog;
    readValues[cursor] = ofStates[0];
    cursor += 1;
    readCounts[cursor] = mlChoice.tableLog;
    readValues[cursor] = mlStates[0];
    cursor += 1;

    const lastIndex = numSequences - 1;
    for (let idx = 0; idx < numSequences; idx += 1) {
        // Extra bits: offset, match length, literal length.
        readCounts[cursor] = ofExtraN[idx];
        readValues[cursor] = ofExtraValue[idx];
        cursor += 1;
        readCounts[cursor] = mlExtraN[idx];
        readValues[cursor] = mlExtraValue[idx];
        cursor += 1;
        readCounts[cursor] = llExtraN[idx];
        readValues[cursor] = llExtraValue[idx];
        cursor += 1;
        // No state update follows the final sequence.
        if (idx === lastIndex) {
            continue;
        }
        const llState = llStates[idx];
        const mlState = mlStates[idx];
        const ofState = ofStates[idx];
        if (llState < 0 ||
            llState >= llChoice.table.length ||
            mlState < 0 ||
            mlState >= mlChoice.table.length ||
            ofState < 0 ||
            ofState >= ofChoice.table.length) {
            return null;
        }
        // State updates: literal length, match length, offset.
        readCounts[cursor] = llChoice.table.numBits[llState];
        readValues[cursor] = llUpdates[idx];
        cursor += 1;
        readCounts[cursor] = mlChoice.table.numBits[mlState];
        readValues[cursor] = mlUpdates[idx];
        cursor += 1;
        readCounts[cursor] = ofChoice.table.numBits[ofState];
        readValues[cursor] = ofUpdates[idx];
        cursor += 1;
    }

    const bitstream = encodeReverseBitstream(readCounts, readValues);
    const headers = [llChoice.tableHeader, ofChoice.tableHeader, mlChoice.tableHeader];
    let headerBytes = 0;
    for (const header of headers) {
        headerBytes += header.length;
    }
    const out = new Uint8Array(countPrefix.length + 1 + headerBytes + bitstream.length);
    out.set(countPrefix, 0);
    out[countPrefix.length] = ((llChoice.mode << 6) | (ofChoice.mode << 4) | (mlChoice.mode << 2)) & 0xff;
    let offset = countPrefix.length + 1;
    for (const header of headers) {
        out.set(header, offset);
        offset += header.length;
    }
    out.set(bitstream, offset);
    return {
        section: out,
        tables: {
            llTable: llChoice.table,
            llTableLog: llChoice.tableLog,
            ofTable: ofChoice.table,
            ofTableLog: ofChoice.tableLog,
            mlTable: mlChoice.table,
            mlTableLog: mlChoice.tableLog,
        },
    };
}
|
|
469
|
-
export function buildCompressedBlockPayload(literals, sequences) {
|
|
470
|
-
const
|
|
471
|
-
|
|
472
|
-
|
|
668
|
+
/**
 * Builds the payload of a compressed block: the literals section followed
 * immediately by the sequences section.
 *
 * @param {*} literals - Literal bytes, passed to encodeLiteralsSection.
 * @param {ArrayLike<*>} sequences - Sequences, passed to buildSequenceSection.
 * @param {*} [context] - Optional encoder context. Its `prevLiteralsTable`
 *   and `prevTables` are offered for reuse and, on success, overwritten with
 *   the tables produced for this block. It is left untouched on failure.
 * @returns {?Uint8Array} The concatenated payload, or null when either
 *   section could not be encoded.
 */
export function buildCompressedBlockPayload(literals, sequences, context) {
    const encodedLiterals = encodeLiteralsSection(literals, {
        prevTable: context?.prevLiteralsTable ?? null,
    });
    if (!encodedLiterals) {
        return null;
    }
    const literalsSection = encodedLiterals.section;
    const seqSection = buildSequenceSection(sequences, context);
    if (!seqSection) {
        return null;
    }
    const sequenceBytes = seqSection.section;
    const out = new Uint8Array(literalsSection.length + sequenceBytes.length);
    out.set(literalsSection, 0);
    out.set(sequenceBytes, literalsSection.length);
    // Record this block's tables so a later block may reuse them.
    if (context) {
        context.prevTables = seqSection.tables;
        context.prevLiteralsTable = encodedLiterals.table;
    }
    return out;
}
|
|
495
688
|
export function writeCompressedBlock(payload, last) {
|
|
@@ -501,4 +694,11 @@ export function writeCompressedBlock(payload, last) {
|
|
|
501
694
|
out.set(payload, 3);
|
|
502
695
|
return out;
|
|
503
696
|
}
|
|
697
|
+
// Internal benchmark hooks for hot-path profiling.
// `buildPredefinedSequenceSection` keeps its name but delegates to
// buildSequenceSection with no context and returns only the section bytes
// (or null).
export const __benchInternals = {
    encodeReverseBitstream: encodeReverseBitstream,
    buildGeneralCompressedLiterals: buildGeneralCompressedLiteralsForBench,
    buildPredefinedSequenceSection: (sequences) => {
        const built = buildSequenceSection(sequences);
        return built?.section ?? null;
    },
    buildSequenceSection: buildSequenceSection,
};
|
|
504
704
|
//# sourceMappingURL=compressedBlock.js.map
|