zstd-ruby 1.5.2.2 → 1.5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -3
- data/ext/zstdruby/common.h +7 -0
- data/ext/zstdruby/libzstd/common/bits.h +175 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +18 -59
- data/ext/zstdruby/libzstd/common/compiler.h +22 -3
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +1 -1
- data/ext/zstdruby/libzstd/common/entropy_common.c +12 -40
- data/ext/zstdruby/libzstd/common/error_private.c +9 -2
- data/ext/zstdruby/libzstd/common/error_private.h +1 -1
- data/ext/zstdruby/libzstd/common/fse.h +5 -83
- data/ext/zstdruby/libzstd/common/fse_decompress.c +7 -99
- data/ext/zstdruby/libzstd/common/huf.h +65 -156
- data/ext/zstdruby/libzstd/common/mem.h +39 -46
- data/ext/zstdruby/libzstd/common/pool.c +26 -10
- data/ext/zstdruby/libzstd/common/pool.h +7 -1
- data/ext/zstdruby/libzstd/common/portability_macros.h +22 -3
- data/ext/zstdruby/libzstd/common/threading.c +68 -14
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +2 -2
- data/ext/zstdruby/libzstd/common/xxhash.h +8 -8
- data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +17 -113
- data/ext/zstdruby/libzstd/common/zstd_trace.h +3 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +1 -1
- data/ext/zstdruby/libzstd/compress/fse_compress.c +7 -124
- data/ext/zstdruby/libzstd/compress/hist.c +1 -1
- data/ext/zstdruby/libzstd/compress/hist.h +1 -1
- data/ext/zstdruby/libzstd/compress/huf_compress.c +234 -169
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +1055 -455
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +165 -145
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +115 -39
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -8
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +5 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +95 -33
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +433 -148
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +306 -283
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +5 -5
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +104 -80
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +12 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -1
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +434 -441
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +30 -39
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +3 -4
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +164 -42
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +186 -65
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +4 -2
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +19 -15
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -87
- data/ext/zstdruby/libzstd/zdict.h +53 -31
- data/ext/zstdruby/libzstd/zstd.h +489 -90
- data/ext/zstdruby/libzstd/zstd_errors.h +27 -8
- data/ext/zstdruby/main.c +4 -0
- data/ext/zstdruby/streaming_compress.c +1 -7
- data/ext/zstdruby/zstdruby.c +110 -26
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- metadata +7 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -25,6 +25,8 @@ extern "C" {
|
|
|
25
25
|
*/
|
|
26
26
|
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
|
27
27
|
|
|
28
|
+
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
|
|
29
|
+
|
|
28
30
|
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
|
29
31
|
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
|
|
30
32
|
|
|
@@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
|
|
|
116
118
|
size_t ZSTD_compressBlock_btlazy2_extDict(
|
|
117
119
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
118
120
|
void const* src, size_t srcSize);
|
|
119
|
-
|
|
121
|
+
|
|
120
122
|
|
|
121
123
|
#if defined (__cplusplus)
|
|
122
124
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -242,11 +242,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
|
|
242
242
|
switch(ms->cParams.strategy)
|
|
243
243
|
{
|
|
244
244
|
case ZSTD_fast:
|
|
245
|
-
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
|
|
245
|
+
ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
|
246
246
|
break;
|
|
247
247
|
|
|
248
248
|
case ZSTD_dfast:
|
|
249
|
-
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
|
|
249
|
+
ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
|
|
250
250
|
break;
|
|
251
251
|
|
|
252
252
|
case ZSTD_greedy:
|
|
@@ -549,7 +549,7 @@ size_t ZSTD_ldm_generateSequences(
|
|
|
549
549
|
* the window through early invalidation.
|
|
550
550
|
* TODO: * Test the chunk size.
|
|
551
551
|
* * Try invalidation after the sequence generation and test the
|
|
552
|
-
*
|
|
552
|
+
* offset against maxDist directly.
|
|
553
553
|
*
|
|
554
554
|
* NOTE: Because of dictionaries + sequence splitting we MUST make sure
|
|
555
555
|
* that any offset used is valid at the END of the sequence, since it may
|
|
@@ -711,7 +711,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|
|
711
711
|
rep[0] = sequence.offset;
|
|
712
712
|
/* Store the sequence */
|
|
713
713
|
ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
|
|
714
|
-
|
|
714
|
+
OFFSET_TO_OFFBASE(sequence.offset),
|
|
715
715
|
sequence.matchLength);
|
|
716
716
|
ip += sequence.matchLength;
|
|
717
717
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
|
17
17
|
#define ZSTD_MAX_PRICE (1<<30)
|
|
18
18
|
|
|
19
|
-
#define ZSTD_PREDEF_THRESHOLD
|
|
19
|
+
#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
/*-*************************************
|
|
@@ -26,27 +26,35 @@
|
|
|
26
26
|
#if 0 /* approximation at bit level (for tests) */
|
|
27
27
|
# define BITCOST_ACCURACY 0
|
|
28
28
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
29
|
-
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
|
|
29
|
+
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
|
|
30
30
|
#elif 0 /* fractional bit accuracy (for tests) */
|
|
31
31
|
# define BITCOST_ACCURACY 8
|
|
32
32
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
33
|
-
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
|
33
|
+
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
|
|
34
34
|
#else /* opt==approx, ultra==accurate */
|
|
35
35
|
# define BITCOST_ACCURACY 8
|
|
36
36
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
37
|
-
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
|
37
|
+
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
|
38
38
|
#endif
|
|
39
39
|
|
|
40
|
+
/* ZSTD_bitWeight() :
|
|
41
|
+
* provide estimated "cost" of a stat in full bits only */
|
|
40
42
|
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
|
|
41
43
|
{
|
|
42
44
|
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
|
|
43
45
|
}
|
|
44
46
|
|
|
47
|
+
/* ZSTD_fracWeight() :
|
|
48
|
+
* provide fractional-bit "cost" of a stat,
|
|
49
|
+
* using linear interpolation approximation */
|
|
45
50
|
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
46
51
|
{
|
|
47
52
|
U32 const stat = rawStat + 1;
|
|
48
53
|
U32 const hb = ZSTD_highbit32(stat);
|
|
49
54
|
U32 const BWeight = hb * BITCOST_MULTIPLIER;
|
|
55
|
+
/* FWeight was meant for "Fractional weight"
|
|
56
|
+
* but it's effectively a value between 1 and 2
|
|
57
|
+
* using fixed point arithmetic */
|
|
50
58
|
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
|
|
51
59
|
U32 const weight = BWeight + FWeight;
|
|
52
60
|
assert(hb + BITCOST_ACCURACY < 31);
|
|
@@ -57,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
|
57
65
|
/* debugging function,
|
|
58
66
|
* @return price in bytes as fractional value
|
|
59
67
|
* for debug messages only */
|
|
60
|
-
MEM_STATIC double ZSTD_fCost(
|
|
68
|
+
MEM_STATIC double ZSTD_fCost(int price)
|
|
61
69
|
{
|
|
62
70
|
return (double)price / (BITCOST_MULTIPLIER*8);
|
|
63
71
|
}
|
|
@@ -88,20 +96,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
|
|
|
88
96
|
return total;
|
|
89
97
|
}
|
|
90
98
|
|
|
91
|
-
|
|
99
|
+
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
|
|
100
|
+
|
|
101
|
+
static U32
|
|
102
|
+
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
|
|
92
103
|
{
|
|
93
104
|
U32 s, sum=0;
|
|
94
|
-
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
|
|
105
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
|
|
106
|
+
(unsigned)lastEltIndex+1, (unsigned)shift );
|
|
95
107
|
assert(shift < 30);
|
|
96
108
|
for (s=0; s<lastEltIndex+1; s++) {
|
|
97
|
-
|
|
98
|
-
|
|
109
|
+
unsigned const base = base1 ? 1 : (table[s]>0);
|
|
110
|
+
unsigned const newStat = base + (table[s] >> shift);
|
|
111
|
+
sum += newStat;
|
|
112
|
+
table[s] = newStat;
|
|
99
113
|
}
|
|
100
114
|
return sum;
|
|
101
115
|
}
|
|
102
116
|
|
|
103
117
|
/* ZSTD_scaleStats() :
|
|
104
|
-
* reduce all
|
|
118
|
+
* reduce all element frequencies in table if their sum is too large
|
|
105
119
|
* return the resulting sum of elements */
|
|
106
120
|
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
|
107
121
|
{
|
|
@@ -110,7 +124,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
|
|
110
124
|
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
|
111
125
|
assert(logTarget < 30);
|
|
112
126
|
if (factor <= 1) return prevsum;
|
|
113
|
-
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
|
|
127
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
|
|
114
128
|
}
|
|
115
129
|
|
|
116
130
|
/* ZSTD_rescaleFreqs() :
|
|
@@ -129,18 +143,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
129
143
|
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
|
130
144
|
optPtr->priceType = zop_dynamic;
|
|
131
145
|
|
|
132
|
-
if (optPtr->litLengthSum == 0) { /* first block
|
|
133
|
-
|
|
134
|
-
|
|
146
|
+
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
|
|
147
|
+
|
|
148
|
+
/* heuristic: use pre-defined stats for too small inputs */
|
|
149
|
+
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
|
|
150
|
+
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
|
|
135
151
|
optPtr->priceType = zop_predef;
|
|
136
152
|
}
|
|
137
153
|
|
|
138
154
|
assert(optPtr->symbolCosts != NULL);
|
|
139
155
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
|
140
|
-
|
|
156
|
+
|
|
157
|
+
/* huffman stats covering the full value set : table presumed generated by dictionary */
|
|
141
158
|
optPtr->priceType = zop_dynamic;
|
|
142
159
|
|
|
143
160
|
if (compressedLiterals) {
|
|
161
|
+
/* generate literals statistics from huffman table */
|
|
144
162
|
unsigned lit;
|
|
145
163
|
assert(optPtr->litFreq != NULL);
|
|
146
164
|
optPtr->litSum = 0;
|
|
@@ -188,13 +206,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
188
206
|
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
|
189
207
|
} }
|
|
190
208
|
|
|
191
|
-
} else { /*
|
|
209
|
+
} else { /* first block, no dictionary */
|
|
192
210
|
|
|
193
211
|
assert(optPtr->litFreq != NULL);
|
|
194
212
|
if (compressedLiterals) {
|
|
213
|
+
/* base initial cost of literals on direct frequency within src */
|
|
195
214
|
unsigned lit = MaxLit;
|
|
196
215
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
|
197
|
-
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
|
|
216
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
|
|
198
217
|
}
|
|
199
218
|
|
|
200
219
|
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
|
@@ -224,10 +243,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
224
243
|
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
|
225
244
|
}
|
|
226
245
|
|
|
227
|
-
|
|
228
246
|
}
|
|
229
247
|
|
|
230
|
-
} else { /* new block :
|
|
248
|
+
} else { /* new block : scale down accumulated statistics */
|
|
231
249
|
|
|
232
250
|
if (compressedLiterals)
|
|
233
251
|
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
|
@@ -255,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
|
255
273
|
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
|
256
274
|
|
|
257
275
|
/* dynamic statistics */
|
|
258
|
-
{ U32 price =
|
|
276
|
+
{ U32 price = optPtr->litSumBasePrice * litLength;
|
|
277
|
+
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
|
|
259
278
|
U32 u;
|
|
279
|
+
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
|
|
260
280
|
for (u=0; u < litLength; u++) {
|
|
261
|
-
|
|
262
|
-
|
|
281
|
+
U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
|
|
282
|
+
if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
|
|
283
|
+
price -= litPrice;
|
|
263
284
|
}
|
|
264
285
|
return price;
|
|
265
286
|
}
|
|
@@ -272,10 +293,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
|
272
293
|
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
|
|
273
294
|
if (optPtr->priceType == zop_predef)
|
|
274
295
|
return WEIGHT(litLength, optLevel);
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
*
|
|
278
|
-
* would
|
|
296
|
+
|
|
297
|
+
/* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
|
|
298
|
+
* because it isn't representable in the zstd format.
|
|
299
|
+
* So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
|
|
300
|
+
* In such a case, the block would be all literals.
|
|
279
301
|
*/
|
|
280
302
|
if (litLength == ZSTD_BLOCKSIZE_MAX)
|
|
281
303
|
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
|
|
@@ -289,24 +311,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
|
289
311
|
}
|
|
290
312
|
|
|
291
313
|
/* ZSTD_getMatchPrice() :
|
|
292
|
-
* Provides the cost of the match part (offset + matchLength) of a sequence
|
|
314
|
+
* Provides the cost of the match part (offset + matchLength) of a sequence.
|
|
293
315
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
|
294
|
-
* @
|
|
316
|
+
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
|
|
295
317
|
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
|
|
296
318
|
*/
|
|
297
319
|
FORCE_INLINE_TEMPLATE U32
|
|
298
|
-
ZSTD_getMatchPrice(U32 const
|
|
320
|
+
ZSTD_getMatchPrice(U32 const offBase,
|
|
299
321
|
U32 const matchLength,
|
|
300
322
|
const optState_t* const optPtr,
|
|
301
323
|
int const optLevel)
|
|
302
324
|
{
|
|
303
325
|
U32 price;
|
|
304
|
-
U32 const offCode = ZSTD_highbit32(
|
|
326
|
+
U32 const offCode = ZSTD_highbit32(offBase);
|
|
305
327
|
U32 const mlBase = matchLength - MINMATCH;
|
|
306
328
|
assert(matchLength >= MINMATCH);
|
|
307
329
|
|
|
308
|
-
if (optPtr->priceType == zop_predef) /* fixed scheme,
|
|
309
|
-
return WEIGHT(mlBase, optLevel)
|
|
330
|
+
if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
|
|
331
|
+
return WEIGHT(mlBase, optLevel)
|
|
332
|
+
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
|
|
310
333
|
|
|
311
334
|
/* dynamic statistics */
|
|
312
335
|
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
|
|
@@ -325,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offcode,
|
|
|
325
348
|
}
|
|
326
349
|
|
|
327
350
|
/* ZSTD_updateStats() :
|
|
328
|
-
* assumption : literals +
|
|
351
|
+
* assumption : literals + litLength <= iend */
|
|
329
352
|
static void ZSTD_updateStats(optState_t* const optPtr,
|
|
330
353
|
U32 litLength, const BYTE* literals,
|
|
331
|
-
U32
|
|
354
|
+
U32 offBase, U32 matchLength)
|
|
332
355
|
{
|
|
333
356
|
/* literals */
|
|
334
357
|
if (ZSTD_compressedLiterals(optPtr)) {
|
|
@@ -344,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
|
|
344
367
|
optPtr->litLengthSum++;
|
|
345
368
|
}
|
|
346
369
|
|
|
347
|
-
/* offset code :
|
|
348
|
-
{ U32 const offCode = ZSTD_highbit32(
|
|
370
|
+
/* offset code : follows storeSeq() numeric representation */
|
|
371
|
+
{ U32 const offCode = ZSTD_highbit32(offBase);
|
|
349
372
|
assert(offCode <= MaxOff);
|
|
350
373
|
optPtr->offCodeFreq[offCode]++;
|
|
351
374
|
optPtr->offCodeSum++;
|
|
@@ -552,16 +575,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
|
552
575
|
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
|
|
553
576
|
}
|
|
554
577
|
|
|
555
|
-
FORCE_INLINE_TEMPLATE
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
578
|
+
FORCE_INLINE_TEMPLATE U32
|
|
579
|
+
ZSTD_insertBtAndGetAllMatches (
|
|
580
|
+
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
|
581
|
+
ZSTD_matchState_t* ms,
|
|
582
|
+
U32* nextToUpdate3,
|
|
583
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
|
584
|
+
const ZSTD_dictMode_e dictMode,
|
|
585
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
586
|
+
const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
|
587
|
+
const U32 lengthToBeat,
|
|
588
|
+
const U32 mls /* template */)
|
|
565
589
|
{
|
|
566
590
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
567
591
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
@@ -644,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
644
668
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
|
645
669
|
repCode, ll0, repOffset, repLen);
|
|
646
670
|
bestLength = repLen;
|
|
647
|
-
matches[mnum].off =
|
|
671
|
+
matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
|
|
648
672
|
matches[mnum].len = (U32)repLen;
|
|
649
673
|
mnum++;
|
|
650
674
|
if ( (repLen > sufficient_len)
|
|
@@ -673,7 +697,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
673
697
|
bestLength = mlen;
|
|
674
698
|
assert(curr > matchIndex3);
|
|
675
699
|
assert(mnum==0); /* no prior solution */
|
|
676
|
-
matches[0].off =
|
|
700
|
+
matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
|
|
677
701
|
matches[0].len = (U32)mlen;
|
|
678
702
|
mnum = 1;
|
|
679
703
|
if ( (mlen > sufficient_len) |
|
|
@@ -706,13 +730,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
706
730
|
}
|
|
707
731
|
|
|
708
732
|
if (matchLength > bestLength) {
|
|
709
|
-
DEBUGLOG(8, "found match of length %u at distance %u (
|
|
710
|
-
(U32)matchLength, curr - matchIndex,
|
|
733
|
+
DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
|
|
734
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
|
711
735
|
assert(matchEndIdx > matchIndex);
|
|
712
736
|
if (matchLength > matchEndIdx - matchIndex)
|
|
713
737
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
714
738
|
bestLength = matchLength;
|
|
715
|
-
matches[mnum].off =
|
|
739
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
|
716
740
|
matches[mnum].len = (U32)matchLength;
|
|
717
741
|
mnum++;
|
|
718
742
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
@@ -754,12 +778,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
754
778
|
|
|
755
779
|
if (matchLength > bestLength) {
|
|
756
780
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
|
757
|
-
DEBUGLOG(8, "found dms match of length %u at distance %u (
|
|
758
|
-
(U32)matchLength, curr - matchIndex,
|
|
781
|
+
DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
|
|
782
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
|
759
783
|
if (matchLength > matchEndIdx - matchIndex)
|
|
760
784
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
761
785
|
bestLength = matchLength;
|
|
762
|
-
matches[mnum].off =
|
|
786
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
|
763
787
|
matches[mnum].len = (U32)matchLength;
|
|
764
788
|
mnum++;
|
|
765
789
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
@@ -960,7 +984,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
|
960
984
|
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
|
961
985
|
{
|
|
962
986
|
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
|
|
963
|
-
/* Note: ZSTD_match_t actually contains
|
|
987
|
+
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
|
|
964
988
|
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
|
965
989
|
|
|
966
990
|
/* Ensure that current block position is not outside of the match */
|
|
@@ -971,11 +995,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
|
971
995
|
}
|
|
972
996
|
|
|
973
997
|
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
|
974
|
-
U32 const
|
|
975
|
-
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (
|
|
976
|
-
|
|
998
|
+
U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
|
|
999
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
|
|
1000
|
+
candidateOffBase, candidateMatchLength, currPosInBlock);
|
|
977
1001
|
matches[*nbMatches].len = candidateMatchLength;
|
|
978
|
-
matches[*nbMatches].off =
|
|
1002
|
+
matches[*nbMatches].off = candidateOffBase;
|
|
979
1003
|
(*nbMatches)++;
|
|
980
1004
|
}
|
|
981
1005
|
}
|
|
@@ -1098,14 +1122,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1098
1122
|
|
|
1099
1123
|
/* large match -> immediate encoding */
|
|
1100
1124
|
{ U32 const maxML = matches[nbMatches-1].len;
|
|
1101
|
-
U32 const
|
|
1102
|
-
DEBUGLOG(6, "found %u matches of maxLength=%u and
|
|
1103
|
-
nbMatches, maxML,
|
|
1125
|
+
U32 const maxOffBase = matches[nbMatches-1].off;
|
|
1126
|
+
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
|
|
1127
|
+
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
|
|
1104
1128
|
|
|
1105
1129
|
if (maxML > sufficient_len) {
|
|
1106
1130
|
lastSequence.litlen = litlen;
|
|
1107
1131
|
lastSequence.mlen = maxML;
|
|
1108
|
-
lastSequence.off =
|
|
1132
|
+
lastSequence.off = maxOffBase;
|
|
1109
1133
|
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
|
|
1110
1134
|
maxML, sufficient_len);
|
|
1111
1135
|
cur = 0;
|
|
@@ -1122,15 +1146,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1122
1146
|
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
|
|
1123
1147
|
}
|
|
1124
1148
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
1125
|
-
U32 const
|
|
1149
|
+
U32 const offBase = matches[matchNb].off;
|
|
1126
1150
|
U32 const end = matches[matchNb].len;
|
|
1127
1151
|
for ( ; pos <= end ; pos++ ) {
|
|
1128
|
-
U32 const matchPrice = ZSTD_getMatchPrice(
|
|
1152
|
+
U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
|
|
1129
1153
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
|
1130
1154
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
|
1131
|
-
pos, ZSTD_fCost(sequencePrice));
|
|
1155
|
+
pos, ZSTD_fCost((int)sequencePrice));
|
|
1132
1156
|
opt[pos].mlen = pos;
|
|
1133
|
-
opt[pos].off =
|
|
1157
|
+
opt[pos].off = offBase;
|
|
1134
1158
|
opt[pos].litlen = litlen;
|
|
1135
1159
|
opt[pos].price = (int)sequencePrice;
|
|
1136
1160
|
} }
|
|
@@ -1230,7 +1254,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1230
1254
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
|
1231
1255
|
U32 mlen;
|
|
1232
1256
|
|
|
1233
|
-
DEBUGLOG(7, "testing match %u =>
|
|
1257
|
+
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
|
|
1234
1258
|
matchNb, matches[matchNb].off, lastML, litlen);
|
|
1235
1259
|
|
|
1236
1260
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
|
@@ -1296,7 +1320,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1296
1320
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
|
1297
1321
|
U32 const llen = opt[storePos].litlen;
|
|
1298
1322
|
U32 const mlen = opt[storePos].mlen;
|
|
1299
|
-
U32 const
|
|
1323
|
+
U32 const offBase = opt[storePos].off;
|
|
1300
1324
|
U32 const advance = llen + mlen;
|
|
1301
1325
|
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
|
1302
1326
|
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
|
@@ -1308,8 +1332,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1308
1332
|
}
|
|
1309
1333
|
|
|
1310
1334
|
assert(anchor + llen <= iend);
|
|
1311
|
-
ZSTD_updateStats(optStatePtr, llen, anchor,
|
|
1312
|
-
ZSTD_storeSeq(seqStore, llen, anchor, iend,
|
|
1335
|
+
ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
|
|
1336
|
+
ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
|
|
1313
1337
|
anchor += advance;
|
|
1314
1338
|
ip = anchor;
|
|
1315
1339
|
} }
|
|
@@ -1349,7 +1373,7 @@ size_t ZSTD_compressBlock_btopt(
|
|
|
1349
1373
|
/* ZSTD_initStats_ultra():
|
|
1350
1374
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
|
1351
1375
|
* only works on first block, with no dictionary and no ldm.
|
|
1352
|
-
* this function cannot error,
|
|
1376
|
+
* this function cannot error out; its narrow contract must be respected.
|
|
1353
1377
|
*/
|
|
1354
1378
|
static void
|
|
1355
1379
|
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
@@ -1368,7 +1392,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
|
1368
1392
|
|
|
1369
1393
|
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
|
1370
1394
|
|
|
1371
|
-
/* invalidate first scan from history */
|
|
1395
|
+
/* invalidate first scan from history, only keep entropy stats */
|
|
1372
1396
|
ZSTD_resetSeqStore(seqStore);
|
|
1373
1397
|
ms->window.base -= srcSize;
|
|
1374
1398
|
ms->window.dictLimit += (U32)srcSize;
|
|
@@ -1392,20 +1416,20 @@ size_t ZSTD_compressBlock_btultra2(
|
|
|
1392
1416
|
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
|
1393
1417
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
|
1394
1418
|
|
|
1395
|
-
/* 2-
|
|
1419
|
+
/* 2-passes strategy:
|
|
1396
1420
|
* this strategy makes a first pass over first block to collect statistics
|
|
1397
|
-
*
|
|
1398
|
-
* After 1st pass, function forgets
|
|
1421
|
+
* in order to seed next round's statistics with it.
|
|
1422
|
+
* After 1st pass, function forgets history, and starts a new block.
|
|
1399
1423
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
|
1400
1424
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
|
1401
1425
|
* The compression ratio gain is generally small (~0.5% on first block),
|
|
1402
|
-
|
|
1426
|
+
* while the cost is 2x cpu time on first block. */
|
|
1403
1427
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
|
1404
1428
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
|
1405
1429
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
|
1406
1430
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
|
1407
|
-
&& (curr == ms->window.dictLimit)
|
|
1408
|
-
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
|
1431
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
|
1432
|
+
&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
|
|
1409
1433
|
) {
|
|
1410
1434
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
|
1411
1435
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -266,11 +266,11 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
|
|
|
266
266
|
* 1 buffer for input loading
|
|
267
267
|
* 1 buffer for "next input" when submitting current one
|
|
268
268
|
* 1 buffer stuck in queue */
|
|
269
|
-
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) 2*nbWorkers + 3
|
|
269
|
+
#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3)
|
|
270
270
|
|
|
271
271
|
/* After a worker releases its rawSeqStore, it is immediately ready for reuse.
|
|
272
272
|
* So we only need one seq buffer per worker. */
|
|
273
|
-
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) nbWorkers
|
|
273
|
+
#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
|
|
274
274
|
|
|
275
275
|
/* ===== Seq Pool Wrapper ====== */
|
|
276
276
|
|
|
@@ -1734,7 +1734,7 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1734
1734
|
}
|
|
1735
1735
|
} else {
|
|
1736
1736
|
/* We have enough bytes buffered to initialize the hash,
|
|
1737
|
-
* and
|
|
1737
|
+
* and have processed enough bytes to find a sync point.
|
|
1738
1738
|
* Start scanning at the beginning of the input.
|
|
1739
1739
|
*/
|
|
1740
1740
|
assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE);
|
|
@@ -1761,17 +1761,24 @@ findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
|
|
1761
1761
|
* then a block will be emitted anyways, but this is okay, since if we
|
|
1762
1762
|
* are already synchronized we will remain synchronized.
|
|
1763
1763
|
*/
|
|
1764
|
+
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1764
1765
|
for (; pos < syncPoint.toLoad; ++pos) {
|
|
1765
1766
|
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
|
1766
|
-
|
|
1767
|
+
/* This assert is very expensive, and Debian compiles with asserts enabled.
|
|
1768
|
+
* So disable it for now. We can get similar coverage by checking it at the
|
|
1769
|
+
* beginning & end of the loop.
|
|
1770
|
+
* assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1771
|
+
*/
|
|
1767
1772
|
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
|
1768
1773
|
assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE);
|
|
1769
1774
|
if ((hash & hitMask) == hitMask) {
|
|
1770
1775
|
syncPoint.toLoad = pos + 1;
|
|
1771
1776
|
syncPoint.flush = 1;
|
|
1777
|
+
++pos; /* for assert */
|
|
1772
1778
|
break;
|
|
1773
1779
|
}
|
|
1774
1780
|
}
|
|
1781
|
+
assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash);
|
|
1775
1782
|
return syncPoint;
|
|
1776
1783
|
}
|
|
1777
1784
|
|