zstd-ruby 1.4.4.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
|
17
|
-
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
|
18
17
|
#define ZSTD_MAX_PRICE (1<<30)
|
|
19
18
|
|
|
20
19
|
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
|
@@ -24,11 +23,11 @@
|
|
|
24
23
|
* Price functions for optimal parser
|
|
25
24
|
***************************************/
|
|
26
25
|
|
|
27
|
-
#if 0 /* approximation at bit level */
|
|
26
|
+
#if 0 /* approximation at bit level (for tests) */
|
|
28
27
|
# define BITCOST_ACCURACY 0
|
|
29
28
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
30
|
-
# define WEIGHT(stat)
|
|
31
|
-
#elif 0 /* fractional bit accuracy */
|
|
29
|
+
# define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
|
|
30
|
+
#elif 0 /* fractional bit accuracy (for tests) */
|
|
32
31
|
# define BITCOST_ACCURACY 8
|
|
33
32
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
34
33
|
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
|
@@ -66,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
|
|
66
65
|
|
|
67
66
|
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
|
68
67
|
{
|
|
69
|
-
return optPtr->literalCompressionMode !=
|
|
68
|
+
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
|
70
69
|
}
|
|
71
70
|
|
|
72
71
|
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
@@ -79,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
|
79
78
|
}
|
|
80
79
|
|
|
81
80
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
81
|
+
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
|
82
|
+
{
|
|
83
|
+
size_t n;
|
|
84
|
+
U32 total = 0;
|
|
85
|
+
for (n=0; n<nbElts; n++) {
|
|
86
|
+
total += table[n];
|
|
87
|
+
}
|
|
88
|
+
return total;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
|
|
86
92
|
{
|
|
87
93
|
U32 s, sum=0;
|
|
88
|
-
DEBUGLOG(5, "
|
|
89
|
-
assert(
|
|
94
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
|
|
95
|
+
assert(shift < 30);
|
|
90
96
|
for (s=0; s<lastEltIndex+1; s++) {
|
|
91
|
-
table[s] = 1 + (table[s] >>
|
|
97
|
+
table[s] = 1 + (table[s] >> shift);
|
|
92
98
|
sum += table[s];
|
|
93
99
|
}
|
|
94
100
|
return sum;
|
|
95
101
|
}
|
|
96
102
|
|
|
103
|
+
/* ZSTD_scaleStats() :
|
|
104
|
+
* reduce all elements in table if sum is too large
|
|
105
|
+
* return the resulting sum of elements */
|
|
106
|
+
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
|
107
|
+
{
|
|
108
|
+
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
|
109
|
+
U32 const factor = prevsum >> logTarget;
|
|
110
|
+
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
|
111
|
+
assert(logTarget < 30);
|
|
112
|
+
if (factor <= 1) return prevsum;
|
|
113
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
|
|
114
|
+
}
|
|
115
|
+
|
|
97
116
|
/* ZSTD_rescaleFreqs() :
|
|
98
117
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
|
99
118
|
* take hints from dictionary if there is one
|
|
100
|
-
*
|
|
119
|
+
* and init from zero if there is none,
|
|
120
|
+
* using src for literals stats, and baseline stats for sequence symbols
|
|
101
121
|
* otherwise downscale existing stats, to be used as seed for next block.
|
|
102
122
|
*/
|
|
103
123
|
static void
|
|
@@ -126,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
126
146
|
optPtr->litSum = 0;
|
|
127
147
|
for (lit=0; lit<=MaxLit; lit++) {
|
|
128
148
|
U32 const scaleLog = 11; /* scale to 2K */
|
|
129
|
-
U32 const bitCost =
|
|
149
|
+
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
|
|
130
150
|
assert(bitCost <= scaleLog);
|
|
131
151
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
132
152
|
optPtr->litSum += optPtr->litFreq[lit];
|
|
@@ -174,14 +194,18 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
174
194
|
if (compressedLiterals) {
|
|
175
195
|
unsigned lit = MaxLit;
|
|
176
196
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
|
177
|
-
optPtr->litSum =
|
|
197
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
|
|
178
198
|
}
|
|
179
199
|
|
|
180
|
-
{ unsigned
|
|
181
|
-
|
|
182
|
-
|
|
200
|
+
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
|
201
|
+
4, 2, 1, 1, 1, 1, 1, 1,
|
|
202
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
203
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
204
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
205
|
+
1, 1, 1, 1
|
|
206
|
+
};
|
|
207
|
+
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
|
|
183
208
|
}
|
|
184
|
-
optPtr->litLengthSum = MaxLL+1;
|
|
185
209
|
|
|
186
210
|
{ unsigned ml;
|
|
187
211
|
for (ml=0; ml<=MaxML; ml++)
|
|
@@ -189,21 +213,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
189
213
|
}
|
|
190
214
|
optPtr->matchLengthSum = MaxML+1;
|
|
191
215
|
|
|
192
|
-
{ unsigned
|
|
193
|
-
|
|
194
|
-
|
|
216
|
+
{ unsigned const baseOFCfreqs[MaxOff+1] = {
|
|
217
|
+
6, 2, 1, 1, 2, 3, 4, 4,
|
|
218
|
+
4, 3, 2, 1, 1, 1, 1, 1,
|
|
219
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
220
|
+
1, 1, 1, 1, 1, 1, 1, 1
|
|
221
|
+
};
|
|
222
|
+
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
|
195
223
|
}
|
|
196
|
-
|
|
224
|
+
|
|
197
225
|
|
|
198
226
|
}
|
|
199
227
|
|
|
200
228
|
} else { /* new block : re-use previous statistics, scaled down */
|
|
201
229
|
|
|
202
230
|
if (compressedLiterals)
|
|
203
|
-
optPtr->litSum =
|
|
204
|
-
optPtr->litLengthSum =
|
|
205
|
-
optPtr->matchLengthSum =
|
|
206
|
-
optPtr->offCodeSum =
|
|
231
|
+
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
|
232
|
+
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
|
|
233
|
+
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
|
|
234
|
+
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
|
|
207
235
|
}
|
|
208
236
|
|
|
209
237
|
ZSTD_setBasePrices(optPtr, optLevel);
|
|
@@ -249,40 +277,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
|
249
277
|
}
|
|
250
278
|
}
|
|
251
279
|
|
|
252
|
-
/* ZSTD_litLengthContribution() :
|
|
253
|
-
* @return ( cost(litlength) - cost(0) )
|
|
254
|
-
* this value can then be added to rawLiteralsCost()
|
|
255
|
-
* to provide a cost which is directly comparable to a match ending at same position */
|
|
256
|
-
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
|
257
|
-
{
|
|
258
|
-
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
|
|
259
|
-
|
|
260
|
-
/* dynamic statistics */
|
|
261
|
-
{ U32 const llCode = ZSTD_LLcode(litLength);
|
|
262
|
-
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
|
|
263
|
-
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
|
|
264
|
-
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
|
265
|
-
#if 1
|
|
266
|
-
return contribution;
|
|
267
|
-
#else
|
|
268
|
-
return MAX(0, contribution); /* sometimes better, sometimes not ... */
|
|
269
|
-
#endif
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
/* ZSTD_literalsContribution() :
|
|
274
|
-
* creates a fake cost for the literals part of a sequence
|
|
275
|
-
* which can be compared to the ending cost of a match
|
|
276
|
-
* should a new match start at this position */
|
|
277
|
-
static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
|
|
278
|
-
const optState_t* const optPtr,
|
|
279
|
-
int optLevel)
|
|
280
|
-
{
|
|
281
|
-
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
|
|
282
|
-
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
|
|
283
|
-
return contribution;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
280
|
/* ZSTD_getMatchPrice() :
|
|
287
281
|
* Provides the cost of the match part (offset + matchLength) of a sequence
|
|
288
282
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
|
@@ -372,7 +366,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
|
372
366
|
|
|
373
367
|
/* Update hashTable3 up to ip (excluded)
|
|
374
368
|
Assumption : always within prefix (i.e. not within extDict) */
|
|
375
|
-
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
369
|
+
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
|
376
370
|
U32* nextToUpdate3,
|
|
377
371
|
const BYTE* const ip)
|
|
378
372
|
{
|
|
@@ -398,11 +392,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
|
398
392
|
* Binary Tree search
|
|
399
393
|
***************************************/
|
|
400
394
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
|
401
|
-
*
|
|
395
|
+
* @param ip assumed to be <= iend-8.
|
|
396
|
+
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
|
402
397
|
* @return : nb of positions added */
|
|
403
398
|
static U32 ZSTD_insertBt1(
|
|
404
|
-
ZSTD_matchState_t* ms,
|
|
399
|
+
const ZSTD_matchState_t* ms,
|
|
405
400
|
const BYTE* const ip, const BYTE* const iend,
|
|
401
|
+
U32 const target,
|
|
406
402
|
U32 const mls, const int extDict)
|
|
407
403
|
{
|
|
408
404
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
@@ -420,32 +416,36 @@ static U32 ZSTD_insertBt1(
|
|
|
420
416
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
421
417
|
const BYTE* const prefixStart = base + dictLimit;
|
|
422
418
|
const BYTE* match;
|
|
423
|
-
const U32
|
|
424
|
-
const U32 btLow = btMask >=
|
|
425
|
-
U32* smallerPtr = bt + 2*(
|
|
419
|
+
const U32 curr = (U32)(ip-base);
|
|
420
|
+
const U32 btLow = btMask >= curr ? 0 : curr - btMask;
|
|
421
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
|
426
422
|
U32* largerPtr = smallerPtr + 1;
|
|
427
423
|
U32 dummy32; /* to be nullified at the end */
|
|
428
|
-
|
|
429
|
-
|
|
424
|
+
/* windowLow is based on target because
|
|
425
|
+
* we only need positions that will be in the window at the end of the tree update.
|
|
426
|
+
*/
|
|
427
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
|
428
|
+
U32 matchEndIdx = curr+8+1;
|
|
430
429
|
size_t bestLength = 8;
|
|
431
430
|
U32 nbCompares = 1U << cParams->searchLog;
|
|
432
431
|
#ifdef ZSTD_C_PREDICT
|
|
433
|
-
U32 predictedSmall = *(bt + 2*((
|
|
434
|
-
U32 predictedLarge = *(bt + 2*((
|
|
432
|
+
U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
|
|
433
|
+
U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
|
|
435
434
|
predictedSmall += (predictedSmall>0);
|
|
436
435
|
predictedLarge += (predictedLarge>0);
|
|
437
436
|
#endif /* ZSTD_C_PREDICT */
|
|
438
437
|
|
|
439
|
-
DEBUGLOG(8, "ZSTD_insertBt1 (%u)",
|
|
438
|
+
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
|
440
439
|
|
|
440
|
+
assert(curr <= target);
|
|
441
441
|
assert(ip <= iend-8); /* required for h calculation */
|
|
442
|
-
hashTable[h] =
|
|
442
|
+
hashTable[h] = curr; /* Update Hash Table */
|
|
443
443
|
|
|
444
444
|
assert(windowLow > 0);
|
|
445
|
-
|
|
445
|
+
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
|
|
446
446
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
447
447
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
448
|
-
assert(matchIndex <
|
|
448
|
+
assert(matchIndex < curr);
|
|
449
449
|
|
|
450
450
|
#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
|
|
451
451
|
const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
|
|
@@ -508,8 +508,8 @@ static U32 ZSTD_insertBt1(
|
|
|
508
508
|
*smallerPtr = *largerPtr = 0;
|
|
509
509
|
{ U32 positions = 0;
|
|
510
510
|
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
|
|
511
|
-
assert(matchEndIdx >
|
|
512
|
-
return MAX(positions, matchEndIdx - (
|
|
511
|
+
assert(matchEndIdx > curr + 8);
|
|
512
|
+
return MAX(positions, matchEndIdx - (curr + 8));
|
|
513
513
|
}
|
|
514
514
|
}
|
|
515
515
|
|
|
@@ -526,7 +526,7 @@ void ZSTD_updateTree_internal(
|
|
|
526
526
|
idx, target, dictMode);
|
|
527
527
|
|
|
528
528
|
while(idx < target) {
|
|
529
|
-
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
|
529
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
|
530
530
|
assert(idx < (U32)(idx + forward));
|
|
531
531
|
idx += forward;
|
|
532
532
|
}
|
|
@@ -553,7 +553,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
553
553
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
554
554
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
555
555
|
const BYTE* const base = ms->window.base;
|
|
556
|
-
U32 const
|
|
556
|
+
U32 const curr = (U32)(ip-base);
|
|
557
557
|
U32 const hashLog = cParams->hashLog;
|
|
558
558
|
U32 const minMatch = (mls==3) ? 3 : 4;
|
|
559
559
|
U32* const hashTable = ms->hashTable;
|
|
@@ -567,12 +567,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
567
567
|
U32 const dictLimit = ms->window.dictLimit;
|
|
568
568
|
const BYTE* const dictEnd = dictBase + dictLimit;
|
|
569
569
|
const BYTE* const prefixStart = base + dictLimit;
|
|
570
|
-
U32 const btLow = (btMask >=
|
|
571
|
-
U32 const windowLow = ZSTD_getLowestMatchIndex(ms,
|
|
570
|
+
U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
|
|
571
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
|
|
572
572
|
U32 const matchLow = windowLow ? windowLow : 1;
|
|
573
|
-
U32* smallerPtr = bt + 2*(
|
|
574
|
-
U32* largerPtr = bt + 2*(
|
|
575
|
-
U32 matchEndIdx =
|
|
573
|
+
U32* smallerPtr = bt + 2*(curr&btMask);
|
|
574
|
+
U32* largerPtr = bt + 2*(curr&btMask) + 1;
|
|
575
|
+
U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */
|
|
576
576
|
U32 dummy32; /* to be nullified at the end */
|
|
577
577
|
U32 mnum = 0;
|
|
578
578
|
U32 nbCompares = 1U << cParams->searchLog;
|
|
@@ -591,7 +591,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
591
591
|
U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
|
|
592
592
|
|
|
593
593
|
size_t bestLength = lengthToBeat-1;
|
|
594
|
-
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u",
|
|
594
|
+
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
|
|
595
595
|
|
|
596
596
|
/* check repCode */
|
|
597
597
|
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
|
@@ -599,26 +599,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
599
599
|
U32 repCode;
|
|
600
600
|
for (repCode = ll0; repCode < lastR; repCode++) {
|
|
601
601
|
U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
|
602
|
-
U32 const repIndex =
|
|
602
|
+
U32 const repIndex = curr - repOffset;
|
|
603
603
|
U32 repLen = 0;
|
|
604
|
-
assert(
|
|
605
|
-
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ <
|
|
606
|
-
|
|
604
|
+
assert(curr >= dictLimit);
|
|
605
|
+
if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */
|
|
606
|
+
/* We must validate the repcode offset because when we're using a dictionary the
|
|
607
|
+
* valid offset range shrinks when the dictionary goes out of bounds.
|
|
608
|
+
*/
|
|
609
|
+
if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
|
|
607
610
|
repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
|
|
608
611
|
}
|
|
609
|
-
} else { /* repIndex < dictLimit || repIndex >=
|
|
612
|
+
} else { /* repIndex < dictLimit || repIndex >= curr */
|
|
610
613
|
const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
|
|
611
614
|
dmsBase + repIndex - dmsIndexDelta :
|
|
612
615
|
dictBase + repIndex;
|
|
613
|
-
assert(
|
|
616
|
+
assert(curr >= windowLow);
|
|
614
617
|
if ( dictMode == ZSTD_extDict
|
|
615
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
|
618
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
|
|
616
619
|
& (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
|
|
617
620
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
|
618
621
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
|
|
619
622
|
}
|
|
620
623
|
if (dictMode == ZSTD_dictMatchState
|
|
621
|
-
&& ( ((repOffset-1) /*intentional overflow*/ <
|
|
624
|
+
&& ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
|
|
622
625
|
& ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
|
|
623
626
|
&& (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
|
|
624
627
|
repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
|
|
@@ -640,7 +643,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
640
643
|
if ((mls == 3) /*static*/ && (bestLength < mls)) {
|
|
641
644
|
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
|
|
642
645
|
if ((matchIndex3 >= matchLow)
|
|
643
|
-
& (
|
|
646
|
+
& (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
|
|
644
647
|
size_t mlen;
|
|
645
648
|
if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
|
|
646
649
|
const BYTE* const match = base + matchIndex3;
|
|
@@ -655,26 +658,26 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
655
658
|
DEBUGLOG(8, "found small match with hlog3, of length %u",
|
|
656
659
|
(U32)mlen);
|
|
657
660
|
bestLength = mlen;
|
|
658
|
-
assert(
|
|
661
|
+
assert(curr > matchIndex3);
|
|
659
662
|
assert(mnum==0); /* no prior solution */
|
|
660
|
-
matches[0].off = (
|
|
663
|
+
matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
|
|
661
664
|
matches[0].len = (U32)mlen;
|
|
662
665
|
mnum = 1;
|
|
663
666
|
if ( (mlen > sufficient_len) |
|
|
664
667
|
(ip+mlen == iLimit) ) { /* best possible length */
|
|
665
|
-
ms->nextToUpdate =
|
|
668
|
+
ms->nextToUpdate = curr+1; /* skip insertion */
|
|
666
669
|
return 1;
|
|
667
670
|
} } }
|
|
668
671
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
|
669
|
-
}
|
|
672
|
+
} /* if (mls == 3) */
|
|
670
673
|
|
|
671
|
-
hashTable[h] =
|
|
674
|
+
hashTable[h] = curr; /* Update Hash Table */
|
|
672
675
|
|
|
673
|
-
|
|
676
|
+
for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
|
|
674
677
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
675
678
|
const BYTE* match;
|
|
676
679
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
677
|
-
assert(
|
|
680
|
+
assert(curr > matchIndex);
|
|
678
681
|
|
|
679
682
|
if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
|
|
680
683
|
assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
|
|
@@ -691,20 +694,19 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
691
694
|
|
|
692
695
|
if (matchLength > bestLength) {
|
|
693
696
|
DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
|
|
694
|
-
(U32)matchLength,
|
|
697
|
+
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
|
|
695
698
|
assert(matchEndIdx > matchIndex);
|
|
696
699
|
if (matchLength > matchEndIdx - matchIndex)
|
|
697
700
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
698
701
|
bestLength = matchLength;
|
|
699
|
-
matches[mnum].off = (
|
|
702
|
+
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
|
|
700
703
|
matches[mnum].len = (U32)matchLength;
|
|
701
704
|
mnum++;
|
|
702
705
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
703
706
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
704
707
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
|
705
708
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
|
706
|
-
|
|
707
|
-
}
|
|
709
|
+
} }
|
|
708
710
|
|
|
709
711
|
if (match[matchLength] < ip[matchLength]) {
|
|
710
712
|
/* match smaller than current */
|
|
@@ -723,12 +725,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
723
725
|
|
|
724
726
|
*smallerPtr = *largerPtr = 0;
|
|
725
727
|
|
|
728
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
|
726
729
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
|
727
730
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
|
728
731
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
|
729
732
|
const U32* const dmsBt = dms->chainTable;
|
|
730
733
|
commonLengthSmaller = commonLengthLarger = 0;
|
|
731
|
-
|
|
734
|
+
for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
|
|
732
735
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
|
733
736
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
734
737
|
const BYTE* match = dmsBase + dictMatchIndex;
|
|
@@ -739,18 +742,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
739
742
|
if (matchLength > bestLength) {
|
|
740
743
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
|
741
744
|
DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
|
|
742
|
-
(U32)matchLength,
|
|
745
|
+
(U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
|
|
743
746
|
if (matchLength > matchEndIdx - matchIndex)
|
|
744
747
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
745
748
|
bestLength = matchLength;
|
|
746
|
-
matches[mnum].off = (
|
|
749
|
+
matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
|
|
747
750
|
matches[mnum].len = (U32)matchLength;
|
|
748
751
|
mnum++;
|
|
749
752
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
750
753
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
751
754
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
|
752
|
-
|
|
753
|
-
}
|
|
755
|
+
} }
|
|
754
756
|
|
|
755
757
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
|
756
758
|
if (match[matchLength] < ip[matchLength]) {
|
|
@@ -760,71 +762,232 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
760
762
|
/* match is larger than current */
|
|
761
763
|
commonLengthLarger = matchLength;
|
|
762
764
|
dictMatchIndex = nextPtr[0];
|
|
763
|
-
|
|
764
|
-
}
|
|
765
|
-
}
|
|
765
|
+
} } } /* if (dictMode == ZSTD_dictMatchState) */
|
|
766
766
|
|
|
767
|
-
assert(matchEndIdx >
|
|
767
|
+
assert(matchEndIdx > curr+8);
|
|
768
768
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
|
769
769
|
return mnum;
|
|
770
770
|
}
|
|
771
771
|
|
|
772
|
+
typedef U32 (*ZSTD_getAllMatchesFn)(
|
|
773
|
+
ZSTD_match_t*,
|
|
774
|
+
ZSTD_matchState_t*,
|
|
775
|
+
U32*,
|
|
776
|
+
const BYTE*,
|
|
777
|
+
const BYTE*,
|
|
778
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
779
|
+
U32 const ll0,
|
|
780
|
+
U32 const lengthToBeat);
|
|
781
|
+
|
|
782
|
+
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
|
|
783
|
+
ZSTD_match_t* matches,
|
|
784
|
+
ZSTD_matchState_t* ms,
|
|
785
|
+
U32* nextToUpdate3,
|
|
786
|
+
const BYTE* ip,
|
|
787
|
+
const BYTE* const iHighLimit,
|
|
788
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
789
|
+
U32 const ll0,
|
|
790
|
+
U32 const lengthToBeat,
|
|
791
|
+
const ZSTD_dictMode_e dictMode,
|
|
792
|
+
const U32 mls)
|
|
793
|
+
{
|
|
794
|
+
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
|
|
795
|
+
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
|
|
796
|
+
if (ip < ms->window.base + ms->nextToUpdate)
|
|
797
|
+
return 0; /* skipped area */
|
|
798
|
+
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
|
|
799
|
+
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
|
|
803
|
+
|
|
804
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
|
|
805
|
+
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
|
|
806
|
+
ZSTD_match_t* matches, \
|
|
807
|
+
ZSTD_matchState_t* ms, \
|
|
808
|
+
U32* nextToUpdate3, \
|
|
809
|
+
const BYTE* ip, \
|
|
810
|
+
const BYTE* const iHighLimit, \
|
|
811
|
+
const U32 rep[ZSTD_REP_NUM], \
|
|
812
|
+
U32 const ll0, \
|
|
813
|
+
U32 const lengthToBeat) \
|
|
814
|
+
{ \
|
|
815
|
+
return ZSTD_btGetAllMatches_internal( \
|
|
816
|
+
matches, ms, nextToUpdate3, ip, iHighLimit, \
|
|
817
|
+
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
|
|
821
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
|
|
822
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
|
|
823
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
|
|
824
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
|
|
825
|
+
|
|
826
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
|
|
827
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
|
|
828
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
|
|
829
|
+
|
|
830
|
+
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
|
|
831
|
+
{ \
|
|
832
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
|
|
833
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
|
|
834
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
|
|
835
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
|
|
836
|
+
}
|
|
772
837
|
|
|
773
|
-
|
|
774
|
-
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
|
|
775
|
-
ZSTD_matchState_t* ms,
|
|
776
|
-
U32* nextToUpdate3,
|
|
777
|
-
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
|
|
778
|
-
const U32 rep[ZSTD_REP_NUM],
|
|
779
|
-
U32 const ll0,
|
|
780
|
-
U32 const lengthToBeat)
|
|
838
|
+
static ZSTD_getAllMatchesFn ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
|
|
781
839
|
{
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
840
|
+
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
|
|
841
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
|
|
842
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
|
|
843
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
|
|
844
|
+
};
|
|
845
|
+
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
|
|
846
|
+
assert((U32)dictMode < 3);
|
|
847
|
+
assert(mls - 3 < 4);
|
|
848
|
+
return getAllMatchesFns[(int)dictMode][mls - 3];
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
/*************************
|
|
852
|
+
* LDM helper functions *
|
|
853
|
+
*************************/
|
|
854
|
+
|
|
855
|
+
/* Struct containing info needed to make decision about ldm inclusion */
|
|
856
|
+
typedef struct {
|
|
857
|
+
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
|
858
|
+
U32 startPosInBlock; /* Start position of the current match candidate */
|
|
859
|
+
U32 endPosInBlock; /* End position of the current match candidate */
|
|
860
|
+
U32 offset; /* Offset of the match candidate */
|
|
861
|
+
} ZSTD_optLdm_t;
|
|
862
|
+
|
|
863
|
+
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
|
864
|
+
* Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
|
|
865
|
+
*/
|
|
866
|
+
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
|
|
867
|
+
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
|
868
|
+
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
|
869
|
+
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
|
870
|
+
if (currPos >= currSeq.litLength + currSeq.matchLength) {
|
|
871
|
+
currPos -= currSeq.litLength + currSeq.matchLength;
|
|
872
|
+
rawSeqStore->pos++;
|
|
873
|
+
} else {
|
|
874
|
+
rawSeqStore->posInSequence = currPos;
|
|
875
|
+
break;
|
|
876
|
+
}
|
|
877
|
+
}
|
|
878
|
+
if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
|
|
879
|
+
rawSeqStore->posInSequence = 0;
|
|
795
880
|
}
|
|
796
881
|
}
|
|
797
882
|
|
|
883
|
+
/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
|
|
884
|
+
* Calculates the beginning and end of the next match in the current block.
|
|
885
|
+
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
|
886
|
+
*/
|
|
887
|
+
static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
|
888
|
+
U32 blockBytesRemaining) {
|
|
889
|
+
rawSeq currSeq;
|
|
890
|
+
U32 currBlockEndPos;
|
|
891
|
+
U32 literalsBytesRemaining;
|
|
892
|
+
U32 matchBytesRemaining;
|
|
893
|
+
|
|
894
|
+
/* Setting match end position to MAX to ensure we never use an LDM during this block */
|
|
895
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
|
896
|
+
optLdm->startPosInBlock = UINT_MAX;
|
|
897
|
+
optLdm->endPosInBlock = UINT_MAX;
|
|
898
|
+
return;
|
|
899
|
+
}
|
|
900
|
+
/* Calculate appropriate bytes left in matchLength and litLength after adjusting
|
|
901
|
+
based on ldmSeqStore->posInSequence */
|
|
902
|
+
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
|
903
|
+
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
|
904
|
+
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
|
905
|
+
literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
|
|
906
|
+
currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
|
|
907
|
+
0;
|
|
908
|
+
matchBytesRemaining = (literalsBytesRemaining == 0) ?
|
|
909
|
+
currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
|
|
910
|
+
currSeq.matchLength;
|
|
911
|
+
|
|
912
|
+
/* If there are more literal bytes than bytes remaining in block, no ldm is possible */
|
|
913
|
+
if (literalsBytesRemaining >= blockBytesRemaining) {
|
|
914
|
+
optLdm->startPosInBlock = UINT_MAX;
|
|
915
|
+
optLdm->endPosInBlock = UINT_MAX;
|
|
916
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
|
|
917
|
+
return;
|
|
918
|
+
}
|
|
798
919
|
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
920
|
+
/* Matches may be < MINMATCH by this process. In that case, we will reject them
|
|
921
|
+
when we are deciding whether or not to add the ldm */
|
|
922
|
+
optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
|
|
923
|
+
optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
|
|
924
|
+
optLdm->offset = currSeq.offset;
|
|
925
|
+
|
|
926
|
+
if (optLdm->endPosInBlock > currBlockEndPos) {
|
|
927
|
+
/* Match ends after the block ends, we can't use the whole match */
|
|
928
|
+
optLdm->endPosInBlock = currBlockEndPos;
|
|
929
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
|
|
930
|
+
} else {
|
|
931
|
+
/* Consume nb of bytes equal to size of sequence left */
|
|
932
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
|
|
933
|
+
}
|
|
934
|
+
}
|
|
805
935
|
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
936
|
+
/* ZSTD_optLdm_maybeAddMatch():
|
|
937
|
+
* Adds a match if it's long enough, based on its 'matchStartPosInBlock'
|
|
938
|
+
* and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
|
|
939
|
+
*/
|
|
940
|
+
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
941
|
+
ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
|
|
942
|
+
U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
|
|
943
|
+
/* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
|
|
944
|
+
U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
|
945
|
+
U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
|
|
946
|
+
|
|
947
|
+
/* Ensure that current block position is not outside of the match */
|
|
948
|
+
if (currPosInBlock < optLdm->startPosInBlock
|
|
949
|
+
|| currPosInBlock >= optLdm->endPosInBlock
|
|
950
|
+
|| candidateMatchLength < MINMATCH) {
|
|
951
|
+
return;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
|
955
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
|
|
956
|
+
candidateOffCode, candidateMatchLength, currPosInBlock);
|
|
957
|
+
matches[*nbMatches].len = candidateMatchLength;
|
|
958
|
+
matches[*nbMatches].off = candidateOffCode;
|
|
959
|
+
(*nbMatches)++;
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
/* ZSTD_optLdm_processMatchCandidate():
|
|
964
|
+
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
|
965
|
+
*/
|
|
966
|
+
static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
|
|
967
|
+
U32 currPosInBlock, U32 remainingBytes) {
|
|
968
|
+
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
|
969
|
+
return;
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
if (currPosInBlock >= optLdm->endPosInBlock) {
|
|
973
|
+
if (currPosInBlock > optLdm->endPosInBlock) {
|
|
974
|
+
/* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
|
|
975
|
+
* at the end of a match from the ldm seq store, and will often be some bytes
|
|
976
|
+
* beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
|
977
|
+
*/
|
|
978
|
+
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
|
979
|
+
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
|
822
980
|
}
|
|
981
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
|
823
982
|
}
|
|
824
|
-
|
|
983
|
+
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
|
825
984
|
}
|
|
826
985
|
|
|
827
986
|
|
|
987
|
+
/*-*******************************
|
|
988
|
+
* Optimal parser
|
|
989
|
+
*********************************/
|
|
990
|
+
|
|
828
991
|
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
|
829
992
|
{
|
|
830
993
|
return sol.litlen + sol.mlen;
|
|
@@ -839,7 +1002,7 @@ listStats(const U32* table, int lastEltID)
|
|
|
839
1002
|
int enb;
|
|
840
1003
|
for (enb=0; enb < nbElts; enb++) {
|
|
841
1004
|
(void)table;
|
|
842
|
-
|
|
1005
|
+
/* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */
|
|
843
1006
|
RAWLOG(2, "%4i,", table[enb]);
|
|
844
1007
|
}
|
|
845
1008
|
RAWLOG(2, " \n");
|
|
@@ -865,6 +1028,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
865
1028
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
|
866
1029
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
867
1030
|
|
|
1031
|
+
ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
|
|
1032
|
+
|
|
868
1033
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
869
1034
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
|
870
1035
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
|
@@ -872,6 +1037,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
872
1037
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
|
873
1038
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
|
874
1039
|
ZSTD_optimal_t lastSequence;
|
|
1040
|
+
ZSTD_optLdm_t optLdm;
|
|
1041
|
+
|
|
1042
|
+
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
|
1043
|
+
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
|
1044
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
|
875
1045
|
|
|
876
1046
|
/* init */
|
|
877
1047
|
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
|
@@ -887,14 +1057,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
887
1057
|
/* find first match */
|
|
888
1058
|
{ U32 const litlen = (U32)(ip - anchor);
|
|
889
1059
|
U32 const ll0 = !litlen;
|
|
890
|
-
U32
|
|
1060
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
|
1061
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
|
1062
|
+
(U32)(ip-istart), (U32)(iend - ip));
|
|
891
1063
|
if (!nbMatches) { ip++; continue; }
|
|
892
1064
|
|
|
893
1065
|
/* initialize opt[0] */
|
|
894
1066
|
{ U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
|
|
895
1067
|
opt[0].mlen = 0; /* means is_a_literal */
|
|
896
1068
|
opt[0].litlen = litlen;
|
|
897
|
-
|
|
1069
|
+
/* We don't need to include the actual price of the literals because
|
|
1070
|
+
* it is static for the duration of the forward pass, and is included
|
|
1071
|
+
* in every price. We include the literal length to avoid negative
|
|
1072
|
+
* prices when we subtract the previous literal length.
|
|
1073
|
+
*/
|
|
1074
|
+
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
|
898
1075
|
|
|
899
1076
|
/* large match -> immediate encoding */
|
|
900
1077
|
{ U32 const maxML = matches[nbMatches-1].len;
|
|
@@ -914,7 +1091,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
914
1091
|
} }
|
|
915
1092
|
|
|
916
1093
|
/* set prices for first matches starting position == 0 */
|
|
917
|
-
|
|
1094
|
+
assert(opt[0].price >= 0);
|
|
1095
|
+
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
918
1096
|
U32 pos;
|
|
919
1097
|
U32 matchNb;
|
|
920
1098
|
for (pos = 1; pos < minMatch; pos++) {
|
|
@@ -923,7 +1101,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
923
1101
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
924
1102
|
U32 const offset = matches[matchNb].off;
|
|
925
1103
|
U32 const end = matches[matchNb].len;
|
|
926
|
-
repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
|
|
927
1104
|
for ( ; pos <= end ; pos++ ) {
|
|
928
1105
|
U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
|
|
929
1106
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
|
@@ -932,9 +1109,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
932
1109
|
opt[pos].mlen = pos;
|
|
933
1110
|
opt[pos].off = offset;
|
|
934
1111
|
opt[pos].litlen = litlen;
|
|
935
|
-
opt[pos].price = sequencePrice;
|
|
936
|
-
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
|
|
937
|
-
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
|
|
1112
|
+
opt[pos].price = (int)sequencePrice;
|
|
938
1113
|
} }
|
|
939
1114
|
last_pos = pos-1;
|
|
940
1115
|
}
|
|
@@ -949,9 +1124,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
949
1124
|
/* Fix current position with one literal if cheaper */
|
|
950
1125
|
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
|
|
951
1126
|
int const price = opt[cur-1].price
|
|
952
|
-
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
|
953
|
-
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
|
954
|
-
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
|
1127
|
+
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
|
1128
|
+
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
|
1129
|
+
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
|
955
1130
|
assert(price < 1000000000); /* overflow check */
|
|
956
1131
|
if (price <= opt[cur].price) {
|
|
957
1132
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
|
@@ -961,7 +1136,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
961
1136
|
opt[cur].off = 0;
|
|
962
1137
|
opt[cur].litlen = litlen;
|
|
963
1138
|
opt[cur].price = price;
|
|
964
|
-
memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
|
|
965
1139
|
} else {
|
|
966
1140
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
|
|
967
1141
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
|
|
@@ -969,6 +1143,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
969
1143
|
}
|
|
970
1144
|
}
|
|
971
1145
|
|
|
1146
|
+
/* Set the repcodes of the current position. We must do it here
|
|
1147
|
+
* because we rely on the repcodes of the 2nd to last sequence being
|
|
1148
|
+
* correct to set the next chunk's repcodes during the backward
|
|
1149
|
+
* traversal.
|
|
1150
|
+
*/
|
|
1151
|
+
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
|
1152
|
+
assert(cur >= opt[cur].mlen);
|
|
1153
|
+
if (opt[cur].mlen != 0) {
|
|
1154
|
+
U32 const prev = cur - opt[cur].mlen;
|
|
1155
|
+
repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
|
1156
|
+
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
|
1157
|
+
} else {
|
|
1158
|
+
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
|
1159
|
+
}
|
|
1160
|
+
|
|
972
1161
|
/* last match must start at a minimum distance of 8 from oend */
|
|
973
1162
|
if (inr > ilimit) continue;
|
|
974
1163
|
|
|
@@ -980,12 +1169,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
980
1169
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
|
981
1170
|
}
|
|
982
1171
|
|
|
1172
|
+
assert(opt[cur].price >= 0);
|
|
983
1173
|
{ U32 const ll0 = (opt[cur].mlen != 0);
|
|
984
1174
|
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
|
985
|
-
U32 const previousPrice = opt[cur].price;
|
|
1175
|
+
U32 const previousPrice = (U32)opt[cur].price;
|
|
986
1176
|
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
987
|
-
U32
|
|
1177
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
|
988
1178
|
U32 matchNb;
|
|
1179
|
+
|
|
1180
|
+
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
|
1181
|
+
(U32)(inr-istart), (U32)(iend-inr));
|
|
1182
|
+
|
|
989
1183
|
if (!nbMatches) {
|
|
990
1184
|
DEBUGLOG(7, "rPos:%u : no match found", cur);
|
|
991
1185
|
continue;
|
|
@@ -1009,7 +1203,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1009
1203
|
/* set prices using matches found at position == cur */
|
|
1010
1204
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
1011
1205
|
U32 const offset = matches[matchNb].off;
|
|
1012
|
-
repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0);
|
|
1013
1206
|
U32 const lastML = matches[matchNb].len;
|
|
1014
1207
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
|
1015
1208
|
U32 mlen;
|
|
@@ -1019,7 +1212,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1019
1212
|
|
|
1020
1213
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
|
1021
1214
|
U32 const pos = cur + mlen;
|
|
1022
|
-
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
|
1215
|
+
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
|
1023
1216
|
|
|
1024
1217
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
|
1025
1218
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
|
@@ -1029,8 +1222,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1029
1222
|
opt[pos].off = offset;
|
|
1030
1223
|
opt[pos].litlen = litlen;
|
|
1031
1224
|
opt[pos].price = price;
|
|
1032
|
-
ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
|
|
1033
|
-
memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
|
|
1034
1225
|
} else {
|
|
1035
1226
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
|
1036
1227
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
|
@@ -1046,6 +1237,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1046
1237
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1047
1238
|
assert(opt[0].mlen == 0);
|
|
1048
1239
|
|
|
1240
|
+
/* Set the next chunk's repcodes based on the repcodes of the beginning
|
|
1241
|
+
* of the last match, and the last sequence. This avoids us having to
|
|
1242
|
+
* update them while traversing the sequences.
|
|
1243
|
+
*/
|
|
1244
|
+
if (lastSequence.mlen != 0) {
|
|
1245
|
+
repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
|
1246
|
+
ZSTD_memcpy(rep, &reps, sizeof(reps));
|
|
1247
|
+
} else {
|
|
1248
|
+
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1049
1251
|
{ U32 const storeEnd = cur + 1;
|
|
1050
1252
|
U32 storeStart = storeEnd;
|
|
1051
1253
|
U32 seqPos = cur;
|
|
@@ -1082,20 +1284,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1082
1284
|
continue; /* will finish */
|
|
1083
1285
|
}
|
|
1084
1286
|
|
|
1085
|
-
/* repcodes update : like ZSTD_updateRep(), but update in place */
|
|
1086
|
-
if (offCode >= ZSTD_REP_NUM) { /* full offset */
|
|
1087
|
-
rep[2] = rep[1];
|
|
1088
|
-
rep[1] = rep[0];
|
|
1089
|
-
rep[0] = offCode - ZSTD_REP_MOVE;
|
|
1090
|
-
} else { /* repcode */
|
|
1091
|
-
U32 const repCode = offCode + (llen==0);
|
|
1092
|
-
if (repCode) { /* note : if repCode==0, no change */
|
|
1093
|
-
U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
|
|
1094
|
-
if (repCode >= 2) rep[2] = rep[1];
|
|
1095
|
-
rep[1] = rep[0];
|
|
1096
|
-
rep[0] = currentOffset;
|
|
1097
|
-
} }
|
|
1098
|
-
|
|
1099
1287
|
assert(anchor + llen <= iend);
|
|
1100
1288
|
ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
|
|
1101
1289
|
ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
|
|
@@ -1104,45 +1292,36 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1104
1292
|
} }
|
|
1105
1293
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
|
1106
1294
|
}
|
|
1107
|
-
|
|
1108
1295
|
} /* while (ip < ilimit) */
|
|
1109
1296
|
|
|
1110
1297
|
/* Return the last literals size */
|
|
1111
1298
|
return (size_t)(iend - anchor);
|
|
1112
1299
|
}
|
|
1113
1300
|
|
|
1301
|
+
static size_t ZSTD_compressBlock_opt0(
|
|
1302
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1303
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
|
1304
|
+
{
|
|
1305
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
static size_t ZSTD_compressBlock_opt2(
|
|
1309
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1310
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
|
1311
|
+
{
|
|
1312
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
|
1313
|
+
}
|
|
1114
1314
|
|
|
1115
1315
|
size_t ZSTD_compressBlock_btopt(
|
|
1116
1316
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1117
1317
|
const void* src, size_t srcSize)
|
|
1118
1318
|
{
|
|
1119
1319
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
|
1120
|
-
return
|
|
1320
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1121
1321
|
}
|
|
1122
1322
|
|
|
1123
1323
|
|
|
1124
|
-
/* used in 2-pass strategy */
|
|
1125
|
-
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
|
1126
|
-
{
|
|
1127
|
-
U32 s, sum=0;
|
|
1128
|
-
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
|
1129
|
-
for (s=0; s<lastEltIndex+1; s++) {
|
|
1130
|
-
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
|
1131
|
-
table[s]--;
|
|
1132
|
-
sum += table[s];
|
|
1133
|
-
}
|
|
1134
|
-
return sum;
|
|
1135
|
-
}
|
|
1136
1324
|
|
|
1137
|
-
/* used in 2-pass strategy */
|
|
1138
|
-
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
|
1139
|
-
{
|
|
1140
|
-
if (ZSTD_compressedLiterals(optPtr))
|
|
1141
|
-
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
|
1142
|
-
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
|
1143
|
-
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
|
1144
|
-
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
|
1145
|
-
}
|
|
1146
1325
|
|
|
1147
1326
|
/* ZSTD_initStats_ultra():
|
|
1148
1327
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
|
@@ -1156,7 +1335,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
|
1156
1335
|
const void* src, size_t srcSize)
|
|
1157
1336
|
{
|
|
1158
1337
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
|
1159
|
-
|
|
1338
|
+
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
|
1160
1339
|
|
|
1161
1340
|
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
|
1162
1341
|
assert(ms->opt.litLengthSum == 0); /* first block */
|
|
@@ -1164,7 +1343,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
|
1164
1343
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
|
1165
1344
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
|
1166
1345
|
|
|
1167
|
-
|
|
1346
|
+
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
|
1168
1347
|
|
|
1169
1348
|
/* invalidate first scan from history */
|
|
1170
1349
|
ZSTD_resetSeqStore(seqStore);
|
|
@@ -1173,8 +1352,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
|
1173
1352
|
ms->window.lowLimit = ms->window.dictLimit;
|
|
1174
1353
|
ms->nextToUpdate = ms->window.dictLimit;
|
|
1175
1354
|
|
|
1176
|
-
/* re-inforce weight of collected statistics */
|
|
1177
|
-
ZSTD_upscaleStats(&ms->opt);
|
|
1178
1355
|
}
|
|
1179
1356
|
|
|
1180
1357
|
size_t ZSTD_compressBlock_btultra(
|
|
@@ -1182,14 +1359,14 @@ size_t ZSTD_compressBlock_btultra(
|
|
|
1182
1359
|
const void* src, size_t srcSize)
|
|
1183
1360
|
{
|
|
1184
1361
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
|
1185
|
-
return
|
|
1362
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1186
1363
|
}
|
|
1187
1364
|
|
|
1188
1365
|
size_t ZSTD_compressBlock_btultra2(
|
|
1189
1366
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1190
1367
|
const void* src, size_t srcSize)
|
|
1191
1368
|
{
|
|
1192
|
-
U32 const
|
|
1369
|
+
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
|
1193
1370
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
|
1194
1371
|
|
|
1195
1372
|
/* 2-pass strategy:
|
|
@@ -1204,41 +1381,41 @@ size_t ZSTD_compressBlock_btultra2(
|
|
|
1204
1381
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
|
1205
1382
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
|
1206
1383
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
|
1207
|
-
&& (
|
|
1384
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
|
1208
1385
|
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
|
1209
1386
|
) {
|
|
1210
1387
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
|
1211
1388
|
}
|
|
1212
1389
|
|
|
1213
|
-
return
|
|
1390
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1214
1391
|
}
|
|
1215
1392
|
|
|
1216
1393
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
|
1217
1394
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1218
1395
|
const void* src, size_t srcSize)
|
|
1219
1396
|
{
|
|
1220
|
-
return
|
|
1397
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
|
1221
1398
|
}
|
|
1222
1399
|
|
|
1223
1400
|
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
|
1224
1401
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1225
1402
|
const void* src, size_t srcSize)
|
|
1226
1403
|
{
|
|
1227
|
-
return
|
|
1404
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
|
1228
1405
|
}
|
|
1229
1406
|
|
|
1230
1407
|
size_t ZSTD_compressBlock_btopt_extDict(
|
|
1231
1408
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1232
1409
|
const void* src, size_t srcSize)
|
|
1233
1410
|
{
|
|
1234
|
-
return
|
|
1411
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
|
1235
1412
|
}
|
|
1236
1413
|
|
|
1237
1414
|
size_t ZSTD_compressBlock_btultra_extDict(
|
|
1238
1415
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1239
1416
|
const void* src, size_t srcSize)
|
|
1240
1417
|
{
|
|
1241
|
-
return
|
|
1418
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
|
1242
1419
|
}
|
|
1243
1420
|
|
|
1244
1421
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|