extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -12,42 +12,52 @@
|
|
12
12
|
#include "hist.h"
|
13
13
|
#include "zstd_opt.h"
|
14
14
|
|
15
|
+
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|
16
|
+
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|
17
|
+
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
|
15
18
|
|
16
19
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
17
|
-
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
18
20
|
#define ZSTD_MAX_PRICE (1<<30)
|
19
21
|
|
20
|
-
#define ZSTD_PREDEF_THRESHOLD
|
22
|
+
#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
21
23
|
|
22
24
|
|
23
25
|
/*-*************************************
|
24
26
|
* Price functions for optimal parser
|
25
27
|
***************************************/
|
26
28
|
|
27
|
-
#if 0 /* approximation at bit level */
|
29
|
+
#if 0 /* approximation at bit level (for tests) */
|
28
30
|
# define BITCOST_ACCURACY 0
|
29
31
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
30
|
-
# define WEIGHT(stat)
|
31
|
-
#elif 0 /* fractional bit accuracy */
|
32
|
+
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
|
33
|
+
#elif 0 /* fractional bit accuracy (for tests) */
|
32
34
|
# define BITCOST_ACCURACY 8
|
33
35
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
34
|
-
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
36
|
+
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
|
35
37
|
#else /* opt==approx, ultra==accurate */
|
36
38
|
# define BITCOST_ACCURACY 8
|
37
39
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
38
|
-
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
40
|
+
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
39
41
|
#endif
|
40
42
|
|
43
|
+
/* ZSTD_bitWeight() :
|
44
|
+
* provide estimated "cost" of a stat in full bits only */
|
41
45
|
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
|
42
46
|
{
|
43
47
|
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
|
44
48
|
}
|
45
49
|
|
50
|
+
/* ZSTD_fracWeight() :
|
51
|
+
* provide fractional-bit "cost" of a stat,
|
52
|
+
* using linear interpolation approximation */
|
46
53
|
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
47
54
|
{
|
48
55
|
U32 const stat = rawStat + 1;
|
49
56
|
U32 const hb = ZSTD_highbit32(stat);
|
50
57
|
U32 const BWeight = hb * BITCOST_MULTIPLIER;
|
58
|
+
/* Fweight was meant for "Fractional weight"
|
59
|
+
* but it's effectively a value between 1 and 2
|
60
|
+
* using fixed point arithmetic */
|
51
61
|
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
|
52
62
|
U32 const weight = BWeight + FWeight;
|
53
63
|
assert(hb + BITCOST_ACCURACY < 31);
|
@@ -58,7 +68,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
58
68
|
/* debugging function,
|
59
69
|
* @return price in bytes as fractional value
|
60
70
|
* for debug messages only */
|
61
|
-
MEM_STATIC double ZSTD_fCost(
|
71
|
+
MEM_STATIC double ZSTD_fCost(int price)
|
62
72
|
{
|
63
73
|
return (double)price / (BITCOST_MULTIPLIER*8);
|
64
74
|
}
|
@@ -66,7 +76,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
|
66
76
|
|
67
77
|
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
68
78
|
{
|
69
|
-
return optPtr->literalCompressionMode !=
|
79
|
+
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
70
80
|
}
|
71
81
|
|
72
82
|
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
@@ -79,25 +89,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
79
89
|
}
|
80
90
|
|
81
91
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
92
|
+
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
93
|
+
{
|
94
|
+
size_t n;
|
95
|
+
U32 total = 0;
|
96
|
+
for (n=0; n<nbElts; n++) {
|
97
|
+
total += table[n];
|
98
|
+
}
|
99
|
+
return total;
|
100
|
+
}
|
101
|
+
|
102
|
+
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
|
103
|
+
|
104
|
+
static U32
|
105
|
+
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
|
86
106
|
{
|
87
107
|
U32 s, sum=0;
|
88
|
-
DEBUGLOG(5, "
|
89
|
-
|
108
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
|
109
|
+
(unsigned)lastEltIndex+1, (unsigned)shift );
|
110
|
+
assert(shift < 30);
|
90
111
|
for (s=0; s<lastEltIndex+1; s++) {
|
91
|
-
|
92
|
-
|
112
|
+
unsigned const base = base1 ? 1 : (table[s]>0);
|
113
|
+
unsigned const newStat = base + (table[s] >> shift);
|
114
|
+
sum += newStat;
|
115
|
+
table[s] = newStat;
|
93
116
|
}
|
94
117
|
return sum;
|
95
118
|
}
|
96
119
|
|
120
|
+
/* ZSTD_scaleStats() :
|
121
|
+
* reduce all elt frequencies in table if sum too large
|
122
|
+
* return the resulting sum of elements */
|
123
|
+
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
124
|
+
{
|
125
|
+
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
126
|
+
U32 const factor = prevsum >> logTarget;
|
127
|
+
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
128
|
+
assert(logTarget < 30);
|
129
|
+
if (factor <= 1) return prevsum;
|
130
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
|
131
|
+
}
|
132
|
+
|
97
133
|
/* ZSTD_rescaleFreqs() :
|
98
134
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
99
135
|
* take hints from dictionary if there is one
|
100
|
-
*
|
136
|
+
* and init from zero if there is none,
|
137
|
+
* using src for literals stats, and baseline stats for sequence symbols
|
101
138
|
* otherwise downscale existing stats, to be used as seed for next block.
|
102
139
|
*/
|
103
140
|
static void
|
@@ -109,24 +146,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
109
146
|
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
110
147
|
optPtr->priceType = zop_dynamic;
|
111
148
|
|
112
|
-
if (optPtr->litLengthSum == 0) { /* first block
|
113
|
-
|
114
|
-
|
149
|
+
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
|
150
|
+
|
151
|
+
/* heuristic: use pre-defined stats for too small inputs */
|
152
|
+
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
|
153
|
+
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
|
115
154
|
optPtr->priceType = zop_predef;
|
116
155
|
}
|
117
156
|
|
118
157
|
assert(optPtr->symbolCosts != NULL);
|
119
158
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
120
|
-
|
159
|
+
|
160
|
+
/* huffman stats covering the full value set : table presumed generated by dictionary */
|
121
161
|
optPtr->priceType = zop_dynamic;
|
122
162
|
|
123
163
|
if (compressedLiterals) {
|
164
|
+
/* generate literals statistics from huffman table */
|
124
165
|
unsigned lit;
|
125
166
|
assert(optPtr->litFreq != NULL);
|
126
167
|
optPtr->litSum = 0;
|
127
168
|
for (lit=0; lit<=MaxLit; lit++) {
|
128
169
|
U32 const scaleLog = 11; /* scale to 2K */
|
129
|
-
U32 const bitCost =
|
170
|
+
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
|
130
171
|
assert(bitCost <= scaleLog);
|
131
172
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
132
173
|
optPtr->litSum += optPtr->litFreq[lit];
|
@@ -168,20 +209,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
168
209
|
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
169
210
|
} }
|
170
211
|
|
171
|
-
} else { /*
|
212
|
+
} else { /* first block, no dictionary */
|
172
213
|
|
173
214
|
assert(optPtr->litFreq != NULL);
|
174
215
|
if (compressedLiterals) {
|
216
|
+
/* base initial cost of literals on direct frequency within src */
|
175
217
|
unsigned lit = MaxLit;
|
176
218
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
177
|
-
optPtr->litSum =
|
219
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
|
178
220
|
}
|
179
221
|
|
180
|
-
{ unsigned
|
181
|
-
|
182
|
-
|
222
|
+
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
223
|
+
4, 2, 1, 1, 1, 1, 1, 1,
|
224
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
225
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
226
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
227
|
+
1, 1, 1, 1
|
228
|
+
};
|
229
|
+
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
|
230
|
+
optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
|
183
231
|
}
|
184
|
-
optPtr->litLengthSum = MaxLL+1;
|
185
232
|
|
186
233
|
{ unsigned ml;
|
187
234
|
for (ml=0; ml<=MaxML; ml++)
|
@@ -189,21 +236,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
189
236
|
}
|
190
237
|
optPtr->matchLengthSum = MaxML+1;
|
191
238
|
|
192
|
-
{ unsigned
|
193
|
-
|
194
|
-
|
239
|
+
{ unsigned const baseOFCfreqs[MaxOff+1] = {
|
240
|
+
6, 2, 1, 1, 2, 3, 4, 4,
|
241
|
+
4, 3, 2, 1, 1, 1, 1, 1,
|
242
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
243
|
+
1, 1, 1, 1, 1, 1, 1, 1
|
244
|
+
};
|
245
|
+
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
|
246
|
+
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
195
247
|
}
|
196
|
-
optPtr->offCodeSum = MaxOff+1;
|
197
248
|
|
198
249
|
}
|
199
250
|
|
200
|
-
} else { /* new block :
|
251
|
+
} else { /* new block : scale down accumulated statistics */
|
201
252
|
|
202
253
|
if (compressedLiterals)
|
203
|
-
optPtr->litSum =
|
204
|
-
optPtr->litLengthSum =
|
205
|
-
optPtr->matchLengthSum =
|
206
|
-
optPtr->offCodeSum =
|
254
|
+
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
255
|
+
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
|
256
|
+
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
|
257
|
+
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
|
207
258
|
}
|
208
259
|
|
209
260
|
ZSTD_setBasePrices(optPtr, optLevel);
|
@@ -216,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
216
267
|
const optState_t* const optPtr,
|
217
268
|
int optLevel)
|
218
269
|
{
|
270
|
+
DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
|
219
271
|
if (litLength == 0) return 0;
|
220
272
|
|
221
273
|
if (!ZSTD_compressedLiterals(optPtr))
|
@@ -225,11 +277,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
225
277
|
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
226
278
|
|
227
279
|
/* dynamic statistics */
|
228
|
-
{ U32 price =
|
280
|
+
{ U32 price = optPtr->litSumBasePrice * litLength;
|
281
|
+
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
|
229
282
|
U32 u;
|
283
|
+
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
|
230
284
|
for (u=0; u < litLength; u++) {
|
231
|
-
|
232
|
-
|
285
|
+
U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
|
286
|
+
if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
|
287
|
+
price -= litPrice;
|
233
288
|
}
|
234
289
|
return price;
|
235
290
|
}
|
@@ -239,7 +294,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
239
294
|
* cost of literalLength symbol */
|
240
295
|
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
241
296
|
{
|
242
|
-
|
297
|
+
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
|
298
|
+
if (optPtr->priceType == zop_predef)
|
299
|
+
return WEIGHT(litLength, optLevel);
|
300
|
+
|
301
|
+
/* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
|
302
|
+
* because it isn't representable in the zstd format.
|
303
|
+
* So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
|
304
|
+
* In such a case, the block would be all literals.
|
305
|
+
*/
|
306
|
+
if (litLength == ZSTD_BLOCKSIZE_MAX)
|
307
|
+
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
|
243
308
|
|
244
309
|
/* dynamic statistics */
|
245
310
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
@@ -250,22 +315,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
250
315
|
}
|
251
316
|
|
252
317
|
/* ZSTD_getMatchPrice() :
|
253
|
-
* Provides the cost of the match part (offset + matchLength) of a sequence
|
318
|
+
* Provides the cost of the match part (offset + matchLength) of a sequence.
|
254
319
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
255
|
-
*
|
320
|
+
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
|
321
|
+
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
|
322
|
+
*/
|
256
323
|
FORCE_INLINE_TEMPLATE U32
|
257
|
-
ZSTD_getMatchPrice(U32 const
|
324
|
+
ZSTD_getMatchPrice(U32 const offBase,
|
258
325
|
U32 const matchLength,
|
259
326
|
const optState_t* const optPtr,
|
260
327
|
int const optLevel)
|
261
328
|
{
|
262
329
|
U32 price;
|
263
|
-
U32 const offCode = ZSTD_highbit32(
|
330
|
+
U32 const offCode = ZSTD_highbit32(offBase);
|
264
331
|
U32 const mlBase = matchLength - MINMATCH;
|
265
332
|
assert(matchLength >= MINMATCH);
|
266
333
|
|
267
|
-
if (optPtr->priceType == zop_predef) /* fixed scheme,
|
268
|
-
return WEIGHT(mlBase, optLevel)
|
334
|
+
if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
|
335
|
+
return WEIGHT(mlBase, optLevel)
|
336
|
+
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
|
269
337
|
|
270
338
|
/* dynamic statistics */
|
271
339
|
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
|
@@ -284,10 +352,10 @@ ZSTD_getMatchPrice(U32 const offset,
|
|
284
352
|
}
|
285
353
|
|
286
354
|
/* ZSTD_updateStats() :
|
287
|
-
* assumption : literals +
|
355
|
+
* assumption : literals + litLength <= iend */
|
288
356
|
static void ZSTD_updateStats(optState_t* const optPtr,
|
289
357
|
U32 litLength, const BYTE* literals,
|
290
|
-
U32
|
358
|
+
U32 offBase, U32 matchLength)
|
291
359
|
{
|
292
360
|
/* literals */
|
293
361
|
if (ZSTD_compressedLiterals(optPtr)) {
|
@@ -303,8 +371,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
|
303
371
|
optPtr->litLengthSum++;
|
304
372
|
}
|
305
373
|
|
306
|
-
/*
|
307
|
-
{ U32 const offCode = ZSTD_highbit32(
|
374
|
+
/* offset code : follows storeSeq() numeric representation */
|
375
|
+
{ U32 const offCode = ZSTD_highbit32(offBase);
|
308
376
|
assert(offCode <= MaxOff);
|
309
377
|
optPtr->offCodeFreq[offCode]++;
|
310
378
|
optPtr->offCodeSum++;
|
@@ -338,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
338
406
|
|
339
407
|
/* Update hashTable3 up to ip (excluded)
|
340
408
|
Assumption : always within prefix (i.e. not within extDict) */
|
341
|
-
static
|
342
|
-
|
343
|
-
|
409
|
+
static
|
410
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
411
|
+
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
412
|
+
U32* nextToUpdate3,
|
413
|
+
const BYTE* const ip)
|
344
414
|
{
|
345
415
|
U32* const hashTable3 = ms->hashTable3;
|
346
416
|
U32 const hashLog3 = ms->hashLog3;
|
@@ -364,11 +434,15 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
364
434
|
* Binary Tree search
|
365
435
|
***************************************/
|
366
436
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
367
|
-
*
|
437
|
+
* @param ip assumed <= iend-8 .
|
438
|
+
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
368
439
|
* @return : nb of positions added */
|
369
|
-
static
|
370
|
-
|
440
|
+
static
|
441
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
442
|
+
U32 ZSTD_insertBt1(
|
443
|
+
const ZSTD_matchState_t* ms,
|
371
444
|
const BYTE* const ip, const BYTE* const iend,
|
445
|
+
U32 const target,
|
372
446
|
U32 const mls, const int extDict)
|
373
447
|
{
|
374
448
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
@@ -391,7 +465,10 @@ static U32 ZSTD_insertBt1(
|
|
391
465
|
U32* smallerPtr = bt + 2*(curr&btMask);
|
392
466
|
U32* largerPtr = smallerPtr + 1;
|
393
467
|
U32 dummy32; /* to be nullified at the end */
|
394
|
-
|
468
|
+
/* windowLow is based on target because
|
469
|
+
* we only need positions that will be in the window at the end of the tree update.
|
470
|
+
*/
|
471
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
395
472
|
U32 matchEndIdx = curr+8+1;
|
396
473
|
size_t bestLength = 8;
|
397
474
|
U32 nbCompares = 1U << cParams->searchLog;
|
@@ -404,11 +481,12 @@ static U32 ZSTD_insertBt1(
|
|
404
481
|
|
405
482
|
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
406
483
|
|
484
|
+
assert(curr <= target);
|
407
485
|
assert(ip <= iend-8); /* required for h calculation */
|
408
486
|
hashTable[h] = curr; /* Update Hash Table */
|
409
487
|
|
410
488
|
assert(windowLow > 0);
|
411
|
-
|
489
|
+
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
|
412
490
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
413
491
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
414
492
|
assert(matchIndex < curr);
|
@@ -480,6 +558,7 @@ static U32 ZSTD_insertBt1(
|
|
480
558
|
}
|
481
559
|
|
482
560
|
FORCE_INLINE_TEMPLATE
|
561
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
483
562
|
void ZSTD_updateTree_internal(
|
484
563
|
ZSTD_matchState_t* ms,
|
485
564
|
const BYTE* const ip, const BYTE* const iend,
|
@@ -488,11 +567,11 @@ void ZSTD_updateTree_internal(
|
|
488
567
|
const BYTE* const base = ms->window.base;
|
489
568
|
U32 const target = (U32)(ip - base);
|
490
569
|
U32 idx = ms->nextToUpdate;
|
491
|
-
DEBUGLOG(
|
570
|
+
DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
492
571
|
idx, target, dictMode);
|
493
572
|
|
494
573
|
while(idx < target) {
|
495
|
-
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
574
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
496
575
|
assert(idx < (U32)(idx + forward));
|
497
576
|
idx += forward;
|
498
577
|
}
|
@@ -506,15 +585,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
506
585
|
}
|
507
586
|
|
508
587
|
FORCE_INLINE_TEMPLATE
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
588
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
589
|
+
U32
|
590
|
+
ZSTD_insertBtAndGetAllMatches (
|
591
|
+
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
592
|
+
ZSTD_matchState_t* ms,
|
593
|
+
U32* nextToUpdate3,
|
594
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
595
|
+
const ZSTD_dictMode_e dictMode,
|
596
|
+
const U32 rep[ZSTD_REP_NUM],
|
597
|
+
const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
598
|
+
const U32 lengthToBeat,
|
599
|
+
const U32 mls /* template */)
|
518
600
|
{
|
519
601
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
520
602
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
@@ -597,7 +679,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
597
679
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
598
680
|
repCode, ll0, repOffset, repLen);
|
599
681
|
bestLength = repLen;
|
600
|
-
matches[mnum].off = repCode - ll0;
|
682
|
+
matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
|
601
683
|
matches[mnum].len = (U32)repLen;
|
602
684
|
mnum++;
|
603
685
|
if ( (repLen > sufficient_len)
|
@@ -626,7 +708,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
626
708
|
bestLength = mlen;
|
627
709
|
assert(curr > matchIndex3);
|
628
710
|
assert(mnum==0); /* no prior solution */
|
629
|
-
matches[0].off = (curr - matchIndex3)
|
711
|
+
matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
|
630
712
|
matches[0].len = (U32)mlen;
|
631
713
|
mnum = 1;
|
632
714
|
if ( (mlen > sufficient_len) |
|
@@ -635,11 +717,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
635
717
|
return 1;
|
636
718
|
} } }
|
637
719
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
638
|
-
}
|
720
|
+
} /* if (mls == 3) */
|
639
721
|
|
640
722
|
hashTable[h] = curr; /* Update Hash Table */
|
641
723
|
|
642
|
-
|
724
|
+
for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
|
643
725
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
644
726
|
const BYTE* match;
|
645
727
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
@@ -659,21 +741,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
659
741
|
}
|
660
742
|
|
661
743
|
if (matchLength > bestLength) {
|
662
|
-
DEBUGLOG(8, "found match of length %u at distance %u (
|
663
|
-
(U32)matchLength, curr - matchIndex, curr - matchIndex
|
744
|
+
DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
|
745
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
664
746
|
assert(matchEndIdx > matchIndex);
|
665
747
|
if (matchLength > matchEndIdx - matchIndex)
|
666
748
|
matchEndIdx = matchIndex + (U32)matchLength;
|
667
749
|
bestLength = matchLength;
|
668
|
-
matches[mnum].off = (curr - matchIndex)
|
750
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
669
751
|
matches[mnum].len = (U32)matchLength;
|
670
752
|
mnum++;
|
671
753
|
if ( (matchLength > ZSTD_OPT_NUM)
|
672
754
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
673
755
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
674
756
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
675
|
-
|
676
|
-
}
|
757
|
+
} }
|
677
758
|
|
678
759
|
if (match[matchLength] < ip[matchLength]) {
|
679
760
|
/* match smaller than current */
|
@@ -692,12 +773,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
692
773
|
|
693
774
|
*smallerPtr = *largerPtr = 0;
|
694
775
|
|
776
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
695
777
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
696
778
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
697
779
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
698
780
|
const U32* const dmsBt = dms->chainTable;
|
699
781
|
commonLengthSmaller = commonLengthLarger = 0;
|
700
|
-
|
782
|
+
for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
|
701
783
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
702
784
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
703
785
|
const BYTE* match = dmsBase + dictMatchIndex;
|
@@ -707,19 +789,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
707
789
|
|
708
790
|
if (matchLength > bestLength) {
|
709
791
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
710
|
-
DEBUGLOG(8, "found dms match of length %u at distance %u (
|
711
|
-
(U32)matchLength, curr - matchIndex, curr - matchIndex
|
792
|
+
DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
|
793
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
712
794
|
if (matchLength > matchEndIdx - matchIndex)
|
713
795
|
matchEndIdx = matchIndex + (U32)matchLength;
|
714
796
|
bestLength = matchLength;
|
715
|
-
matches[mnum].off = (curr - matchIndex)
|
797
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
716
798
|
matches[mnum].len = (U32)matchLength;
|
717
799
|
mnum++;
|
718
800
|
if ( (matchLength > ZSTD_OPT_NUM)
|
719
801
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
720
802
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
721
|
-
|
722
|
-
}
|
803
|
+
} }
|
723
804
|
|
724
805
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
725
806
|
if (match[matchLength] < ip[matchLength]) {
|
@@ -729,39 +810,93 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
729
810
|
/* match is larger than current */
|
730
811
|
commonLengthLarger = matchLength;
|
731
812
|
dictMatchIndex = nextPtr[0];
|
732
|
-
|
733
|
-
}
|
734
|
-
}
|
813
|
+
} } } /* if (dictMode == ZSTD_dictMatchState) */
|
735
814
|
|
736
815
|
assert(matchEndIdx > curr+8);
|
737
816
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
738
817
|
return mnum;
|
739
818
|
}
|
740
819
|
|
820
|
+
typedef U32 (*ZSTD_getAllMatchesFn)(
|
821
|
+
ZSTD_match_t*,
|
822
|
+
ZSTD_matchState_t*,
|
823
|
+
U32*,
|
824
|
+
const BYTE*,
|
825
|
+
const BYTE*,
|
826
|
+
const U32 rep[ZSTD_REP_NUM],
|
827
|
+
U32 const ll0,
|
828
|
+
U32 const lengthToBeat);
|
741
829
|
|
742
|
-
FORCE_INLINE_TEMPLATE
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
830
|
+
FORCE_INLINE_TEMPLATE
|
831
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
832
|
+
U32 ZSTD_btGetAllMatches_internal(
|
833
|
+
ZSTD_match_t* matches,
|
834
|
+
ZSTD_matchState_t* ms,
|
835
|
+
U32* nextToUpdate3,
|
836
|
+
const BYTE* ip,
|
837
|
+
const BYTE* const iHighLimit,
|
838
|
+
const U32 rep[ZSTD_REP_NUM],
|
839
|
+
U32 const ll0,
|
840
|
+
U32 const lengthToBeat,
|
841
|
+
const ZSTD_dictMode_e dictMode,
|
842
|
+
const U32 mls)
|
750
843
|
{
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
ZSTD_updateTree_internal(ms, ip, iHighLimit,
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
844
|
+
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
|
845
|
+
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
|
846
|
+
if (ip < ms->window.base + ms->nextToUpdate)
|
847
|
+
return 0; /* skipped area */
|
848
|
+
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
|
849
|
+
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
|
850
|
+
}
|
851
|
+
|
852
|
+
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
|
853
|
+
|
854
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
|
855
|
+
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
|
856
|
+
ZSTD_match_t* matches, \
|
857
|
+
ZSTD_matchState_t* ms, \
|
858
|
+
U32* nextToUpdate3, \
|
859
|
+
const BYTE* ip, \
|
860
|
+
const BYTE* const iHighLimit, \
|
861
|
+
const U32 rep[ZSTD_REP_NUM], \
|
862
|
+
U32 const ll0, \
|
863
|
+
U32 const lengthToBeat) \
|
864
|
+
{ \
|
865
|
+
return ZSTD_btGetAllMatches_internal( \
|
866
|
+
matches, ms, nextToUpdate3, ip, iHighLimit, \
|
867
|
+
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
|
764
868
|
}
|
869
|
+
|
870
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
|
871
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
|
872
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
|
873
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
|
874
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
|
875
|
+
|
876
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
|
877
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
|
878
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
|
879
|
+
|
880
|
+
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
|
881
|
+
{ \
|
882
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
|
883
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
|
884
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
|
885
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
|
886
|
+
}
|
887
|
+
|
888
|
+
static ZSTD_getAllMatchesFn
|
889
|
+
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
|
890
|
+
{
|
891
|
+
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
|
892
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
|
893
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
|
894
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
|
895
|
+
};
|
896
|
+
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
|
897
|
+
assert((U32)dictMode < 3);
|
898
|
+
assert(mls - 3 < 4);
|
899
|
+
return getAllMatchesFns[(int)dictMode][mls - 3];
|
765
900
|
}
|
766
901
|
|
767
902
|
/*************************
|
@@ -770,16 +905,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
|
770
905
|
|
771
906
|
/* Struct containing info needed to make decision about ldm inclusion */
|
772
907
|
typedef struct {
|
773
|
-
rawSeqStore_t seqStore;
|
774
|
-
U32 startPosInBlock;
|
775
|
-
U32 endPosInBlock;
|
776
|
-
U32 offset;
|
908
|
+
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
909
|
+
U32 startPosInBlock; /* Start position of the current match candidate */
|
910
|
+
U32 endPosInBlock; /* End position of the current match candidate */
|
911
|
+
U32 offset; /* Offset of the match candidate */
|
777
912
|
} ZSTD_optLdm_t;
|
778
913
|
|
779
914
|
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
780
|
-
* Moves forward in rawSeqStore by nbBytes,
|
915
|
+
* Moves forward in @rawSeqStore by @nbBytes,
|
916
|
+
* which will update the fields 'pos' and 'posInSequence'.
|
781
917
|
*/
|
782
|
-
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
918
|
+
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
919
|
+
{
|
783
920
|
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
784
921
|
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
785
922
|
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
@@ -800,8 +937,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
|
|
800
937
|
* Calculates the beginning and end of the next match in the current block.
|
801
938
|
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
802
939
|
*/
|
803
|
-
static void
|
804
|
-
|
940
|
+
static void
|
941
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
942
|
+
U32 blockBytesRemaining)
|
943
|
+
{
|
805
944
|
rawSeq currSeq;
|
806
945
|
U32 currBlockEndPos;
|
807
946
|
U32 literalsBytesRemaining;
|
@@ -813,8 +952,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
|
|
813
952
|
optLdm->endPosInBlock = UINT_MAX;
|
814
953
|
return;
|
815
954
|
}
|
816
|
-
/* Calculate appropriate bytes left in matchLength and litLength
|
817
|
-
|
955
|
+
/* Calculate appropriate bytes left in matchLength and litLength
|
956
|
+
* after adjusting based on ldmSeqStore->posInSequence */
|
818
957
|
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
819
958
|
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
820
959
|
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
@@ -850,15 +989,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
|
|
850
989
|
}
|
851
990
|
|
852
991
|
/* ZSTD_optLdm_maybeAddMatch():
|
853
|
-
* Adds a match if it's long enough,
|
854
|
-
*
|
992
|
+
* Adds a match if it's long enough,
|
993
|
+
* based on its 'matchStartPosInBlock' and 'matchEndPosInBlock',
|
994
|
+
* into 'matches'. Maintains the correct ordering of 'matches'.
|
855
995
|
*/
|
856
996
|
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
857
|
-
ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
U32
|
997
|
+
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
998
|
+
{
|
999
|
+
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
|
1000
|
+
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
|
1001
|
+
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
862
1002
|
|
863
1003
|
/* Ensure that current block position is not outside of the match */
|
864
1004
|
if (currPosInBlock < optLdm->startPosInBlock
|
@@ -868,10 +1008,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
868
1008
|
}
|
869
1009
|
|
870
1010
|
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
871
|
-
|
872
|
-
|
1011
|
+
U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
|
1012
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
|
1013
|
+
candidateOffBase, candidateMatchLength, currPosInBlock);
|
873
1014
|
matches[*nbMatches].len = candidateMatchLength;
|
874
|
-
matches[*nbMatches].off =
|
1015
|
+
matches[*nbMatches].off = candidateOffBase;
|
875
1016
|
(*nbMatches)++;
|
876
1017
|
}
|
877
1018
|
}
|
@@ -879,8 +1020,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
879
1020
|
/* ZSTD_optLdm_processMatchCandidate():
|
880
1021
|
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
881
1022
|
*/
|
882
|
-
static void
|
883
|
-
|
1023
|
+
static void
|
1024
|
+
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
|
1025
|
+
ZSTD_match_t* matches, U32* nbMatches,
|
1026
|
+
U32 currPosInBlock, U32 remainingBytes)
|
1027
|
+
{
|
884
1028
|
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
885
1029
|
return;
|
886
1030
|
}
|
@@ -891,24 +1035,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
|
|
891
1035
|
* at the end of a match from the ldm seq store, and will often be some bytes
|
892
1036
|
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
893
1037
|
*/
|
894
|
-
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
1038
|
+
U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
895
1039
|
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
896
|
-
}
|
1040
|
+
}
|
897
1041
|
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
898
1042
|
}
|
899
1043
|
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
900
1044
|
}
|
901
1045
|
|
1046
|
+
|
902
1047
|
/*-*******************************
|
903
1048
|
* Optimal parser
|
904
1049
|
*********************************/
|
905
1050
|
|
906
|
-
|
907
|
-
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
908
|
-
{
|
909
|
-
return sol.litlen + sol.mlen;
|
910
|
-
}
|
911
|
-
|
912
1051
|
#if 0 /* debug */
|
913
1052
|
|
914
1053
|
static void
|
@@ -926,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
|
|
926
1065
|
|
927
1066
|
#endif
|
928
1067
|
|
929
|
-
|
1068
|
+
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
|
1069
|
+
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
|
1070
|
+
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
|
1071
|
+
|
1072
|
+
FORCE_INLINE_TEMPLATE
|
1073
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1074
|
+
size_t
|
930
1075
|
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
931
1076
|
seqStore_t* seqStore,
|
932
1077
|
U32 rep[ZSTD_REP_NUM],
|
@@ -944,15 +1089,19 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
944
1089
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
945
1090
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
946
1091
|
|
1092
|
+
ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
|
1093
|
+
|
947
1094
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
948
1095
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
949
1096
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
950
1097
|
|
951
1098
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
952
1099
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
953
|
-
ZSTD_optimal_t
|
1100
|
+
ZSTD_optimal_t lastStretch;
|
954
1101
|
ZSTD_optLdm_t optLdm;
|
955
1102
|
|
1103
|
+
ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
|
1104
|
+
|
956
1105
|
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
957
1106
|
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
958
1107
|
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
@@ -971,104 +1120,141 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
971
1120
|
/* find first match */
|
972
1121
|
{ U32 const litlen = (U32)(ip - anchor);
|
973
1122
|
U32 const ll0 = !litlen;
|
974
|
-
U32 nbMatches =
|
1123
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
975
1124
|
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
976
|
-
(U32)(ip-istart), (U32)(iend
|
977
|
-
if (!nbMatches) {
|
1125
|
+
(U32)(ip-istart), (U32)(iend-ip));
|
1126
|
+
if (!nbMatches) {
|
1127
|
+
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
|
1128
|
+
ip++;
|
1129
|
+
continue;
|
1130
|
+
}
|
1131
|
+
|
1132
|
+
/* Match found: let's store this solution, and eventually find more candidates.
|
1133
|
+
* During this forward pass, @opt is used to store stretches,
|
1134
|
+
* defined as "a match followed by N literals".
|
1135
|
+
* Note how this is different from a Sequence, which is "N literals followed by a match".
|
1136
|
+
* Storing stretches allows us to store different match predecessors
|
1137
|
+
* for each literal position that is part of a literals run. */
|
978
1138
|
|
979
1139
|
/* initialize opt[0] */
|
980
|
-
|
981
|
-
opt[0].mlen = 0; /* means is_a_literal */
|
1140
|
+
opt[0].mlen = 0; /* there are only literals so far */
|
982
1141
|
opt[0].litlen = litlen;
|
983
|
-
/*
|
984
|
-
* it is static for the duration of the forward pass, and is included
|
985
|
-
* in every price.
|
986
|
-
*
|
1142
|
+
/* No need to include the actual price of the literals before the first match
|
1143
|
+
* because it is static for the duration of the forward pass, and is included
|
1144
|
+
* in every subsequent price. But, we include the literal length because
|
1145
|
+
* the cost variation of litlen depends on the value of litlen.
|
987
1146
|
*/
|
988
|
-
opt[0].price =
|
1147
|
+
opt[0].price = LL_PRICE(litlen);
|
1148
|
+
ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
|
1149
|
+
ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
|
989
1150
|
|
990
1151
|
/* large match -> immediate encoding */
|
991
1152
|
{ U32 const maxML = matches[nbMatches-1].len;
|
992
|
-
U32 const
|
993
|
-
DEBUGLOG(6, "found %u matches of maxLength=%u and
|
994
|
-
nbMatches, maxML,
|
1153
|
+
U32 const maxOffBase = matches[nbMatches-1].off;
|
1154
|
+
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
|
1155
|
+
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
|
995
1156
|
|
996
1157
|
if (maxML > sufficient_len) {
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
DEBUGLOG(6, "large match (%u>%u)
|
1158
|
+
lastStretch.litlen = 0;
|
1159
|
+
lastStretch.mlen = maxML;
|
1160
|
+
lastStretch.off = maxOffBase;
|
1161
|
+
DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
|
1001
1162
|
maxML, sufficient_len);
|
1002
1163
|
cur = 0;
|
1003
|
-
last_pos =
|
1164
|
+
last_pos = maxML;
|
1004
1165
|
goto _shortestPath;
|
1005
1166
|
} }
|
1006
1167
|
|
1007
1168
|
/* set prices for first matches starting position == 0 */
|
1008
|
-
|
1009
|
-
|
1169
|
+
assert(opt[0].price >= 0);
|
1170
|
+
{ U32 pos;
|
1010
1171
|
U32 matchNb;
|
1011
1172
|
for (pos = 1; pos < minMatch; pos++) {
|
1012
|
-
opt[pos].price = ZSTD_MAX_PRICE;
|
1173
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
1174
|
+
opt[pos].mlen = 0;
|
1175
|
+
opt[pos].litlen = litlen + pos;
|
1013
1176
|
}
|
1014
1177
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
1015
|
-
U32 const
|
1178
|
+
U32 const offBase = matches[matchNb].off;
|
1016
1179
|
U32 const end = matches[matchNb].len;
|
1017
1180
|
for ( ; pos <= end ; pos++ ) {
|
1018
|
-
|
1019
|
-
|
1181
|
+
int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
|
1182
|
+
int const sequencePrice = opt[0].price + matchPrice;
|
1020
1183
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
1021
1184
|
pos, ZSTD_fCost(sequencePrice));
|
1022
1185
|
opt[pos].mlen = pos;
|
1023
|
-
opt[pos].off =
|
1024
|
-
opt[pos].litlen =
|
1025
|
-
opt[pos].price = sequencePrice;
|
1026
|
-
|
1186
|
+
opt[pos].off = offBase;
|
1187
|
+
opt[pos].litlen = 0; /* end of match */
|
1188
|
+
opt[pos].price = sequencePrice + LL_PRICE(0);
|
1189
|
+
}
|
1190
|
+
}
|
1027
1191
|
last_pos = pos-1;
|
1192
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
1028
1193
|
}
|
1029
1194
|
}
|
1030
1195
|
|
1031
1196
|
/* check further positions */
|
1032
1197
|
for (cur = 1; cur <= last_pos; cur++) {
|
1033
1198
|
const BYTE* const inr = ip + cur;
|
1034
|
-
assert(cur
|
1035
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
|
1199
|
+
assert(cur <= ZSTD_OPT_NUM);
|
1200
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
|
1036
1201
|
|
1037
1202
|
/* Fix current position with one literal if cheaper */
|
1038
|
-
{ U32 const litlen =
|
1203
|
+
{ U32 const litlen = opt[cur-1].litlen + 1;
|
1039
1204
|
int const price = opt[cur-1].price
|
1040
|
-
+
|
1041
|
-
+
|
1042
|
-
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
1205
|
+
+ LIT_PRICE(ip+cur-1)
|
1206
|
+
+ LL_INCPRICE(litlen);
|
1043
1207
|
assert(price < 1000000000); /* overflow check */
|
1044
1208
|
if (price <= opt[cur].price) {
|
1209
|
+
ZSTD_optimal_t const prevMatch = opt[cur];
|
1045
1210
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
1046
1211
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
|
1047
1212
|
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
|
1048
|
-
opt[cur]
|
1049
|
-
opt[cur].off = 0;
|
1213
|
+
opt[cur] = opt[cur-1];
|
1050
1214
|
opt[cur].litlen = litlen;
|
1051
1215
|
opt[cur].price = price;
|
1216
|
+
if ( (optLevel >= 1) /* additional check only for higher modes */
|
1217
|
+
&& (prevMatch.litlen == 0) /* replace a match */
|
1218
|
+
&& (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
|
1219
|
+
&& LIKELY(ip + cur < iend)
|
1220
|
+
) {
|
1221
|
+
/* check next position, in case it would be cheaper */
|
1222
|
+
int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
|
1223
|
+
int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
|
1224
|
+
DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
|
1225
|
+
cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
|
1226
|
+
if ( (with1literal < withMoreLiterals)
|
1227
|
+
&& (with1literal < opt[cur+1].price) ) {
|
1228
|
+
/* update offset history - before it disappears */
|
1229
|
+
U32 const prev = cur - prevMatch.mlen;
|
1230
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
|
1231
|
+
assert(cur >= prevMatch.mlen);
|
1232
|
+
DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
|
1233
|
+
ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
|
1234
|
+
newReps.rep[0], newReps.rep[1], newReps.rep[2] );
|
1235
|
+
opt[cur+1] = prevMatch; /* mlen & offbase */
|
1236
|
+
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
|
1237
|
+
opt[cur+1].litlen = 1;
|
1238
|
+
opt[cur+1].price = with1literal;
|
1239
|
+
if (last_pos < cur+1) last_pos = cur+1;
|
1240
|
+
}
|
1241
|
+
}
|
1052
1242
|
} else {
|
1053
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)
|
1054
|
-
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)
|
1055
|
-
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
|
1243
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
|
1244
|
+
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
|
1056
1245
|
}
|
1057
1246
|
}
|
1058
1247
|
|
1059
|
-
/*
|
1060
|
-
*
|
1061
|
-
* correct to set the next chunks repcodes during the backward
|
1062
|
-
* traversal.
|
1248
|
+
/* Offset history is not updated during match comparison.
|
1249
|
+
* Do it here, now that the match is selected and confirmed.
|
1063
1250
|
*/
|
1064
1251
|
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
1065
1252
|
assert(cur >= opt[cur].mlen);
|
1066
|
-
if (opt[cur].
|
1253
|
+
if (opt[cur].litlen == 0) {
|
1254
|
+
/* just finished a match => alter offset history */
|
1067
1255
|
U32 const prev = cur - opt[cur].mlen;
|
1068
|
-
repcodes_t newReps =
|
1256
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
|
1069
1257
|
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
1070
|
-
} else {
|
1071
|
-
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
1072
1258
|
}
|
1073
1259
|
|
1074
1260
|
/* last match must start at a minimum distance of 8 from oend */
|
@@ -1078,15 +1264,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1078
1264
|
|
1079
1265
|
if ( (optLevel==0) /*static_test*/
|
1080
1266
|
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
|
1081
|
-
DEBUGLOG(7, "
|
1267
|
+
DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
|
1082
1268
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
1083
1269
|
}
|
1084
1270
|
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
U32 nbMatches =
|
1271
|
+
assert(opt[cur].price >= 0);
|
1272
|
+
{ U32 const ll0 = (opt[cur].litlen == 0);
|
1273
|
+
int const previousPrice = opt[cur].price;
|
1274
|
+
int const basePrice = previousPrice + LL_PRICE(0);
|
1275
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
1090
1276
|
U32 matchNb;
|
1091
1277
|
|
1092
1278
|
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
@@ -1097,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1097
1283
|
continue;
|
1098
1284
|
}
|
1099
1285
|
|
1100
|
-
{ U32 const
|
1101
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of
|
1102
|
-
inr-istart, cur, nbMatches,
|
1103
|
-
|
1104
|
-
if ( (
|
1105
|
-
|| (cur +
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
last_pos = cur +
|
1111
|
-
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
|
1286
|
+
{ U32 const longestML = matches[nbMatches-1].len;
|
1287
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
|
1288
|
+
inr-istart, cur, nbMatches, longestML);
|
1289
|
+
|
1290
|
+
if ( (longestML > sufficient_len)
|
1291
|
+
|| (cur + longestML >= ZSTD_OPT_NUM)
|
1292
|
+
|| (ip + cur + longestML >= iend) ) {
|
1293
|
+
lastStretch.mlen = longestML;
|
1294
|
+
lastStretch.off = matches[nbMatches-1].off;
|
1295
|
+
lastStretch.litlen = 0;
|
1296
|
+
last_pos = cur + longestML;
|
1112
1297
|
goto _shortestPath;
|
1113
1298
|
} }
|
1114
1299
|
|
@@ -1119,20 +1304,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1119
1304
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
1120
1305
|
U32 mlen;
|
1121
1306
|
|
1122
|
-
DEBUGLOG(7, "testing match %u =>
|
1123
|
-
matchNb, matches[matchNb].off, lastML, litlen);
|
1307
|
+
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
|
1308
|
+
matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
|
1124
1309
|
|
1125
1310
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
1126
1311
|
U32 const pos = cur + mlen;
|
1127
|
-
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
1312
|
+
int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
1128
1313
|
|
1129
1314
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
1130
1315
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
1131
1316
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
1132
|
-
while (last_pos < pos) {
|
1317
|
+
while (last_pos < pos) {
|
1318
|
+
/* fill empty positions, for future comparisons */
|
1319
|
+
last_pos++;
|
1320
|
+
opt[last_pos].price = ZSTD_MAX_PRICE;
|
1321
|
+
opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
|
1322
|
+
}
|
1133
1323
|
opt[pos].mlen = mlen;
|
1134
1324
|
opt[pos].off = offset;
|
1135
|
-
opt[pos].litlen =
|
1325
|
+
opt[pos].litlen = 0;
|
1136
1326
|
opt[pos].price = price;
|
1137
1327
|
} else {
|
1138
1328
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
@@ -1140,52 +1330,86 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1140
1330
|
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
|
1141
1331
|
}
|
1142
1332
|
} } }
|
1333
|
+
opt[last_pos+1].price = ZSTD_MAX_PRICE;
|
1143
1334
|
} /* for (cur = 1; cur <= last_pos; cur++) */
|
1144
1335
|
|
1145
|
-
|
1146
|
-
cur
|
1147
|
-
|
1336
|
+
lastStretch = opt[last_pos];
|
1337
|
+
assert(cur >= lastStretch.mlen);
|
1338
|
+
cur = last_pos - lastStretch.mlen;
|
1148
1339
|
|
1149
1340
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
1150
1341
|
assert(opt[0].mlen == 0);
|
1342
|
+
assert(last_pos >= lastStretch.mlen);
|
1343
|
+
assert(cur == last_pos - lastStretch.mlen);
|
1151
1344
|
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1345
|
+
if (lastStretch.mlen==0) {
|
1346
|
+
/* no solution : all matches have been converted into literals */
|
1347
|
+
assert(lastStretch.litlen == (ip - anchor) + last_pos);
|
1348
|
+
ip += last_pos;
|
1349
|
+
continue;
|
1350
|
+
}
|
1351
|
+
assert(lastStretch.off > 0);
|
1352
|
+
|
1353
|
+
/* Update offset history */
|
1354
|
+
if (lastStretch.litlen == 0) {
|
1355
|
+
/* finishing on a match : update offset history */
|
1356
|
+
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
|
1357
|
+
ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
|
1159
1358
|
} else {
|
1160
|
-
ZSTD_memcpy(rep,
|
1359
|
+
ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
|
1360
|
+
assert(cur >= lastStretch.litlen);
|
1361
|
+
cur -= lastStretch.litlen;
|
1161
1362
|
}
|
1162
1363
|
|
1163
|
-
|
1364
|
+
/* Let's write the shortest path solution.
|
1365
|
+
* It is stored in @opt in reverse order,
|
1366
|
+
* starting from @storeEnd (==cur+2),
|
1367
|
+
* effectively partially overwriting @opt.
|
1368
|
+
* Content is changed too:
|
1369
|
+
* - So far, @opt stored stretches, aka a match followed by literals
|
1370
|
+
* - Now, it will store sequences, aka literals followed by a match
|
1371
|
+
*/
|
1372
|
+
{ U32 const storeEnd = cur + 2;
|
1164
1373
|
U32 storeStart = storeEnd;
|
1165
|
-
U32
|
1374
|
+
U32 stretchPos = cur;
|
1166
1375
|
|
1167
1376
|
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
|
1168
1377
|
last_pos, cur); (void)last_pos;
|
1169
|
-
assert(storeEnd <
|
1170
|
-
DEBUGLOG(6, "last
|
1171
|
-
storeEnd,
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1378
|
+
assert(storeEnd < ZSTD_OPT_SIZE);
|
1379
|
+
DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
|
1380
|
+
storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
|
1381
|
+
if (lastStretch.litlen > 0) {
|
1382
|
+
/* last "sequence" is unfinished: just a bunch of literals */
|
1383
|
+
opt[storeEnd].litlen = lastStretch.litlen;
|
1384
|
+
opt[storeEnd].mlen = 0;
|
1385
|
+
storeStart = storeEnd-1;
|
1386
|
+
opt[storeStart] = lastStretch;
|
1387
|
+
} {
|
1388
|
+
opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
|
1389
|
+
storeStart = storeEnd;
|
1390
|
+
}
|
1391
|
+
while (1) {
|
1392
|
+
ZSTD_optimal_t nextStretch = opt[stretchPos];
|
1393
|
+
opt[storeStart].litlen = nextStretch.litlen;
|
1394
|
+
DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
|
1395
|
+
opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
|
1396
|
+
if (nextStretch.mlen == 0) {
|
1397
|
+
/* reaching beginning of segment */
|
1398
|
+
break;
|
1399
|
+
}
|
1175
1400
|
storeStart--;
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
|
1401
|
+
opt[storeStart] = nextStretch; /* note: litlen will be fixed */
|
1402
|
+
assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
|
1403
|
+
stretchPos -= nextStretch.litlen + nextStretch.mlen;
|
1180
1404
|
}
|
1181
1405
|
|
1182
1406
|
/* save sequences */
|
1183
|
-
DEBUGLOG(6, "sending selected sequences into seqStore")
|
1407
|
+
DEBUGLOG(6, "sending selected sequences into seqStore");
|
1184
1408
|
{ U32 storePos;
|
1185
1409
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
1186
1410
|
U32 const llen = opt[storePos].litlen;
|
1187
1411
|
U32 const mlen = opt[storePos].mlen;
|
1188
|
-
U32 const
|
1412
|
+
U32 const offBase = opt[storePos].off;
|
1189
1413
|
U32 const advance = llen + mlen;
|
1190
1414
|
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
1191
1415
|
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
@@ -1197,11 +1421,14 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1197
1421
|
}
|
1198
1422
|
|
1199
1423
|
assert(anchor + llen <= iend);
|
1200
|
-
ZSTD_updateStats(optStatePtr, llen, anchor,
|
1201
|
-
ZSTD_storeSeq(seqStore, llen, anchor, iend,
|
1424
|
+
ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
|
1425
|
+
ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
|
1202
1426
|
anchor += advance;
|
1203
1427
|
ip = anchor;
|
1204
1428
|
} }
|
1429
|
+
DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
|
1430
|
+
|
1431
|
+
/* update all costs */
|
1205
1432
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
1206
1433
|
}
|
1207
1434
|
} /* while (ip < ilimit) */
|
@@ -1209,50 +1436,51 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1209
1436
|
/* Return the last literals size */
|
1210
1437
|
return (size_t)(iend - anchor);
|
1211
1438
|
}
|
1439
|
+
#endif /* build exclusions */
|
1212
1440
|
|
1441
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1442
|
+
static size_t ZSTD_compressBlock_opt0(
|
1443
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1444
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1445
|
+
{
|
1446
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
1447
|
+
}
|
1448
|
+
#endif
|
1449
|
+
|
1450
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1451
|
+
static size_t ZSTD_compressBlock_opt2(
|
1452
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1453
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1454
|
+
{
|
1455
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
1456
|
+
}
|
1457
|
+
#endif
|
1213
1458
|
|
1459
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1214
1460
|
size_t ZSTD_compressBlock_btopt(
|
1215
1461
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1216
1462
|
const void* src, size_t srcSize)
|
1217
1463
|
{
|
1218
1464
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
1219
|
-
return
|
1465
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1220
1466
|
}
|
1467
|
+
#endif
|
1221
1468
|
|
1222
1469
|
|
1223
|
-
/* used in 2-pass strategy */
|
1224
|
-
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
1225
|
-
{
|
1226
|
-
U32 s, sum=0;
|
1227
|
-
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
1228
|
-
for (s=0; s<lastEltIndex+1; s++) {
|
1229
|
-
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
1230
|
-
table[s]--;
|
1231
|
-
sum += table[s];
|
1232
|
-
}
|
1233
|
-
return sum;
|
1234
|
-
}
|
1235
1470
|
|
1236
|
-
/* used in 2-pass strategy */
|
1237
|
-
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
1238
|
-
{
|
1239
|
-
if (ZSTD_compressedLiterals(optPtr))
|
1240
|
-
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
1241
|
-
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
1242
|
-
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
1243
|
-
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
1244
|
-
}
|
1245
1471
|
|
1472
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1246
1473
|
/* ZSTD_initStats_ultra():
|
1247
1474
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
1248
1475
|
* only works on first block, with no dictionary and no ldm.
|
1249
|
-
* this function cannot error,
|
1476
|
+
* this function cannot error out; its narrow contract must be respected.
|
1250
1477
|
*/
|
1251
|
-
static
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1478
|
+
static
|
1479
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1480
|
+
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
1481
|
+
seqStore_t* seqStore,
|
1482
|
+
U32 rep[ZSTD_REP_NUM],
|
1483
|
+
const void* src, size_t srcSize)
|
1256
1484
|
{
|
1257
1485
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
1258
1486
|
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
@@ -1263,17 +1491,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
1263
1491
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
1264
1492
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
1265
1493
|
|
1266
|
-
|
1494
|
+
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
1267
1495
|
|
1268
|
-
/* invalidate first scan from history */
|
1496
|
+
/* invalidate first scan from history, only keep entropy stats */
|
1269
1497
|
ZSTD_resetSeqStore(seqStore);
|
1270
1498
|
ms->window.base -= srcSize;
|
1271
1499
|
ms->window.dictLimit += (U32)srcSize;
|
1272
1500
|
ms->window.lowLimit = ms->window.dictLimit;
|
1273
1501
|
ms->nextToUpdate = ms->window.dictLimit;
|
1274
1502
|
|
1275
|
-
/* re-inforce weight of collected statistics */
|
1276
|
-
ZSTD_upscaleStats(&ms->opt);
|
1277
1503
|
}
|
1278
1504
|
|
1279
1505
|
size_t ZSTD_compressBlock_btultra(
|
@@ -1281,7 +1507,7 @@ size_t ZSTD_compressBlock_btultra(
|
|
1281
1507
|
const void* src, size_t srcSize)
|
1282
1508
|
{
|
1283
1509
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
1284
|
-
return
|
1510
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1285
1511
|
}
|
1286
1512
|
|
1287
1513
|
size_t ZSTD_compressBlock_btultra2(
|
@@ -1291,10 +1517,10 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1291
1517
|
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
1292
1518
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
1293
1519
|
|
1294
|
-
/* 2-
|
1520
|
+
/* 2-passes strategy:
|
1295
1521
|
* this strategy makes a first pass over first block to collect statistics
|
1296
|
-
*
|
1297
|
-
* After 1st pass, function forgets
|
1522
|
+
* in order to seed next round's statistics with it.
|
1523
|
+
* After 1st pass, function forgets history, and starts a new block.
|
1298
1524
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
1299
1525
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
1300
1526
|
* The compression ratio gain is generally small (~0.5% on first block),
|
@@ -1303,42 +1529,47 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1303
1529
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
1304
1530
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
1305
1531
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
1306
|
-
&& (curr == ms->window.dictLimit)
|
1307
|
-
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
1532
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
1533
|
+
&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
|
1308
1534
|
) {
|
1309
1535
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
1310
1536
|
}
|
1311
1537
|
|
1312
|
-
return
|
1538
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1313
1539
|
}
|
1540
|
+
#endif
|
1314
1541
|
|
1542
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1315
1543
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
1316
1544
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1317
1545
|
const void* src, size_t srcSize)
|
1318
1546
|
{
|
1319
|
-
return
|
1547
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1320
1548
|
}
|
1321
1549
|
|
1322
|
-
size_t
|
1550
|
+
size_t ZSTD_compressBlock_btopt_extDict(
|
1323
1551
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1324
1552
|
const void* src, size_t srcSize)
|
1325
1553
|
{
|
1326
|
-
return
|
1554
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1327
1555
|
}
|
1556
|
+
#endif
|
1328
1557
|
|
1329
|
-
|
1558
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1559
|
+
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
1330
1560
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1331
1561
|
const void* src, size_t srcSize)
|
1332
1562
|
{
|
1333
|
-
return
|
1563
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1334
1564
|
}
|
1335
1565
|
|
1336
1566
|
size_t ZSTD_compressBlock_btultra_extDict(
|
1337
1567
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1338
1568
|
const void* src, size_t srcSize)
|
1339
1569
|
{
|
1340
|
-
return
|
1570
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1341
1571
|
}
|
1572
|
+
#endif
|
1342
1573
|
|
1343
1574
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|
1344
1575
|
* because btultra2 is not meant to work with dictionaries
|