extzstd 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/contrib/zstd/CHANGELOG +188 -1
- data/contrib/zstd/CONTRIBUTING.md +157 -74
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +81 -58
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +59 -35
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +49 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +87 -181
- data/contrib/zstd/lib/README.md +23 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +33 -59
- data/contrib/zstd/lib/common/compiler.h +115 -45
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +1 -1
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +9 -85
- data/contrib/zstd/lib/common/fse_decompress.c +29 -111
- data/contrib/zstd/lib/common/huf.h +84 -172
- data/contrib/zstd/lib/common/mem.h +58 -49
- data/contrib/zstd/lib/common/pool.c +37 -16
- data/contrib/zstd/lib/common/pool.h +9 -3
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +68 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -809
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +64 -150
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +69 -150
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +773 -251
- data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
- data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
- data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
- data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +922 -293
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +13 -10
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +16 -5
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -14,40 +14,47 @@
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
|
17
|
-
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
|
18
17
|
#define ZSTD_MAX_PRICE (1<<30)
|
|
19
18
|
|
|
20
|
-
#define ZSTD_PREDEF_THRESHOLD
|
|
19
|
+
#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
/*-*************************************
|
|
24
23
|
* Price functions for optimal parser
|
|
25
24
|
***************************************/
|
|
26
25
|
|
|
27
|
-
#if 0 /* approximation at bit level */
|
|
26
|
+
#if 0 /* approximation at bit level (for tests) */
|
|
28
27
|
# define BITCOST_ACCURACY 0
|
|
29
28
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
30
|
-
# define WEIGHT(stat)
|
|
31
|
-
#elif 0 /* fractional bit accuracy */
|
|
29
|
+
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
|
|
30
|
+
#elif 0 /* fractional bit accuracy (for tests) */
|
|
32
31
|
# define BITCOST_ACCURACY 8
|
|
33
32
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
34
|
-
# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
|
|
33
|
+
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
|
|
35
34
|
#else /* opt==approx, ultra==accurate */
|
|
36
35
|
# define BITCOST_ACCURACY 8
|
|
37
36
|
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
|
|
38
|
-
# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
|
37
|
+
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
|
|
39
38
|
#endif
|
|
40
39
|
|
|
40
|
+
/* ZSTD_bitWeight() :
|
|
41
|
+
* provide estimated "cost" of a stat in full bits only */
|
|
41
42
|
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
|
|
42
43
|
{
|
|
43
44
|
return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
|
|
44
45
|
}
|
|
45
46
|
|
|
47
|
+
/* ZSTD_fracWeight() :
|
|
48
|
+
* provide fractional-bit "cost" of a stat,
|
|
49
|
+
* using linear interpolation approximation */
|
|
46
50
|
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
47
51
|
{
|
|
48
52
|
U32 const stat = rawStat + 1;
|
|
49
53
|
U32 const hb = ZSTD_highbit32(stat);
|
|
50
54
|
U32 const BWeight = hb * BITCOST_MULTIPLIER;
|
|
55
|
+
/* Fweight was meant for "Fractional weight"
|
|
56
|
+
* but it's effectively a value between 1 and 2
|
|
57
|
+
* using fixed point arithmetic */
|
|
51
58
|
U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
|
|
52
59
|
U32 const weight = BWeight + FWeight;
|
|
53
60
|
assert(hb + BITCOST_ACCURACY < 31);
|
|
@@ -58,7 +65,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
|
|
58
65
|
/* debugging function,
|
|
59
66
|
* @return price in bytes as fractional value
|
|
60
67
|
* for debug messages only */
|
|
61
|
-
MEM_STATIC double ZSTD_fCost(
|
|
68
|
+
MEM_STATIC double ZSTD_fCost(int price)
|
|
62
69
|
{
|
|
63
70
|
return (double)price / (BITCOST_MULTIPLIER*8);
|
|
64
71
|
}
|
|
@@ -66,7 +73,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
|
|
|
66
73
|
|
|
67
74
|
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
|
|
68
75
|
{
|
|
69
|
-
return optPtr->literalCompressionMode !=
|
|
76
|
+
return optPtr->literalCompressionMode != ZSTD_ps_disable;
|
|
70
77
|
}
|
|
71
78
|
|
|
72
79
|
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
@@ -79,25 +86,52 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
|
|
79
86
|
}
|
|
80
87
|
|
|
81
88
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
89
|
+
static U32 sum_u32(const unsigned table[], size_t nbElts)
|
|
90
|
+
{
|
|
91
|
+
size_t n;
|
|
92
|
+
U32 total = 0;
|
|
93
|
+
for (n=0; n<nbElts; n++) {
|
|
94
|
+
total += table[n];
|
|
95
|
+
}
|
|
96
|
+
return total;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
|
|
100
|
+
|
|
101
|
+
static U32
|
|
102
|
+
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
|
|
86
103
|
{
|
|
87
104
|
U32 s, sum=0;
|
|
88
|
-
DEBUGLOG(5, "
|
|
89
|
-
|
|
105
|
+
DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
|
|
106
|
+
(unsigned)lastEltIndex+1, (unsigned)shift );
|
|
107
|
+
assert(shift < 30);
|
|
90
108
|
for (s=0; s<lastEltIndex+1; s++) {
|
|
91
|
-
|
|
92
|
-
|
|
109
|
+
unsigned const base = base1 ? 1 : (table[s]>0);
|
|
110
|
+
unsigned const newStat = base + (table[s] >> shift);
|
|
111
|
+
sum += newStat;
|
|
112
|
+
table[s] = newStat;
|
|
93
113
|
}
|
|
94
114
|
return sum;
|
|
95
115
|
}
|
|
96
116
|
|
|
117
|
+
/* ZSTD_scaleStats() :
|
|
118
|
+
* reduce all elt frequencies in table if sum too large
|
|
119
|
+
* return the resulting sum of elements */
|
|
120
|
+
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
|
|
121
|
+
{
|
|
122
|
+
U32 const prevsum = sum_u32(table, lastEltIndex+1);
|
|
123
|
+
U32 const factor = prevsum >> logTarget;
|
|
124
|
+
DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
|
|
125
|
+
assert(logTarget < 30);
|
|
126
|
+
if (factor <= 1) return prevsum;
|
|
127
|
+
return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
|
|
128
|
+
}
|
|
129
|
+
|
|
97
130
|
/* ZSTD_rescaleFreqs() :
|
|
98
131
|
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
|
99
132
|
* take hints from dictionary if there is one
|
|
100
|
-
*
|
|
133
|
+
* and init from zero if there is none,
|
|
134
|
+
* using src for literals stats, and baseline stats for sequence symbols
|
|
101
135
|
* otherwise downscale existing stats, to be used as seed for next block.
|
|
102
136
|
*/
|
|
103
137
|
static void
|
|
@@ -109,24 +143,28 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
109
143
|
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
|
110
144
|
optPtr->priceType = zop_dynamic;
|
|
111
145
|
|
|
112
|
-
if (optPtr->litLengthSum == 0) { /* first block
|
|
113
|
-
|
|
114
|
-
|
|
146
|
+
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
|
|
147
|
+
|
|
148
|
+
/* heuristic: use pre-defined stats for too small inputs */
|
|
149
|
+
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
|
|
150
|
+
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
|
|
115
151
|
optPtr->priceType = zop_predef;
|
|
116
152
|
}
|
|
117
153
|
|
|
118
154
|
assert(optPtr->symbolCosts != NULL);
|
|
119
155
|
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
|
120
|
-
|
|
156
|
+
|
|
157
|
+
/* huffman stats covering the full value set : table presumed generated by dictionary */
|
|
121
158
|
optPtr->priceType = zop_dynamic;
|
|
122
159
|
|
|
123
160
|
if (compressedLiterals) {
|
|
161
|
+
/* generate literals statistics from huffman table */
|
|
124
162
|
unsigned lit;
|
|
125
163
|
assert(optPtr->litFreq != NULL);
|
|
126
164
|
optPtr->litSum = 0;
|
|
127
165
|
for (lit=0; lit<=MaxLit; lit++) {
|
|
128
166
|
U32 const scaleLog = 11; /* scale to 2K */
|
|
129
|
-
U32 const bitCost =
|
|
167
|
+
U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
|
|
130
168
|
assert(bitCost <= scaleLog);
|
|
131
169
|
optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
|
|
132
170
|
optPtr->litSum += optPtr->litFreq[lit];
|
|
@@ -168,20 +206,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
168
206
|
optPtr->offCodeSum += optPtr->offCodeFreq[of];
|
|
169
207
|
} }
|
|
170
208
|
|
|
171
|
-
} else { /*
|
|
209
|
+
} else { /* first block, no dictionary */
|
|
172
210
|
|
|
173
211
|
assert(optPtr->litFreq != NULL);
|
|
174
212
|
if (compressedLiterals) {
|
|
213
|
+
/* base initial cost of literals on direct frequency within src */
|
|
175
214
|
unsigned lit = MaxLit;
|
|
176
215
|
HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
|
|
177
|
-
optPtr->litSum =
|
|
216
|
+
optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
|
|
178
217
|
}
|
|
179
218
|
|
|
180
|
-
{ unsigned
|
|
181
|
-
|
|
182
|
-
|
|
219
|
+
{ unsigned const baseLLfreqs[MaxLL+1] = {
|
|
220
|
+
4, 2, 1, 1, 1, 1, 1, 1,
|
|
221
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
222
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
223
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
224
|
+
1, 1, 1, 1
|
|
225
|
+
};
|
|
226
|
+
ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
|
|
227
|
+
optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
|
|
183
228
|
}
|
|
184
|
-
optPtr->litLengthSum = MaxLL+1;
|
|
185
229
|
|
|
186
230
|
{ unsigned ml;
|
|
187
231
|
for (ml=0; ml<=MaxML; ml++)
|
|
@@ -189,21 +233,25 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
|
|
|
189
233
|
}
|
|
190
234
|
optPtr->matchLengthSum = MaxML+1;
|
|
191
235
|
|
|
192
|
-
{ unsigned
|
|
193
|
-
|
|
194
|
-
|
|
236
|
+
{ unsigned const baseOFCfreqs[MaxOff+1] = {
|
|
237
|
+
6, 2, 1, 1, 2, 3, 4, 4,
|
|
238
|
+
4, 3, 2, 1, 1, 1, 1, 1,
|
|
239
|
+
1, 1, 1, 1, 1, 1, 1, 1,
|
|
240
|
+
1, 1, 1, 1, 1, 1, 1, 1
|
|
241
|
+
};
|
|
242
|
+
ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
|
|
243
|
+
optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
|
|
195
244
|
}
|
|
196
|
-
optPtr->offCodeSum = MaxOff+1;
|
|
197
245
|
|
|
198
246
|
}
|
|
199
247
|
|
|
200
|
-
} else { /* new block :
|
|
248
|
+
} else { /* new block : scale down accumulated statistics */
|
|
201
249
|
|
|
202
250
|
if (compressedLiterals)
|
|
203
|
-
optPtr->litSum =
|
|
204
|
-
optPtr->litLengthSum =
|
|
205
|
-
optPtr->matchLengthSum =
|
|
206
|
-
optPtr->offCodeSum =
|
|
251
|
+
optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
|
|
252
|
+
optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
|
|
253
|
+
optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
|
|
254
|
+
optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
|
|
207
255
|
}
|
|
208
256
|
|
|
209
257
|
ZSTD_setBasePrices(optPtr, optLevel);
|
|
@@ -225,11 +273,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
|
225
273
|
return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
|
|
226
274
|
|
|
227
275
|
/* dynamic statistics */
|
|
228
|
-
{ U32 price =
|
|
276
|
+
{ U32 price = optPtr->litSumBasePrice * litLength;
|
|
277
|
+
U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
|
|
229
278
|
U32 u;
|
|
279
|
+
assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
|
|
230
280
|
for (u=0; u < litLength; u++) {
|
|
231
|
-
|
|
232
|
-
|
|
281
|
+
U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
|
|
282
|
+
if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
|
|
283
|
+
price -= litPrice;
|
|
233
284
|
}
|
|
234
285
|
return price;
|
|
235
286
|
}
|
|
@@ -239,7 +290,17 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
|
239
290
|
* cost of literalLength symbol */
|
|
240
291
|
static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
|
|
241
292
|
{
|
|
242
|
-
|
|
293
|
+
assert(litLength <= ZSTD_BLOCKSIZE_MAX);
|
|
294
|
+
if (optPtr->priceType == zop_predef)
|
|
295
|
+
return WEIGHT(litLength, optLevel);
|
|
296
|
+
|
|
297
|
+
/* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
|
|
298
|
+
* because it isn't representable in the zstd format.
|
|
299
|
+
* So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
|
|
300
|
+
* In such a case, the block would be all literals.
|
|
301
|
+
*/
|
|
302
|
+
if (litLength == ZSTD_BLOCKSIZE_MAX)
|
|
303
|
+
return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
|
|
243
304
|
|
|
244
305
|
/* dynamic statistics */
|
|
245
306
|
{ U32 const llCode = ZSTD_LLcode(litLength);
|
|
@@ -250,22 +311,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
|
|
250
311
|
}
|
|
251
312
|
|
|
252
313
|
/* ZSTD_getMatchPrice() :
|
|
253
|
-
* Provides the cost of the match part (offset + matchLength) of a sequence
|
|
314
|
+
* Provides the cost of the match part (offset + matchLength) of a sequence.
|
|
254
315
|
* Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
|
|
255
|
-
*
|
|
316
|
+
* @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
|
|
317
|
+
* @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
|
|
318
|
+
*/
|
|
256
319
|
FORCE_INLINE_TEMPLATE U32
|
|
257
|
-
ZSTD_getMatchPrice(U32 const
|
|
320
|
+
ZSTD_getMatchPrice(U32 const offBase,
|
|
258
321
|
U32 const matchLength,
|
|
259
322
|
const optState_t* const optPtr,
|
|
260
323
|
int const optLevel)
|
|
261
324
|
{
|
|
262
325
|
U32 price;
|
|
263
|
-
U32 const offCode = ZSTD_highbit32(
|
|
326
|
+
U32 const offCode = ZSTD_highbit32(offBase);
|
|
264
327
|
U32 const mlBase = matchLength - MINMATCH;
|
|
265
328
|
assert(matchLength >= MINMATCH);
|
|
266
329
|
|
|
267
|
-
if (optPtr->priceType == zop_predef) /* fixed scheme,
|
|
268
|
-
return WEIGHT(mlBase, optLevel)
|
|
330
|
+
if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
|
|
331
|
+
return WEIGHT(mlBase, optLevel)
|
|
332
|
+
+ ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
|
|
269
333
|
|
|
270
334
|
/* dynamic statistics */
|
|
271
335
|
price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
|
|
@@ -284,10 +348,10 @@ ZSTD_getMatchPrice(U32 const offset,
|
|
|
284
348
|
}
|
|
285
349
|
|
|
286
350
|
/* ZSTD_updateStats() :
|
|
287
|
-
* assumption : literals +
|
|
351
|
+
* assumption : literals + litLength <= iend */
|
|
288
352
|
static void ZSTD_updateStats(optState_t* const optPtr,
|
|
289
353
|
U32 litLength, const BYTE* literals,
|
|
290
|
-
U32
|
|
354
|
+
U32 offBase, U32 matchLength)
|
|
291
355
|
{
|
|
292
356
|
/* literals */
|
|
293
357
|
if (ZSTD_compressedLiterals(optPtr)) {
|
|
@@ -303,8 +367,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
|
|
|
303
367
|
optPtr->litLengthSum++;
|
|
304
368
|
}
|
|
305
369
|
|
|
306
|
-
/*
|
|
307
|
-
{ U32 const offCode = ZSTD_highbit32(
|
|
370
|
+
/* offset code : follows storeSeq() numeric representation */
|
|
371
|
+
{ U32 const offCode = ZSTD_highbit32(offBase);
|
|
308
372
|
assert(offCode <= MaxOff);
|
|
309
373
|
optPtr->offCodeFreq[offCode]++;
|
|
310
374
|
optPtr->offCodeSum++;
|
|
@@ -338,7 +402,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
|
338
402
|
|
|
339
403
|
/* Update hashTable3 up to ip (excluded)
|
|
340
404
|
Assumption : always within prefix (i.e. not within extDict) */
|
|
341
|
-
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
405
|
+
static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
|
342
406
|
U32* nextToUpdate3,
|
|
343
407
|
const BYTE* const ip)
|
|
344
408
|
{
|
|
@@ -364,11 +428,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|
|
364
428
|
* Binary Tree search
|
|
365
429
|
***************************************/
|
|
366
430
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
|
367
|
-
*
|
|
431
|
+
* @param ip assumed <= iend-8 .
|
|
432
|
+
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
|
368
433
|
* @return : nb of positions added */
|
|
369
434
|
static U32 ZSTD_insertBt1(
|
|
370
|
-
ZSTD_matchState_t* ms,
|
|
435
|
+
const ZSTD_matchState_t* ms,
|
|
371
436
|
const BYTE* const ip, const BYTE* const iend,
|
|
437
|
+
U32 const target,
|
|
372
438
|
U32 const mls, const int extDict)
|
|
373
439
|
{
|
|
374
440
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
@@ -391,7 +457,10 @@ static U32 ZSTD_insertBt1(
|
|
|
391
457
|
U32* smallerPtr = bt + 2*(curr&btMask);
|
|
392
458
|
U32* largerPtr = smallerPtr + 1;
|
|
393
459
|
U32 dummy32; /* to be nullified at the end */
|
|
394
|
-
|
|
460
|
+
/* windowLow is based on target because
|
|
461
|
+
* we only need positions that will be in the window at the end of the tree update.
|
|
462
|
+
*/
|
|
463
|
+
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
|
395
464
|
U32 matchEndIdx = curr+8+1;
|
|
396
465
|
size_t bestLength = 8;
|
|
397
466
|
U32 nbCompares = 1U << cParams->searchLog;
|
|
@@ -404,11 +473,12 @@ static U32 ZSTD_insertBt1(
|
|
|
404
473
|
|
|
405
474
|
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
|
406
475
|
|
|
476
|
+
assert(curr <= target);
|
|
407
477
|
assert(ip <= iend-8); /* required for h calculation */
|
|
408
478
|
hashTable[h] = curr; /* Update Hash Table */
|
|
409
479
|
|
|
410
480
|
assert(windowLow > 0);
|
|
411
|
-
|
|
481
|
+
for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
|
|
412
482
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
413
483
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
414
484
|
assert(matchIndex < curr);
|
|
@@ -492,7 +562,7 @@ void ZSTD_updateTree_internal(
|
|
|
492
562
|
idx, target, dictMode);
|
|
493
563
|
|
|
494
564
|
while(idx < target) {
|
|
495
|
-
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
|
565
|
+
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
|
496
566
|
assert(idx < (U32)(idx + forward));
|
|
497
567
|
idx += forward;
|
|
498
568
|
}
|
|
@@ -505,16 +575,17 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
|
505
575
|
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
|
|
506
576
|
}
|
|
507
577
|
|
|
508
|
-
FORCE_INLINE_TEMPLATE
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
578
|
+
FORCE_INLINE_TEMPLATE U32
|
|
579
|
+
ZSTD_insertBtAndGetAllMatches (
|
|
580
|
+
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
|
581
|
+
ZSTD_matchState_t* ms,
|
|
582
|
+
U32* nextToUpdate3,
|
|
583
|
+
const BYTE* const ip, const BYTE* const iLimit,
|
|
584
|
+
const ZSTD_dictMode_e dictMode,
|
|
585
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
586
|
+
const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
|
587
|
+
const U32 lengthToBeat,
|
|
588
|
+
const U32 mls /* template */)
|
|
518
589
|
{
|
|
519
590
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
520
591
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
@@ -597,7 +668,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
597
668
|
DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
|
|
598
669
|
repCode, ll0, repOffset, repLen);
|
|
599
670
|
bestLength = repLen;
|
|
600
|
-
matches[mnum].off = repCode - ll0;
|
|
671
|
+
matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
|
|
601
672
|
matches[mnum].len = (U32)repLen;
|
|
602
673
|
mnum++;
|
|
603
674
|
if ( (repLen > sufficient_len)
|
|
@@ -626,7 +697,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
626
697
|
bestLength = mlen;
|
|
627
698
|
assert(curr > matchIndex3);
|
|
628
699
|
assert(mnum==0); /* no prior solution */
|
|
629
|
-
matches[0].off = (curr - matchIndex3)
|
|
700
|
+
matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
|
|
630
701
|
matches[0].len = (U32)mlen;
|
|
631
702
|
mnum = 1;
|
|
632
703
|
if ( (mlen > sufficient_len) |
|
|
@@ -635,11 +706,11 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
635
706
|
return 1;
|
|
636
707
|
} } }
|
|
637
708
|
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
|
|
638
|
-
}
|
|
709
|
+
} /* if (mls == 3) */
|
|
639
710
|
|
|
640
711
|
hashTable[h] = curr; /* Update Hash Table */
|
|
641
712
|
|
|
642
|
-
|
|
713
|
+
for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
|
|
643
714
|
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
|
644
715
|
const BYTE* match;
|
|
645
716
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
@@ -659,21 +730,20 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
659
730
|
}
|
|
660
731
|
|
|
661
732
|
if (matchLength > bestLength) {
|
|
662
|
-
DEBUGLOG(8, "found match of length %u at distance %u (
|
|
663
|
-
(U32)matchLength, curr - matchIndex, curr - matchIndex
|
|
733
|
+
DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
|
|
734
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
|
664
735
|
assert(matchEndIdx > matchIndex);
|
|
665
736
|
if (matchLength > matchEndIdx - matchIndex)
|
|
666
737
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
667
738
|
bestLength = matchLength;
|
|
668
|
-
matches[mnum].off = (curr - matchIndex)
|
|
739
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
|
669
740
|
matches[mnum].len = (U32)matchLength;
|
|
670
741
|
mnum++;
|
|
671
742
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
672
743
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
673
744
|
if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
|
|
674
745
|
break; /* drop, to preserve bt consistency (miss a little bit of compression) */
|
|
675
|
-
|
|
676
|
-
}
|
|
746
|
+
} }
|
|
677
747
|
|
|
678
748
|
if (match[matchLength] < ip[matchLength]) {
|
|
679
749
|
/* match smaller than current */
|
|
@@ -692,12 +762,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
692
762
|
|
|
693
763
|
*smallerPtr = *largerPtr = 0;
|
|
694
764
|
|
|
765
|
+
assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
|
|
695
766
|
if (dictMode == ZSTD_dictMatchState && nbCompares) {
|
|
696
767
|
size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
|
|
697
768
|
U32 dictMatchIndex = dms->hashTable[dmsH];
|
|
698
769
|
const U32* const dmsBt = dms->chainTable;
|
|
699
770
|
commonLengthSmaller = commonLengthLarger = 0;
|
|
700
|
-
|
|
771
|
+
for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
|
|
701
772
|
const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
|
|
702
773
|
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
|
703
774
|
const BYTE* match = dmsBase + dictMatchIndex;
|
|
@@ -707,19 +778,18 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
707
778
|
|
|
708
779
|
if (matchLength > bestLength) {
|
|
709
780
|
matchIndex = dictMatchIndex + dmsIndexDelta;
|
|
710
|
-
DEBUGLOG(8, "found dms match of length %u at distance %u (
|
|
711
|
-
(U32)matchLength, curr - matchIndex, curr - matchIndex
|
|
781
|
+
DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
|
|
782
|
+
(U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
|
|
712
783
|
if (matchLength > matchEndIdx - matchIndex)
|
|
713
784
|
matchEndIdx = matchIndex + (U32)matchLength;
|
|
714
785
|
bestLength = matchLength;
|
|
715
|
-
matches[mnum].off = (curr - matchIndex)
|
|
786
|
+
matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
|
|
716
787
|
matches[mnum].len = (U32)matchLength;
|
|
717
788
|
mnum++;
|
|
718
789
|
if ( (matchLength > ZSTD_OPT_NUM)
|
|
719
790
|
| (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
|
|
720
791
|
break; /* drop, to guarantee consistency (miss a little bit of compression) */
|
|
721
|
-
|
|
722
|
-
}
|
|
792
|
+
} }
|
|
723
793
|
|
|
724
794
|
if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
|
|
725
795
|
if (match[matchLength] < ip[matchLength]) {
|
|
@@ -729,39 +799,91 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
|
|
729
799
|
/* match is larger than current */
|
|
730
800
|
commonLengthLarger = matchLength;
|
|
731
801
|
dictMatchIndex = nextPtr[0];
|
|
732
|
-
|
|
733
|
-
}
|
|
734
|
-
}
|
|
802
|
+
} } } /* if (dictMode == ZSTD_dictMatchState) */
|
|
735
803
|
|
|
736
804
|
assert(matchEndIdx > curr+8);
|
|
737
805
|
ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
|
|
738
806
|
return mnum;
|
|
739
807
|
}
|
|
740
808
|
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
809
|
+
typedef U32 (*ZSTD_getAllMatchesFn)(
|
|
810
|
+
ZSTD_match_t*,
|
|
811
|
+
ZSTD_matchState_t*,
|
|
812
|
+
U32*,
|
|
813
|
+
const BYTE*,
|
|
814
|
+
const BYTE*,
|
|
815
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
816
|
+
U32 const ll0,
|
|
817
|
+
U32 const lengthToBeat);
|
|
818
|
+
|
|
819
|
+
FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
|
|
820
|
+
ZSTD_match_t* matches,
|
|
821
|
+
ZSTD_matchState_t* ms,
|
|
822
|
+
U32* nextToUpdate3,
|
|
823
|
+
const BYTE* ip,
|
|
824
|
+
const BYTE* const iHighLimit,
|
|
825
|
+
const U32 rep[ZSTD_REP_NUM],
|
|
826
|
+
U32 const ll0,
|
|
827
|
+
U32 const lengthToBeat,
|
|
828
|
+
const ZSTD_dictMode_e dictMode,
|
|
829
|
+
const U32 mls)
|
|
750
830
|
{
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
ZSTD_updateTree_internal(ms, ip, iHighLimit,
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
831
|
+
assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
|
|
832
|
+
DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
|
|
833
|
+
if (ip < ms->window.base + ms->nextToUpdate)
|
|
834
|
+
return 0; /* skipped area */
|
|
835
|
+
ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
|
|
836
|
+
return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
|
|
840
|
+
|
|
841
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
|
|
842
|
+
static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
|
|
843
|
+
ZSTD_match_t* matches, \
|
|
844
|
+
ZSTD_matchState_t* ms, \
|
|
845
|
+
U32* nextToUpdate3, \
|
|
846
|
+
const BYTE* ip, \
|
|
847
|
+
const BYTE* const iHighLimit, \
|
|
848
|
+
const U32 rep[ZSTD_REP_NUM], \
|
|
849
|
+
U32 const ll0, \
|
|
850
|
+
U32 const lengthToBeat) \
|
|
851
|
+
{ \
|
|
852
|
+
return ZSTD_btGetAllMatches_internal( \
|
|
853
|
+
matches, ms, nextToUpdate3, ip, iHighLimit, \
|
|
854
|
+
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
|
|
764
855
|
}
|
|
856
|
+
|
|
857
|
+
#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \
|
|
858
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \
|
|
859
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \
|
|
860
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \
|
|
861
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
|
|
862
|
+
|
|
863
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
|
|
864
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
|
|
865
|
+
GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
|
|
866
|
+
|
|
867
|
+
#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \
|
|
868
|
+
{ \
|
|
869
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
|
|
870
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
|
|
871
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
|
|
872
|
+
ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
static ZSTD_getAllMatchesFn
|
|
876
|
+
ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
|
|
877
|
+
{
|
|
878
|
+
ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
|
|
879
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
|
|
880
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
|
|
881
|
+
ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
|
|
882
|
+
};
|
|
883
|
+
U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
|
|
884
|
+
assert((U32)dictMode < 3);
|
|
885
|
+
assert(mls - 3 < 4);
|
|
886
|
+
return getAllMatchesFns[(int)dictMode][mls - 3];
|
|
765
887
|
}
|
|
766
888
|
|
|
767
889
|
/*************************
|
|
@@ -770,16 +892,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
|
|
770
892
|
|
|
771
893
|
/* Struct containing info needed to make decision about ldm inclusion */
|
|
772
894
|
typedef struct {
|
|
773
|
-
rawSeqStore_t seqStore;
|
|
774
|
-
U32 startPosInBlock;
|
|
775
|
-
U32 endPosInBlock;
|
|
776
|
-
U32 offset;
|
|
895
|
+
rawSeqStore_t seqStore; /* External match candidates store for this block */
|
|
896
|
+
U32 startPosInBlock; /* Start position of the current match candidate */
|
|
897
|
+
U32 endPosInBlock; /* End position of the current match candidate */
|
|
898
|
+
U32 offset; /* Offset of the match candidate */
|
|
777
899
|
} ZSTD_optLdm_t;
|
|
778
900
|
|
|
779
901
|
/* ZSTD_optLdm_skipRawSeqStoreBytes():
|
|
780
|
-
* Moves forward in rawSeqStore by nbBytes,
|
|
902
|
+
* Moves forward in @rawSeqStore by @nbBytes,
|
|
903
|
+
* which will update the fields 'pos' and 'posInSequence'.
|
|
781
904
|
*/
|
|
782
|
-
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
|
905
|
+
static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
|
|
906
|
+
{
|
|
783
907
|
U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
|
|
784
908
|
while (currPos && rawSeqStore->pos < rawSeqStore->size) {
|
|
785
909
|
rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
|
|
@@ -800,8 +924,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
|
|
|
800
924
|
* Calculates the beginning and end of the next match in the current block.
|
|
801
925
|
* Updates 'pos' and 'posInSequence' of the ldmSeqStore.
|
|
802
926
|
*/
|
|
803
|
-
static void
|
|
804
|
-
|
|
927
|
+
static void
|
|
928
|
+
ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
|
|
929
|
+
U32 blockBytesRemaining)
|
|
930
|
+
{
|
|
805
931
|
rawSeq currSeq;
|
|
806
932
|
U32 currBlockEndPos;
|
|
807
933
|
U32 literalsBytesRemaining;
|
|
@@ -813,8 +939,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
|
|
|
813
939
|
optLdm->endPosInBlock = UINT_MAX;
|
|
814
940
|
return;
|
|
815
941
|
}
|
|
816
|
-
/* Calculate appropriate bytes left in matchLength and litLength
|
|
817
|
-
|
|
942
|
+
/* Calculate appropriate bytes left in matchLength and litLength
|
|
943
|
+
* after adjusting based on ldmSeqStore->posInSequence */
|
|
818
944
|
currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
|
|
819
945
|
assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
|
|
820
946
|
currBlockEndPos = currPosInBlock + blockBytesRemaining;
|
|
@@ -850,15 +976,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
|
|
|
850
976
|
}
|
|
851
977
|
|
|
852
978
|
/* ZSTD_optLdm_maybeAddMatch():
|
|
853
|
-
* Adds a match if it's long enough,
|
|
854
|
-
*
|
|
979
|
+
* Adds a match if it's long enough,
|
|
980
|
+
* based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
|
|
981
|
+
* into 'matches'. Maintains the correct ordering of 'matches'.
|
|
855
982
|
*/
|
|
856
983
|
static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
857
|
-
ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
U32
|
|
984
|
+
const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
|
|
985
|
+
{
|
|
986
|
+
U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
|
|
987
|
+
/* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
|
|
988
|
+
U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
|
|
862
989
|
|
|
863
990
|
/* Ensure that current block position is not outside of the match */
|
|
864
991
|
if (currPosInBlock < optLdm->startPosInBlock
|
|
@@ -868,10 +995,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
|
868
995
|
}
|
|
869
996
|
|
|
870
997
|
if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
|
|
871
|
-
|
|
872
|
-
|
|
998
|
+
U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
|
|
999
|
+
DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
|
|
1000
|
+
candidateOffBase, candidateMatchLength, currPosInBlock);
|
|
873
1001
|
matches[*nbMatches].len = candidateMatchLength;
|
|
874
|
-
matches[*nbMatches].off =
|
|
1002
|
+
matches[*nbMatches].off = candidateOffBase;
|
|
875
1003
|
(*nbMatches)++;
|
|
876
1004
|
}
|
|
877
1005
|
}
|
|
@@ -879,8 +1007,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
|
|
|
879
1007
|
/* ZSTD_optLdm_processMatchCandidate():
|
|
880
1008
|
* Wrapper function to update ldm seq store and call ldm functions as necessary.
|
|
881
1009
|
*/
|
|
882
|
-
static void
|
|
883
|
-
|
|
1010
|
+
static void
|
|
1011
|
+
ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
|
|
1012
|
+
ZSTD_match_t* matches, U32* nbMatches,
|
|
1013
|
+
U32 currPosInBlock, U32 remainingBytes)
|
|
1014
|
+
{
|
|
884
1015
|
if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
|
|
885
1016
|
return;
|
|
886
1017
|
}
|
|
@@ -891,19 +1022,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
|
|
|
891
1022
|
* at the end of a match from the ldm seq store, and will often be some bytes
|
|
892
1023
|
* over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
|
|
893
1024
|
*/
|
|
894
|
-
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
|
1025
|
+
U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
|
895
1026
|
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
|
896
|
-
}
|
|
1027
|
+
}
|
|
897
1028
|
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
|
898
1029
|
}
|
|
899
1030
|
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
|
900
1031
|
}
|
|
901
1032
|
|
|
1033
|
+
|
|
902
1034
|
/*-*******************************
|
|
903
1035
|
* Optimal parser
|
|
904
1036
|
*********************************/
|
|
905
1037
|
|
|
906
|
-
|
|
907
1038
|
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
|
908
1039
|
{
|
|
909
1040
|
return sol.litlen + sol.mlen;
|
|
@@ -944,6 +1075,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
944
1075
|
const BYTE* const prefixStart = base + ms->window.dictLimit;
|
|
945
1076
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
|
946
1077
|
|
|
1078
|
+
ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
|
|
1079
|
+
|
|
947
1080
|
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
|
948
1081
|
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
|
949
1082
|
U32 nextToUpdate3 = ms->nextToUpdate;
|
|
@@ -953,6 +1086,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
953
1086
|
ZSTD_optimal_t lastSequence;
|
|
954
1087
|
ZSTD_optLdm_t optLdm;
|
|
955
1088
|
|
|
1089
|
+
ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
|
|
1090
|
+
|
|
956
1091
|
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
|
957
1092
|
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
|
958
1093
|
ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
|
|
@@ -971,7 +1106,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
971
1106
|
/* find first match */
|
|
972
1107
|
{ U32 const litlen = (U32)(ip - anchor);
|
|
973
1108
|
U32 const ll0 = !litlen;
|
|
974
|
-
U32 nbMatches =
|
|
1109
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
|
975
1110
|
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
|
976
1111
|
(U32)(ip-istart), (U32)(iend - ip));
|
|
977
1112
|
if (!nbMatches) { ip++; continue; }
|
|
@@ -985,18 +1120,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
985
1120
|
* in every price. We include the literal length to avoid negative
|
|
986
1121
|
* prices when we subtract the previous literal length.
|
|
987
1122
|
*/
|
|
988
|
-
opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
|
1123
|
+
opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
|
|
989
1124
|
|
|
990
1125
|
/* large match -> immediate encoding */
|
|
991
1126
|
{ U32 const maxML = matches[nbMatches-1].len;
|
|
992
|
-
U32 const
|
|
993
|
-
DEBUGLOG(6, "found %u matches of maxLength=%u and
|
|
994
|
-
nbMatches, maxML,
|
|
1127
|
+
U32 const maxOffBase = matches[nbMatches-1].off;
|
|
1128
|
+
DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
|
|
1129
|
+
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
|
|
995
1130
|
|
|
996
1131
|
if (maxML > sufficient_len) {
|
|
997
1132
|
lastSequence.litlen = litlen;
|
|
998
1133
|
lastSequence.mlen = maxML;
|
|
999
|
-
lastSequence.off =
|
|
1134
|
+
lastSequence.off = maxOffBase;
|
|
1000
1135
|
DEBUGLOG(6, "large match (%u>%u), immediate encoding",
|
|
1001
1136
|
maxML, sufficient_len);
|
|
1002
1137
|
cur = 0;
|
|
@@ -1005,24 +1140,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1005
1140
|
} }
|
|
1006
1141
|
|
|
1007
1142
|
/* set prices for first matches starting position == 0 */
|
|
1008
|
-
|
|
1143
|
+
assert(opt[0].price >= 0);
|
|
1144
|
+
{ U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
1009
1145
|
U32 pos;
|
|
1010
1146
|
U32 matchNb;
|
|
1011
1147
|
for (pos = 1; pos < minMatch; pos++) {
|
|
1012
1148
|
opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
|
|
1013
1149
|
}
|
|
1014
1150
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
|
1015
|
-
U32 const
|
|
1151
|
+
U32 const offBase = matches[matchNb].off;
|
|
1016
1152
|
U32 const end = matches[matchNb].len;
|
|
1017
1153
|
for ( ; pos <= end ; pos++ ) {
|
|
1018
|
-
U32 const matchPrice = ZSTD_getMatchPrice(
|
|
1154
|
+
U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
|
|
1019
1155
|
U32 const sequencePrice = literalsPrice + matchPrice;
|
|
1020
1156
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
|
1021
|
-
pos, ZSTD_fCost(sequencePrice));
|
|
1157
|
+
pos, ZSTD_fCost((int)sequencePrice));
|
|
1022
1158
|
opt[pos].mlen = pos;
|
|
1023
|
-
opt[pos].off =
|
|
1159
|
+
opt[pos].off = offBase;
|
|
1024
1160
|
opt[pos].litlen = litlen;
|
|
1025
|
-
opt[pos].price = sequencePrice;
|
|
1161
|
+
opt[pos].price = (int)sequencePrice;
|
|
1026
1162
|
} }
|
|
1027
1163
|
last_pos = pos-1;
|
|
1028
1164
|
}
|
|
@@ -1037,9 +1173,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1037
1173
|
/* Fix current position with one literal if cheaper */
|
|
1038
1174
|
{ U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
|
|
1039
1175
|
int const price = opt[cur-1].price
|
|
1040
|
-
+ ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
|
1041
|
-
+ ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
|
1042
|
-
- ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
|
1176
|
+
+ (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
|
|
1177
|
+
+ (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
|
|
1178
|
+
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
|
1043
1179
|
assert(price < 1000000000); /* overflow check */
|
|
1044
1180
|
if (price <= opt[cur].price) {
|
|
1045
1181
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
|
@@ -1065,7 +1201,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1065
1201
|
assert(cur >= opt[cur].mlen);
|
|
1066
1202
|
if (opt[cur].mlen != 0) {
|
|
1067
1203
|
U32 const prev = cur - opt[cur].mlen;
|
|
1068
|
-
repcodes_t newReps =
|
|
1204
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
|
|
1069
1205
|
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
|
1070
1206
|
} else {
|
|
1071
1207
|
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
|
@@ -1082,11 +1218,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1082
1218
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
|
1083
1219
|
}
|
|
1084
1220
|
|
|
1221
|
+
assert(opt[cur].price >= 0);
|
|
1085
1222
|
{ U32 const ll0 = (opt[cur].mlen != 0);
|
|
1086
1223
|
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
|
|
1087
|
-
U32 const previousPrice = opt[cur].price;
|
|
1224
|
+
U32 const previousPrice = (U32)opt[cur].price;
|
|
1088
1225
|
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
|
1089
|
-
U32 nbMatches =
|
|
1226
|
+
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
|
1090
1227
|
U32 matchNb;
|
|
1091
1228
|
|
|
1092
1229
|
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
|
@@ -1119,12 +1256,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
|
1119
1256
|
U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
|
|
1120
1257
|
U32 mlen;
|
|
1121
1258
|
|
|
1122
|
-
DEBUGLOG(7, "testing match %u =>
|
|
1259
|
+
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
|
|
1123
1260
|
matchNb, matches[matchNb].off, lastML, litlen);
|
|
1124
1261
|
|
|
1125
1262
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
|
1126
1263
|
U32 const pos = cur + mlen;
|
|
1127
|
-
int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
|
1264
|
+
int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
|
1128
1265
|
|
|
1129
1266
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
|
1130
1267
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
|
@@ -1154,7 +1291,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1154
1291
|
* update them while traversing the sequences.
|
|
1155
1292
|
*/
|
|
1156
1293
|
if (lastSequence.mlen != 0) {
|
|
1157
|
-
repcodes_t reps =
|
|
1294
|
+
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
|
|
1158
1295
|
ZSTD_memcpy(rep, &reps, sizeof(reps));
|
|
1159
1296
|
} else {
|
|
1160
1297
|
ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
|
|
@@ -1185,7 +1322,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1185
1322
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
|
1186
1323
|
U32 const llen = opt[storePos].litlen;
|
|
1187
1324
|
U32 const mlen = opt[storePos].mlen;
|
|
1188
|
-
U32 const
|
|
1325
|
+
U32 const offBase = opt[storePos].off;
|
|
1189
1326
|
U32 const advance = llen + mlen;
|
|
1190
1327
|
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
|
1191
1328
|
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
|
@@ -1197,8 +1334,8 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1197
1334
|
}
|
|
1198
1335
|
|
|
1199
1336
|
assert(anchor + llen <= iend);
|
|
1200
|
-
ZSTD_updateStats(optStatePtr, llen, anchor,
|
|
1201
|
-
ZSTD_storeSeq(seqStore, llen, anchor, iend,
|
|
1337
|
+
ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
|
|
1338
|
+
ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
|
|
1202
1339
|
anchor += advance;
|
|
1203
1340
|
ip = anchor;
|
|
1204
1341
|
} }
|
|
@@ -1210,43 +1347,35 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
|
1210
1347
|
return (size_t)(iend - anchor);
|
|
1211
1348
|
}
|
|
1212
1349
|
|
|
1350
|
+
static size_t ZSTD_compressBlock_opt0(
|
|
1351
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1352
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
|
1353
|
+
{
|
|
1354
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
static size_t ZSTD_compressBlock_opt2(
|
|
1358
|
+
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1359
|
+
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
|
1360
|
+
{
|
|
1361
|
+
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
|
1362
|
+
}
|
|
1213
1363
|
|
|
1214
1364
|
size_t ZSTD_compressBlock_btopt(
|
|
1215
1365
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1216
1366
|
const void* src, size_t srcSize)
|
|
1217
1367
|
{
|
|
1218
1368
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
|
1219
|
-
return
|
|
1369
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1220
1370
|
}
|
|
1221
1371
|
|
|
1222
1372
|
|
|
1223
|
-
/* used in 2-pass strategy */
|
|
1224
|
-
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
|
1225
|
-
{
|
|
1226
|
-
U32 s, sum=0;
|
|
1227
|
-
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
|
1228
|
-
for (s=0; s<lastEltIndex+1; s++) {
|
|
1229
|
-
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
|
1230
|
-
table[s]--;
|
|
1231
|
-
sum += table[s];
|
|
1232
|
-
}
|
|
1233
|
-
return sum;
|
|
1234
|
-
}
|
|
1235
1373
|
|
|
1236
|
-
/* used in 2-pass strategy */
|
|
1237
|
-
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
|
1238
|
-
{
|
|
1239
|
-
if (ZSTD_compressedLiterals(optPtr))
|
|
1240
|
-
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
|
1241
|
-
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
|
1242
|
-
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
|
1243
|
-
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
|
1244
|
-
}
|
|
1245
1374
|
|
|
1246
1375
|
/* ZSTD_initStats_ultra():
|
|
1247
1376
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
|
1248
1377
|
* only works on first block, with no dictionary and no ldm.
|
|
1249
|
-
* this function cannot error,
|
|
1378
|
+
* this function cannot error out, its narrow contract must be respected.
|
|
1250
1379
|
*/
|
|
1251
1380
|
static void
|
|
1252
1381
|
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
@@ -1263,17 +1392,15 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
|
|
1263
1392
|
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
|
1264
1393
|
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
|
1265
1394
|
|
|
1266
|
-
|
|
1395
|
+
ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
|
|
1267
1396
|
|
|
1268
|
-
/* invalidate first scan from history */
|
|
1397
|
+
/* invalidate first scan from history, only keep entropy stats */
|
|
1269
1398
|
ZSTD_resetSeqStore(seqStore);
|
|
1270
1399
|
ms->window.base -= srcSize;
|
|
1271
1400
|
ms->window.dictLimit += (U32)srcSize;
|
|
1272
1401
|
ms->window.lowLimit = ms->window.dictLimit;
|
|
1273
1402
|
ms->nextToUpdate = ms->window.dictLimit;
|
|
1274
1403
|
|
|
1275
|
-
/* re-inforce weight of collected statistics */
|
|
1276
|
-
ZSTD_upscaleStats(&ms->opt);
|
|
1277
1404
|
}
|
|
1278
1405
|
|
|
1279
1406
|
size_t ZSTD_compressBlock_btultra(
|
|
@@ -1281,7 +1408,7 @@ size_t ZSTD_compressBlock_btultra(
|
|
|
1281
1408
|
const void* src, size_t srcSize)
|
|
1282
1409
|
{
|
|
1283
1410
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
|
1284
|
-
return
|
|
1411
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1285
1412
|
}
|
|
1286
1413
|
|
|
1287
1414
|
size_t ZSTD_compressBlock_btultra2(
|
|
@@ -1291,53 +1418,53 @@ size_t ZSTD_compressBlock_btultra2(
|
|
|
1291
1418
|
U32 const curr = (U32)((const BYTE*)src - ms->window.base);
|
|
1292
1419
|
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
|
1293
1420
|
|
|
1294
|
-
/* 2-
|
|
1421
|
+
/* 2-passes strategy:
|
|
1295
1422
|
* this strategy makes a first pass over first block to collect statistics
|
|
1296
|
-
*
|
|
1297
|
-
* After 1st pass, function forgets
|
|
1423
|
+
* in order to seed next round's statistics with it.
|
|
1424
|
+
* After 1st pass, function forgets history, and starts a new block.
|
|
1298
1425
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
|
1299
1426
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
|
1300
1427
|
* The compression ratio gain is generally small (~0.5% on first block),
|
|
1301
|
-
|
|
1428
|
+
** the cost is 2x cpu time on first block. */
|
|
1302
1429
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
|
1303
1430
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
|
1304
1431
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
|
1305
1432
|
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
|
1306
|
-
&& (curr == ms->window.dictLimit)
|
|
1307
|
-
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
|
1433
|
+
&& (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
|
1434
|
+
&& (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
|
|
1308
1435
|
) {
|
|
1309
1436
|
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
|
1310
1437
|
}
|
|
1311
1438
|
|
|
1312
|
-
return
|
|
1439
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
|
1313
1440
|
}
|
|
1314
1441
|
|
|
1315
1442
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
|
1316
1443
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1317
1444
|
const void* src, size_t srcSize)
|
|
1318
1445
|
{
|
|
1319
|
-
return
|
|
1446
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
|
1320
1447
|
}
|
|
1321
1448
|
|
|
1322
1449
|
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
|
1323
1450
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1324
1451
|
const void* src, size_t srcSize)
|
|
1325
1452
|
{
|
|
1326
|
-
return
|
|
1453
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
|
1327
1454
|
}
|
|
1328
1455
|
|
|
1329
1456
|
size_t ZSTD_compressBlock_btopt_extDict(
|
|
1330
1457
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1331
1458
|
const void* src, size_t srcSize)
|
|
1332
1459
|
{
|
|
1333
|
-
return
|
|
1460
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
|
1334
1461
|
}
|
|
1335
1462
|
|
|
1336
1463
|
size_t ZSTD_compressBlock_btultra_extDict(
|
|
1337
1464
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
|
1338
1465
|
const void* src, size_t srcSize)
|
|
1339
1466
|
{
|
|
1340
|
-
return
|
|
1467
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
|
1341
1468
|
}
|
|
1342
1469
|
|
|
1343
1470
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|