zstdlib 0.14.0-x86-mingw32 → 0.15.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +5 -0
- data/ext/zstdlib_c/extconf.rb +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bits.h +92 -87
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bitstream.h +26 -29
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/compiler.h +36 -22
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/cpu.h +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.c +1 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.h +0 -10
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse.h +2 -17
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse_decompress.c +2 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/huf.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/mem.h +7 -11
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.h +0 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/portability_macros.h +22 -9
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.h +0 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.h +93 -19
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_deps.h +12 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_internal.h +1 -69
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_trace.h +5 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.c +10 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.h +7 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress.c +1057 -367
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_internal.h +227 -125
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.c +1 -1
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.c +7 -7
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.h +7 -6
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.c +17 -17
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_cwksp.h +41 -24
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.c +58 -50
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h +4 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c +91 -74
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h +4 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.c +64 -64
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.h +30 -39
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.c +48 -33
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.h +6 -14
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.c +55 -51
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.h +8 -16
- data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.c +238 -0
- data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.h +33 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.c +134 -93
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.h +4 -15
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress_amd64.S +10 -3
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress.c +14 -11
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.c +6 -12
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_internal.h +5 -5
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zdict.h +15 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd.h +241 -132
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd_errors.h +1 -8
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzwrite.c +2 -1
- data/lib/2.4/zstdlib_c.so +0 -0
- data/lib/2.5/zstdlib_c.so +0 -0
- data/lib/2.6/zstdlib_c.so +0 -0
- data/lib/2.7/zstdlib_c.so +0 -0
- data/lib/3.0/zstdlib_c.so +0 -0
- data/lib/3.1/zstdlib_c.so +0 -0
- data/lib/3.2/zstdlib_c.so +0 -0
- data/lib/3.3/zstdlib_c.so +0 -0
- data/lib/3.4/zstdlib_c.so +0 -0
- metadata +75 -73
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/allocations.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/entropy_common.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_common.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/clevels.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/fse_compress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/huf_compress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm_geartab.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzclose.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzcompatibility.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzguts.h +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzlib.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzread.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.c +0 -0
- data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.h +5 -5
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h +4 -12

@@ -11,27 +11,23 @@
 #ifndef ZSTD_DOUBLE_FAST_H
 #define ZSTD_DOUBLE_FAST_H

-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h" /* U32 */
 #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */

 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR

-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                               ZSTD_tableFillPurpose_e tfp);

 size_t ZSTD_compressBlock_doubleFast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);

 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
@@ -43,8 +39,4 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
 #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */

-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_DOUBLE_FAST_H */
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c +91 -74

@@ -13,7 +13,7 @@

 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -45,12 +45,12 @@ void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
             size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
             if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
                 ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
-    }   }   }   }
+    } } } }
 }

 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -84,7 +84,7 @@ void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
     } } } }
 }

-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp)
@@ -97,6 +97,50 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }


+typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
+
+static int
+ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* Array of ~random data, should have low probability of matching data.
+     * Load from here if the index is invalid.
+     * Used to avoid unpredictable branches. */
+    static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
+
+    /* currentIdx >= lowLimit is a (somewhat) unpredictable branch.
+     * However expression below compiles into conditional move.
+     */
+    const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
+    /* Note: this used to be written as : return test1 && test2;
+     * Unfortunately, once inlined, these tests become branches,
+     * in which case it becomes critical that they are executed in the right order (test1 then test2).
+     * So we have to write these tests in a specific manner to ensure their ordering.
+     */
+    if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
+    /* force ordering of these tests, which matters once the function is inlined, as they become branches */
+#if defined(__GNUC__)
+    __asm__("");
+#endif
+    return matchIdx >= idxLowLimit;
+}
+
+static int
+ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* using a branch instead of a cmov,
+     * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
+     * aka almost all candidates are within range */
+    U32 mval;
+    if (matchIdx >= idxLowLimit) {
+        mval = MEM_read32(matchAddress);
+    } else {
+        mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
+    }
+
+    return (MEM_read32(currentPtr) == mval);
+}
+
+
 /**
  * If you squint hard enough (and ignore repcodes), the search operation at any
  * given position is broken into 4 stages:
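The new `ZSTD_match4Found_cmov` above avoids an unpredictable branch by always loading 4 bytes from a valid address: either the real candidate or a static dummy buffer. A minimal standalone sketch of the same idea, with `selectAddr` as a hypothetical stand-in for zstd's internal `ZSTD_selectAddr` (assumed here to compile to a conditional move):

#include <stdint.h>
#include <string.h>

/* Branchless select between two addresses: written as a ternary so that
 * compilers typically lower it to a conditional move rather than a branch. */
static const uint8_t* selectAddr(uint32_t idx, uint32_t lowLimit,
                                 const uint8_t* ifValid, const uint8_t* ifInvalid)
{
    return (idx >= lowLimit) ? ifValid : ifInvalid;
}

/* Mirror of the cmov match test: compare 4 bytes at the current position
 * against either the candidate or a dummy that is unlikely to match,
 * and only validate the index after the (cheap, usually-failing) compare. */
static int match4_cmov(const uint8_t* cur, const uint8_t* match,
                       uint32_t matchIdx, uint32_t lowLimit)
{
    static const uint8_t dummy[4] = { 0x12, 0x34, 0x56, 0x78 };
    const uint8_t* src = selectAddr(matchIdx, lowLimit, match, dummy);
    uint32_t a, b;
    memcpy(&a, cur, 4);   /* stands in for MEM_read32 */
    memcpy(&b, src, 4);
    if (a != b) return 0;          /* most candidates are rejected here */
    return matchIdx >= lowLimit;   /* confirm validity only on a raw hit */
}

The empty `__asm__("")` in the real code then pins the order of the two tests once the function is inlined, so the cheap compare stays first.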
@@ -146,15 +190,14 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_noDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls, U32 const hasStep)
+        U32 const mls, int useCmov)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
     U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
@@ -176,8 +219,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(

     size_t hash0; /* hash for ip0 */
     size_t hash1; /* hash for ip1 */
-    U32 idx; /* match idx for ip0 */
-    U32 mval; /* src value at match idx */
+    U32 matchIdx; /* match idx for ip0 */

     U32 offcode;
     const BYTE* match0;
@@ -190,6 +232,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     size_t step;
     const BYTE* nextStep;
     const size_t kStepIncr = (1 << (kSearchStrength - 1));
+    const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;

     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
@@ -218,7 +261,7 @@ _start: /* Requires: ip0 */
     hash0 = ZSTD_hashPtr(ip0, hlog, mls);
     hash1 = ZSTD_hashPtr(ip1, hlog, mls);

-    idx = hashTable[hash0];
+    matchIdx = hashTable[hash0];

     do {
         /* load repcode match for ip[2]*/
@@ -238,35 +281,25 @@ _start: /* Requires: ip0 */
             offcode = REPCODE1_TO_OFFBASE;
             mLength += 4;

-            /* Write next hash table entry; it's already calculated.
-             * This write is known to be safe because the ip1 is before the
+            /* Write next hash table entry: it's already calculated.
+             * This write is known to be safe because ip1 is before the
              * repcode (ip2). */
             hashTable[hash1] = (U32)(ip1 - base);

             goto _match;
         }

-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 == ip0 + 1, so
-             * we know we will resume searching after ip1 */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry (it's already calculated).
+             * This write is known to be safe because the ip1 == ip0 + 1,
+             * so searching will resume after ip1 */
             hashTable[hash1] = (U32)(ip1 - base);

             goto _offset;
         }

         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];

         /* hash ip[2] */
         hash0 = hash1;
@@ -281,36 +314,19 @@ _start: /* Requires: ip0 */
         current0 = (U32)(ip0 - base);
         hashTable[hash0] = current0;

-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* first write next hash table entry; we've already calculated it */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry, since it's already calculated */
             if (step <= 4) {
-                /* We need to avoid writing an index into the hash table >= the
-                 * position at which we will pick up our searching after we've
-                 * taken this match.
-                 *
-                 * The minimum possible match has length 4, so the earliest ip0
-                 * can be after we take this match will be the current ip0 + 4.
-                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
-                 * write this position.
-                 */
+                /* Avoid writing an index if it's >= position where search will resume.
+                 * The minimum possible match has length 4, so search can resume at ip0 + 4.
+                 */
                 hashTable[hash1] = (U32)(ip1 - base);
             }
-
             goto _offset;
         }

         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];

         /* hash ip[2] */
         hash0 = hash1;
@@ -332,7 +348,7 @@ _start: /* Requires: ip0 */
     } while (ip3 < ilimit);

 _cleanup:
-    /* Note that there are probably still a couple positions we could search.
+    /* Note that there are probably still a couple positions one could search.
     * However, it seems to be a meaningful performance hit to try to search
     * them. So let's not. */

@@ -361,7 +377,7 @@ _cleanup:
 _offset: /* Requires: ip0, idx */

     /* Compute the offset code. */
-    match0 = base + idx;
+    match0 = base + matchIdx;
     rep_offset2 = rep_offset1;
     rep_offset1 = (U32)(ip0-match0);
     offcode = OFFSET_TO_OFFBASE(rep_offset1);
@@ -406,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
     goto _start;
 }

-#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
-    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
+#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \
+    static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \
+            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
             void const* src, size_t srcSize) \
     { \
-        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
+        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
     }

 ZSTD_GEN_FAST_FN(noDict, 4, 1)
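The reworked `ZSTD_GEN_FAST_FN(dictMode, mml, cmov)` keeps zstd's long-standing specialization pattern: each expansion wraps the generic compressor with fixed arguments, so `mml` and `cmov` become compile-time constants inside every generated body and dead branches are compiled away. A reduced sketch of the pattern (names hypothetical, not zstd's):

#include <stddef.h>

/* Generic worker: once inlined into a stamped-out wrapper below, `mml`
 * and `useCmov` are literal constants, so unused paths are pruned. */
static size_t compressBlock_generic(const void* src, size_t srcSize,
                                    unsigned mml, int useCmov)
{
    (void)src; (void)mml; (void)useCmov;
    return srcSize;   /* placeholder for the real match-search loop */
}

/* Stamp out one specialized wrapper per (mml, cmov) pair. */
#define GEN_FAST_FN(mml, cmov)                                            \
    static size_t compressBlock_##mml##_##cmov(const void* src, size_t n) \
    {                                                                     \
        return compressBlock_generic(src, n, mml, cmov);                  \
    }

GEN_FAST_FN(4, 0)
GEN_FAST_FN(4, 1)

/* Runtime dispatch picks a pre-specialized variant, much as
 * ZSTD_compressBlock_fast below switches on minMatch and useCmov. */
static size_t compressBlock(const void* src, size_t n, int useCmov)
{
    return useCmov ? compressBlock_4_1(src, n) : compressBlock_4_0(src, n);
}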
@@ -425,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
 ZSTD_GEN_FAST_FN(noDict, 7, 0)

 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    U32 const mls = ms->cParams.minMatch;
+    U32 const mml = ms->cParams.minMatch;
+    /* use cmov when "candidate in range" branch is likely unpredictable */
+    int const useCmov = ms->cParams.windowLog < 19;
     assert(ms->dictMatchState == NULL);
-    if (ms->cParams.targetLength > 1) {
-        switch(mls)
+    if (useCmov) {
+        switch(mml)
         {
         default: /* includes case 3 */
         case 4 :
@@ -444,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
             return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
         }
     } else {
-        switch(mls)
+        /* use a branch instead */
+        switch(mml)
         {
         default: /* includes case 3 */
         case 4 :
@@ -456,14 +475,13 @@ size_t ZSTD_compressBlock_fast(
         case 7 :
             return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
         }
-
     }
 }

 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -482,7 +500,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];

-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
     const U32* const dictHashTable = dms->hashTable;
     const U32 dictStartIndex = dms->window.dictLimit;
@@ -546,8 +564,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
         hashTable[hash0] = curr; /* update hash table */

-        if (((U32) ((prefixStartIndex - 1) - repIndex) >=
-             3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+        if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
            && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
@@ -580,8 +597,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             }
         }

-        if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
-            /* found a regular match */
+        if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
+            /* found a regular match of size >= 4 */
             U32 const offset = (U32) (ip0 - match);
             mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
             while (((ip0 > anchor) & (match > prefixStart))
@@ -631,7 +648,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                         dictBase - dictIndexDelta + repIndex2 :
                         base + repIndex2;
-                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
                    && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -667,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
 ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)

 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
@@ -690,7 +707,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_extDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -925,7 +942,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */
     while (ip0 <= ilimit) {
         U32 const repIndex2 = (U32)(ip0-base) - offset_2;
         const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-        if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
+        if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
            && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
             const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
             size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -948,7 +965,7 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
 ZSTD_GEN_FAST_FN(extDict, 7, 0)

 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
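Several hunks above fold the repeated "intentional underflow" expression into the new `ZSTD_index_overlap_check(prefixStart, repIndex)` helper. The underlying trick is unchanged: `(U32)((prefixStart - 1) - repIndex)` wraps to a huge value when `repIndex >= prefixStart`, so a single unsigned comparison accepts a rep match lying entirely in the extDict or entirely in the prefix, and rejects only the three positions where a 4-byte match would straddle the boundary. A self-contained illustration (the macro body mirrors the removed expression; zstd 1.5.7's actual definition may differ):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t U32;

/* True when a 4-byte match starting at repIndex does not straddle the
 * dict/prefix boundary: repIndex <= prefixStart - 4 gives a difference
 * >= 3, and repIndex >= prefixStart makes the subtraction wrap to a
 * huge unsigned value, which also passes. */
#define INDEX_OVERLAP_CHECK(prefixStart, repIndex) \
    ((U32)(((prefixStart) - 1) - (repIndex)) >= 3)

int main(void)
{
    U32 const prefixStart = 100;
    U32 repIndex;
    for (repIndex = 95; repIndex <= 101; repIndex++)
        printf("repIndex=%u -> %d\n", repIndex,
               INDEX_OVERLAP_CHECK(prefixStart, repIndex));
    /* 95 and 96 pass; 97, 98, 99 (straddling) fail; 100 and 101 pass */
    return 0;
}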
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h +4 -12

@@ -11,28 +11,20 @@
 #ifndef ZSTD_FAST_H
 #define ZSTD_FAST_H

-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h" /* U32 */
 #include "zstd_compress_internal.h"

-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp);
 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);

-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_FAST_H */