zstdlib 0.14.0-x86-mingw32 → 0.15.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib_c/extconf.rb +1 -1
  4. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bits.h +92 -87
  5. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/bitstream.h +26 -29
  6. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/compiler.h +36 -22
  7. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/cpu.h +1 -1
  8. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.h +0 -9
  9. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.c +1 -0
  10. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/error_private.h +0 -10
  11. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse.h +2 -17
  12. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/fse_decompress.c +2 -0
  13. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/huf.h +0 -9
  14. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/mem.h +7 -11
  15. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.h +0 -9
  16. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/portability_macros.h +22 -9
  17. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.h +0 -8
  18. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.h +93 -19
  19. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_deps.h +12 -0
  20. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_internal.h +1 -69
  21. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_trace.h +5 -12
  22. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.c +10 -0
  23. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/hist.h +7 -0
  24. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress.c +1057 -367
  25. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_internal.h +227 -125
  26. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.c +1 -1
  27. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.c +7 -7
  28. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_sequences.h +7 -6
  29. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.c +17 -17
  30. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_cwksp.h +41 -24
  31. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.c +58 -50
  32. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h +4 -12
  33. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c +91 -74
  34. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h +4 -12
  35. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.c +64 -64
  36. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_lazy.h +30 -39
  37. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.c +48 -33
  38. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm.h +6 -14
  39. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.c +55 -51
  40. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_opt.h +8 -16
  41. data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.c +238 -0
  42. data/ext/zstdlib_c/zstd-1.5.7/lib/compress/zstd_preSplit.h +33 -0
  43. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.c +134 -93
  44. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstdmt_compress.h +4 -15
  45. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress_amd64.S +10 -3
  46. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress.c +14 -11
  47. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.c +6 -12
  48. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_internal.h +5 -5
  49. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zdict.h +15 -8
  50. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd.h +241 -132
  51. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/zstd_errors.h +1 -8
  52. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzwrite.c +2 -1
  53. data/lib/2.4/zstdlib_c.so +0 -0
  54. data/lib/2.5/zstdlib_c.so +0 -0
  55. data/lib/2.6/zstdlib_c.so +0 -0
  56. data/lib/2.7/zstdlib_c.so +0 -0
  57. data/lib/3.0/zstdlib_c.so +0 -0
  58. data/lib/3.1/zstdlib_c.so +0 -0
  59. data/lib/3.2/zstdlib_c.so +0 -0
  60. data/lib/3.3/zstdlib_c.so +0 -0
  61. data/lib/3.4/zstdlib_c.so +0 -0
  62. metadata +75 -73
  63. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/allocations.h +0 -0
  64. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/debug.c +0 -0
  65. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/entropy_common.c +0 -0
  66. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/pool.c +0 -0
  67. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/threading.c +0 -0
  68. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/xxhash.c +0 -0
  69. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/common/zstd_common.c +0 -0
  70. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/clevels.h +0 -0
  71. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/fse_compress.c +0 -0
  72. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/huf_compress.c +0 -0
  73. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_literals.h +0 -0
  74. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_compress_superblock.h +0 -0
  75. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_ldm_geartab.h +0 -0
  76. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/huf_decompress.c +0 -0
  77. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.c +0 -0
  78. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_ddict.h +0 -0
  79. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/decompress/zstd_decompress_block.h +0 -0
  80. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzclose.c +0 -0
  81. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzcompatibility.h +0 -0
  82. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzguts.h +0 -0
  83. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzlib.c +0 -0
  84. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/gzread.c +0 -0
  85. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.c +0 -0
  86. data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/zlibWrapper/zstd_zlibwrapper.h +5 -5
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_double_fast.h

@@ -11,27 +11,23 @@
 #ifndef ZSTD_DOUBLE_FAST_H
 #define ZSTD_DOUBLE_FAST_H
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h"      /* U32 */
 #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
 
 #ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
 
-void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillDoubleHashTable(ZSTD_MatchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                               ZSTD_tableFillPurpose_e tfp);
 
 size_t ZSTD_compressBlock_doubleFast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_doubleFast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast

@@ -43,8 +39,4 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
 #define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
 #endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
 
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_DOUBLE_FAST_H */
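
Note: the pure removals above reflect zstd 1.5.7 dropping the per-header extern "C" guards from internal headers. A hedged consumer-side sketch (hypothetical, not part of this diff): a C++ translation unit that includes such a header directly would now have to supply the linkage wrapper itself.

/* Hypothetical consumer-side wrapper, assuming a C++ build that
 * includes this internal header directly (not shown in this diff): */
#ifdef __cplusplus
extern "C" {
#endif
#include "zstd_double_fast.h"
#ifdef __cplusplus
}
#endif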
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.c

@@ -13,7 +13,7 @@
 
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -45,12 +45,12 @@ void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                 size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
                 if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {  /* not yet filled */
                     ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
-    }   }   }   }
+    }   }   }   }
 }
 
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
-void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -84,7 +84,7 @@ void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
     }   }   }   }
 }
 
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp)
@@ -97,6 +97,50 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }
 
 
+typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
+
+static int
+ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* Array of ~random data, should have low probability of matching data.
+     * Load from here if the index is invalid.
+     * Used to avoid unpredictable branches. */
+    static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
+
+    /* currentIdx >= lowLimit is a (somewhat) unpredictable branch.
+     * However expression below compiles into conditional move.
+     */
+    const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
+    /* Note: this used to be written as : return test1 && test2;
+     * Unfortunately, once inlined, these tests become branches,
+     * in which case it becomes critical that they are executed in the right order (test1 then test2).
+     * So we have to write these tests in a specific manner to ensure their ordering.
+     */
+    if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
+    /* force ordering of these tests, which matters once the function is inlined, as they become branches */
+#if defined(__GNUC__)
+    __asm__("");
+#endif
+    return matchIdx >= idxLowLimit;
+}
+
+static int
+ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* using a branch instead of a cmov,
+     * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
+     * aka almost all candidates are within range */
+    U32 mval;
+    if (matchIdx >= idxLowLimit) {
+        mval = MEM_read32(matchAddress);
+    } else {
+        mval = MEM_read32(currentPtr) ^ 1;  /* guaranteed to not match. */
+    }
+
+    return (MEM_read32(currentPtr) == mval);
+}
+
+
 /**
  * If you squint hard enough (and ignore repcodes), the search operation at any
  * given position is broken into 4 stages:
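
Note: ZSTD_selectAddr is used in the new ZSTD_match4Found_cmov but is not defined in this hunk (the common/compiler.h changes listed above are a plausible home). A minimal portable sketch, assuming only the call shape seen here: it should yield the match address when the index is in range and the dummy array otherwise, written so the compiler can lower the selection to a conditional move.

/* Sketch only (an assumption, not upstream's definition): select between
 * two addresses without a data-dependent branch. */
static const BYTE* ZSTD_selectAddr_sketch(U32 index, U32 lowLimit,
                                          const BYTE* addrIfInRange,
                                          const BYTE* addrOtherwise)
{
    return (index >= lowLimit) ? addrIfInRange : addrOtherwise;  /* typically a cmov on x86-64 */
}

Upstream may additionally force the conditional move with target-specific inline assembly; the ternary form above is the portable fallback.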
@@ -146,15 +190,14 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_noDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls, U32 const hasStep)
+        U32 const mls, int useCmov)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
     U32 const hlog = cParams->hashLog;
-    /* support stepSize of 0 */
-    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;   /* min 2 */
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
     const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
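
Note: the rewritten stepSize line drops the hasStep special case (the template parameter now selects cmov vs. branch instead) and always evaluates targetLength + !(targetLength) + 1, which can never fall below 2. A quick worked illustration (demonstration code, not from the diff):

#include <stdio.h>

/* stepSize = targetLength + !(targetLength) + 1 : the !() term bumps
 * targetLength==0 up by one, so the result is always >= 2. */
static size_t stepSizeOf(unsigned targetLength)
{
    return (size_t)targetLength + !targetLength + 1;
}

int main(void)
{
    unsigned samples[] = {0, 1, 2, 7};
    size_t i;
    for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
        printf("targetLength=%u -> stepSize=%zu\n", samples[i], stepSizeOf(samples[i]));
    return 0;   /* prints 2, 2, 3, 8 */
}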
@@ -176,8 +219,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
 
     size_t hash0; /* hash for ip0 */
     size_t hash1; /* hash for ip1 */
-    U32 idx; /* match idx for ip0 */
-    U32 mval; /* src value at match idx */
+    U32 matchIdx; /* match idx for ip0 */
 
     U32 offcode;
     const BYTE* match0;
@@ -190,6 +232,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     size_t step;
     const BYTE* nextStep;
     const size_t kStepIncr = (1 << (kSearchStrength - 1));
+    const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
 
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
@@ -218,7 +261,7 @@ _start: /* Requires: ip0 */
     hash0 = ZSTD_hashPtr(ip0, hlog, mls);
     hash1 = ZSTD_hashPtr(ip1, hlog, mls);
 
-    idx = hashTable[hash0];
+    matchIdx = hashTable[hash0];
 
     do {
         /* load repcode match for ip[2]*/
@@ -238,35 +281,25 @@ _start: /* Requires: ip0 */
             offcode = REPCODE1_TO_OFFBASE;
             mLength += 4;
 
-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 is before the
+            /* Write next hash table entry: it's already calculated.
+             * This write is known to be safe because ip1 is before the
              * repcode (ip2). */
             hashTable[hash1] = (U32)(ip1 - base);
 
             goto _match;
         }
 
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* First write next hash table entry; we've already calculated it.
-             * This write is known to be safe because the ip1 == ip0 + 1, so
-             * we know we will resume searching after ip1 */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry (it's already calculated).
+             * This write is known to be safe because the ip1 == ip0 + 1,
+             * so searching will resume after ip1 */
             hashTable[hash1] = (U32)(ip1 - base);
 
             goto _offset;
         }
 
         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];
 
         /* hash ip[2] */
         hash0 = hash1;
@@ -281,36 +314,19 @@ _start: /* Requires: ip0 */
         current0 = (U32)(ip0 - base);
         hashTable[hash0] = current0;
 
-        /* load match for ip[0] */
-        if (idx >= prefixStartIndex) {
-            mval = MEM_read32(base + idx);
-        } else {
-            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
-        }
-
-        /* check match at ip[0] */
-        if (MEM_read32(ip0) == mval) {
-            /* found a match! */
-
-            /* first write next hash table entry; we've already calculated it */
+        if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
+            /* Write next hash table entry, since it's already calculated */
             if (step <= 4) {
-                /* We need to avoid writing an index into the hash table >= the
-                 * position at which we will pick up our searching after we've
-                 * taken this match.
-                 *
-                 * The minimum possible match has length 4, so the earliest ip0
-                 * can be after we take this match will be the current ip0 + 4.
-                 * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely
-                 * write this position.
-                 */
+                /* Avoid writing an index if it's >= position where search will resume.
+                 * The minimum possible match has length 4, so search can resume at ip0 + 4.
+                 */
                 hashTable[hash1] = (U32)(ip1 - base);
             }
-
             goto _offset;
         }
 
         /* lookup ip[1] */
-        idx = hashTable[hash1];
+        matchIdx = hashTable[hash1];
 
         /* hash ip[2] */
         hash0 = hash1;
@@ -332,7 +348,7 @@ _start: /* Requires: ip0 */
     } while (ip3 < ilimit);
 
 _cleanup:
-    /* Note that there are probably still a couple positions we could search.
+    /* Note that there are probably still a couple positions one could search.
      * However, it seems to be a meaningful performance hit to try to search
      * them. So let's not. */
 
@@ -361,7 +377,7 @@ _cleanup:
 _offset: /* Requires: ip0, idx */
 
     /* Compute the offset code. */
-    match0 = base + idx;
+    match0 = base + matchIdx;
     rep_offset2 = rep_offset1;
     rep_offset1 = (U32)(ip0-match0);
     offcode = OFFSET_TO_OFFBASE(rep_offset1);
@@ -406,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
     goto _start;
 }
 
-#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
-    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                   \
-            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
+#define ZSTD_GEN_FAST_FN(dictMode, mml, cmov)                                                            \
+    static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov(                                   \
+            ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
             void const* src, size_t srcSize)                                                             \
     {                                                                                                    \
-        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
+        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
     }
 
 ZSTD_GEN_FAST_FN(noDict, 4, 1)
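
For illustration, the instantiation ZSTD_GEN_FAST_FN(noDict, 4, 1) above expands (modulo whitespace) to the following concrete specialization, letting the compiler fold the mml/cmov parameters into constants inside the FORCE_INLINE_TEMPLATE body:

/* Expansion of ZSTD_GEN_FAST_FN(noDict, 4, 1), shown for clarity: */
static size_t ZSTD_compressBlock_fast_noDict_4_1(
        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
{
    return ZSTD_compressBlock_fast_noDict_generic(ms, seqStore, rep, src, srcSize, 4, 1);
}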
@@ -425,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
 ZSTD_GEN_FAST_FN(noDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    U32 const mls = ms->cParams.minMatch;
+    U32 const mml = ms->cParams.minMatch;
+    /* use cmov when "candidate in range" branch is likely unpredictable */
+    int const useCmov = ms->cParams.windowLog < 19;
     assert(ms->dictMatchState == NULL);
-    if (ms->cParams.targetLength > 1) {
-        switch(mls)
+    if (useCmov) {
+        switch(mml)
         {
         default: /* includes case 3 */
         case 4 :
@@ -444,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
             return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
         }
     } else {
-        switch(mls)
+        /* use a branch instead */
+        switch(mml)
         {
         default: /* includes case 3 */
         case 4 :
@@ -456,14 +475,13 @@ size_t ZSTD_compressBlock_fast(
         case 7 :
             return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
         }
-
     }
 }
 
 FORCE_INLINE_TEMPLATE
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -482,7 +500,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
     U32 offset_1=rep[0], offset_2=rep[1];
 
-    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_MatchState_t* const dms = ms->dictMatchState;
     const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
     const U32* const dictHashTable = dms->hashTable;
     const U32 dictStartIndex = dms->window.dictLimit;
@@ -546,8 +564,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
         hashTable[hash0] = curr;   /* update hash table */
 
-        if (((U32) ((prefixStartIndex - 1) - repIndex) >=
-             3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+        if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
             && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
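
Note: ZSTD_index_overlap_check is new in 1.5.7 and its definition is not part of this diff. Reconstructed from the inline expression it replaces above, it presumably reads roughly as follows (hypothetical sketch, hedged):

/* Hypothetical reconstruction from the replaced expression above:
 * true when repIndex sits at least 3 positions below prefixStartIndex,
 * so a 4-byte read at the rep match cannot straddle the dict/prefix
 * boundary; via intentional U32 underflow it is also true whenever
 * repIndex >= prefixStartIndex. */
#define ZSTD_index_overlap_check(prefixStartIndex, repIndex) \
    ((U32)((prefixStartIndex) - 1 - (repIndex)) >= 3)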
@@ -580,8 +597,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             }
         }
 
-        if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) {
-            /* found a regular match */
+        if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
+            /* found a regular match of size >= 4 */
             U32 const offset = (U32) (ip0 - match);
             mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
             while (((ip0 > anchor) & (match > prefixStart))
@@ -631,7 +648,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
                     dictBase - dictIndexDelta + repIndex2 :
                     base + repIndex2;
-            if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+            if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
               && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -667,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
 ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
 
 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
@@ -690,7 +707,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
 static
 ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_extDict_generic(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -925,7 +942,7 @@ _match: /* Requires: ip0, match0, offcode, matchEnd */
     while (ip0 <= ilimit) {
         U32 const repIndex2 = (U32)(ip0-base) - offset_2;
         const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-        if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */
+        if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
          && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
             const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
             size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -948,7 +965,7 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
 ZSTD_GEN_FAST_FN(extDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     U32 const mls = ms->cParams.minMatch;
data/ext/zstdlib_c/{zstd-1.5.6 → zstd-1.5.7}/lib/compress/zstd_fast.h

@@ -11,28 +11,20 @@
 #ifndef ZSTD_FAST_H
 #define ZSTD_FAST_H
 
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
 #include "../common/mem.h"      /* U32 */
 #include "zstd_compress_internal.h"
 
-void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
                         void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                         ZSTD_tableFillPurpose_e tfp);
 size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_dictMatchState(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_fast_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
-#if defined (__cplusplus)
-}
-#endif
-
 #endif /* ZSTD_FAST_H */