multi_compress 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +22 -1
  9. data/ext/multi_compress/multi_compress.c +152 -72
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. metadata +29 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -10,14 +10,23 @@
10
10
 
11
11
  #include "zstd_compress_internal.h"
12
12
  #include "zstd_lazy.h"
13
+ #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
14
+
15
+ #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
16
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
17
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
18
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
19
+
20
+ #define kLazySkippingStep 8
13
21
 
14
22
 
15
23
  /*-*************************************
16
24
  * Binary Tree search
17
25
  ***************************************/
18
26
 
19
- static void
20
- ZSTD_updateDUBT(ZSTD_matchState_t* ms,
27
+ static
28
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
29
+ void ZSTD_updateDUBT(ZSTD_MatchState_t* ms,
21
30
  const BYTE* ip, const BYTE* iend,
22
31
  U32 mls)
23
32
  {
@@ -60,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
60
69
  * sort one already inserted but unsorted position
61
70
  * assumption : curr >= btlow == (curr - btmask)
62
71
  * doesn't fail */
63
- static void
64
- ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
72
+ static
73
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
74
+ void ZSTD_insertDUBT1(const ZSTD_MatchState_t* ms,
65
75
  U32 curr, const BYTE* inputEnd,
66
76
  U32 nbCompares, U32 btLow,
67
77
  const ZSTD_dictMode_e dictMode)
@@ -149,9 +159,10 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
149
159
  }
150
160
 
151
161
 
152
- static size_t
153
- ZSTD_DUBT_findBetterDictMatch (
154
- const ZSTD_matchState_t* ms,
162
+ static
163
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
164
+ size_t ZSTD_DUBT_findBetterDictMatch (
165
+ const ZSTD_MatchState_t* ms,
155
166
  const BYTE* const ip, const BYTE* const iend,
156
167
  size_t* offsetPtr,
157
168
  size_t bestLength,
@@ -159,7 +170,7 @@ ZSTD_DUBT_findBetterDictMatch (
159
170
  U32 const mls,
160
171
  const ZSTD_dictMode_e dictMode)
161
172
  {
162
- const ZSTD_matchState_t * const dms = ms->dictMatchState;
173
+ const ZSTD_MatchState_t * const dms = ms->dictMatchState;
163
174
  const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
164
175
  const U32 * const dictHashTable = dms->hashTable;
165
176
  U32 const hashLog = dmsCParams->hashLog;
@@ -197,8 +208,8 @@ ZSTD_DUBT_findBetterDictMatch (
197
208
  U32 matchIndex = dictMatchIndex + dictIndexDelta;
198
209
  if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199
210
  DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200
- curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
201
- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
211
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex);
212
+ bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
202
213
  }
203
214
  if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204
215
  break; /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,7 +229,7 @@ ZSTD_DUBT_findBetterDictMatch (
218
229
  }
219
230
 
220
231
  if (bestLength >= MINMATCH) {
221
- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
232
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex;
222
233
  DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223
234
  curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
224
235
  }
@@ -227,10 +238,11 @@ ZSTD_DUBT_findBetterDictMatch (
227
238
  }
228
239
 
229
240
 
230
- static size_t
231
- ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
241
+ static
242
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
243
+ size_t ZSTD_DUBT_findBestMatch(ZSTD_MatchState_t* ms,
232
244
  const BYTE* const ip, const BYTE* const iend,
233
- size_t* offsetPtr,
245
+ size_t* offBasePtr,
234
246
  U32 const mls,
235
247
  const ZSTD_dictMode_e dictMode)
236
248
  {
@@ -327,8 +339,8 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
327
339
  if (matchLength > bestLength) {
328
340
  if (matchLength > matchEndIdx - matchIndex)
329
341
  matchEndIdx = matchIndex + (U32)matchLength;
330
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
- bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
342
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) )
343
+ bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex);
332
344
  if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
333
345
  if (dictMode == ZSTD_dictMatchState) {
334
346
  nbCompares = 0; /* in addition to avoiding checking any
@@ -361,16 +373,16 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
361
373
  if (dictMode == ZSTD_dictMatchState && nbCompares) {
362
374
  bestLength = ZSTD_DUBT_findBetterDictMatch(
363
375
  ms, ip, iend,
364
- offsetPtr, bestLength, nbCompares,
376
+ offBasePtr, bestLength, nbCompares,
365
377
  mls, dictMode);
366
378
  }
367
379
 
368
380
  assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
369
381
  ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
370
382
  if (bestLength >= MINMATCH) {
371
- U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
383
+ U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex;
372
384
  DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
373
- curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
385
+ curr, (U32)bestLength, (U32)*offBasePtr, mIndex);
374
386
  }
375
387
  return bestLength;
376
388
  }
@@ -378,24 +390,25 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
378
390
 
379
391
 
380
392
  /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
381
- FORCE_INLINE_TEMPLATE size_t
382
- ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
393
+ FORCE_INLINE_TEMPLATE
394
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
395
+ size_t ZSTD_BtFindBestMatch( ZSTD_MatchState_t* ms,
383
396
  const BYTE* const ip, const BYTE* const iLimit,
384
- size_t* offsetPtr,
397
+ size_t* offBasePtr,
385
398
  const U32 mls /* template */,
386
399
  const ZSTD_dictMode_e dictMode)
387
400
  {
388
401
  DEBUGLOG(7, "ZSTD_BtFindBestMatch");
389
402
  if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
390
403
  ZSTD_updateDUBT(ms, ip, iLimit, mls);
391
- return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
404
+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode);
392
405
  }
393
406
 
394
407
  /***********************************
395
408
  * Dedicated dict search
396
409
  ***********************************/
397
410
 
398
- void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
411
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_MatchState_t* ms, const BYTE* const ip)
399
412
  {
400
413
  const BYTE* const base = ms->window.base;
401
414
  U32 const target = (U32)(ip - base);
@@ -514,7 +527,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
514
527
  */
515
528
  FORCE_INLINE_TEMPLATE
516
529
  size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
517
- const ZSTD_matchState_t* const dms,
530
+ const ZSTD_MatchState_t* const dms,
518
531
  const BYTE* const ip, const BYTE* const iLimit,
519
532
  const BYTE* const prefixStart, const U32 curr,
520
533
  const U32 dictLimit, const size_t ddsIdx) {
@@ -561,7 +574,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
561
574
  /* save best solution */
562
575
  if (currentMl > ml) {
563
576
  ml = currentMl;
564
- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
577
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
565
578
  if (ip+currentMl == iLimit) {
566
579
  /* best possible, avoids read overflow on next attempt */
567
580
  return ml;
@@ -598,7 +611,7 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
598
611
  /* save best solution */
599
612
  if (currentMl > ml) {
600
613
  ml = currentMl;
601
- *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
614
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta));
602
615
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
603
616
  }
604
617
  }
@@ -614,10 +627,12 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
614
627
 
615
628
  /* Update chains up to ip (excluded)
616
629
  Assumption : always within prefix (i.e. not within extDict) */
617
- FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
618
- ZSTD_matchState_t* ms,
630
+ FORCE_INLINE_TEMPLATE
631
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
632
+ U32 ZSTD_insertAndFindFirstIndex_internal(
633
+ ZSTD_MatchState_t* ms,
619
634
  const ZSTD_compressionParameters* const cParams,
620
- const BYTE* ip, U32 const mls)
635
+ const BYTE* ip, U32 const mls, U32 const lazySkipping)
621
636
  {
622
637
  U32* const hashTable = ms->hashTable;
623
638
  const U32 hashLog = cParams->hashLog;
@@ -632,21 +647,25 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
632
647
  NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
633
648
  hashTable[h] = idx;
634
649
  idx++;
650
+ /* Stop inserting every position when in the lazy skipping mode. */
651
+ if (lazySkipping)
652
+ break;
635
653
  }
636
654
 
637
655
  ms->nextToUpdate = target;
638
656
  return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
639
657
  }
640
658
 
641
- U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
659
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_MatchState_t* ms, const BYTE* ip) {
642
660
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
643
- return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
661
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0);
644
662
  }
645
663
 
646
664
  /* inlining is important to hardwire a hot branch (template emulation) */
647
665
  FORCE_INLINE_TEMPLATE
666
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
648
667
  size_t ZSTD_HcFindBestMatch(
649
- ZSTD_matchState_t* ms,
668
+ ZSTD_MatchState_t* ms,
650
669
  const BYTE* const ip, const BYTE* const iLimit,
651
670
  size_t* offsetPtr,
652
671
  const U32 mls, const ZSTD_dictMode_e dictMode)
@@ -670,7 +689,7 @@ size_t ZSTD_HcFindBestMatch(
670
689
  U32 nbAttempts = 1U << cParams->searchLog;
671
690
  size_t ml=4-1;
672
691
 
673
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
692
+ const ZSTD_MatchState_t* const dms = ms->dictMatchState;
674
693
  const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
675
694
  ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
676
695
  const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
@@ -684,14 +703,15 @@ size_t ZSTD_HcFindBestMatch(
684
703
  }
685
704
 
686
705
  /* HC4 match finder */
687
- matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
706
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping);
688
707
 
689
708
  for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
690
709
  size_t currentMl=0;
691
710
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
692
711
  const BYTE* const match = base + matchIndex;
693
712
  assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
694
- if (match[ml] == ip[ml]) /* potentially better */
713
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
714
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
695
715
  currentMl = ZSTD_count(ip, match, iLimit);
696
716
  } else {
697
717
  const BYTE* const match = dictBase + matchIndex;
@@ -703,7 +723,7 @@ size_t ZSTD_HcFindBestMatch(
703
723
  /* save best solution */
704
724
  if (currentMl > ml) {
705
725
  ml = currentMl;
706
- *offsetPtr = STORE_OFFSET(curr - matchIndex);
726
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
707
727
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
708
728
  }
709
729
 
@@ -739,7 +759,7 @@ size_t ZSTD_HcFindBestMatch(
739
759
  if (currentMl > ml) {
740
760
  ml = currentMl;
741
761
  assert(curr > matchIndex + dmsIndexDelta);
742
- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
762
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
743
763
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
744
764
  }
745
765
 
@@ -756,8 +776,6 @@ size_t ZSTD_HcFindBestMatch(
756
776
  * (SIMD) Row-based matchfinder
757
777
  ***********************************/
758
778
  /* Constants for row-based hash */
759
- #define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
760
- #define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
761
779
  #define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
762
780
  #define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
763
781
 
@@ -769,73 +787,19 @@ typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 repr
769
787
  * Starting from the LSB, returns the idx of the next non-zero bit.
770
788
  * Basically counting the nb of trailing zeroes.
771
789
  */
772
- static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
773
- assert(val != 0);
774
- # if defined(_MSC_VER) && defined(_WIN64)
775
- if (val != 0) {
776
- unsigned long r;
777
- _BitScanForward64(&r, val);
778
- return (U32)(r);
779
- } else {
780
- /* Should not reach this code path */
781
- __assume(0);
782
- }
783
- # elif (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
784
- if (sizeof(size_t) == 4) {
785
- U32 mostSignificantWord = (U32)(val >> 32);
786
- U32 leastSignificantWord = (U32)val;
787
- if (leastSignificantWord == 0) {
788
- return 32 + (U32)__builtin_ctz(mostSignificantWord);
789
- } else {
790
- return (U32)__builtin_ctz(leastSignificantWord);
791
- }
792
- } else {
793
- return (U32)__builtin_ctzll(val);
794
- }
795
- # else
796
- /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
797
- * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
798
- */
799
- val = ~val & (val - 1ULL); /* Lowest set bit mask */
800
- val = val - ((val >> 1) & 0x5555555555555555);
801
- val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
802
- return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
803
- # endif
804
- }
805
-
806
- /* ZSTD_rotateRight_*():
807
- * Rotates a bitfield to the right by "count" bits.
808
- * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
809
- */
810
- FORCE_INLINE_TEMPLATE
811
- U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
812
- assert(count < 64);
813
- count &= 0x3F; /* for fickle pattern recognition */
814
- return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
815
- }
816
-
817
- FORCE_INLINE_TEMPLATE
818
- U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
819
- assert(count < 32);
820
- count &= 0x1F; /* for fickle pattern recognition */
821
- return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
822
- }
823
-
824
- FORCE_INLINE_TEMPLATE
825
- U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
826
- assert(count < 16);
827
- count &= 0x0F; /* for fickle pattern recognition */
828
- return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
790
+ MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
791
+ return ZSTD_countTrailingZeros64(val);
829
792
  }
830
793
 
831
794
  /* ZSTD_row_nextIndex():
832
795
  * Returns the next index to insert at within a tagTable row, and updates the "head"
833
- * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
796
+ * value to reflect the update. Essentially cycles backwards from [1, {entries per row})
834
797
  */
835
798
  FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
836
- U32 const next = (*tagRow - 1) & rowMask;
837
- *tagRow = (BYTE)next;
838
- return next;
799
+ U32 next = (*tagRow-1) & rowMask;
800
+ next += (next == 0) ? rowMask : 0; /* skip first position */
801
+ *tagRow = (BYTE)next;
802
+ return next;
839
803
  }
840
804
 
841
805
  /* ZSTD_isAligned():
@@ -849,7 +813,7 @@ MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
849
813
  /* ZSTD_row_prefetch():
850
814
  * Performs prefetching for the hashTable and tagTable at a given row.
851
815
  */
852
- FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
816
+ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) {
853
817
  PREFETCH_L1(hashTable + relRow);
854
818
  if (rowLog >= 5) {
855
819
  PREFETCH_L1(hashTable + relRow + 16);
@@ -868,18 +832,20 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* ta
868
832
  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
869
833
  * but not beyond iLimit.
870
834
  */
871
- FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
835
+ FORCE_INLINE_TEMPLATE
836
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
837
+ void ZSTD_row_fillHashCache(ZSTD_MatchState_t* ms, const BYTE* base,
872
838
  U32 const rowLog, U32 const mls,
873
839
  U32 idx, const BYTE* const iLimit)
874
840
  {
875
841
  U32 const* const hashTable = ms->hashTable;
876
- U16 const* const tagTable = ms->tagTable;
842
+ BYTE const* const tagTable = ms->tagTable;
877
843
  U32 const hashLog = ms->rowHashLog;
878
844
  U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
879
845
  U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
880
846
 
881
847
  for (; idx < lim; ++idx) {
882
- U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
848
+ U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
883
849
  U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
884
850
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
885
851
  ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
@@ -894,12 +860,15 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
894
860
  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
895
861
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
896
862
  */
897
- FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
898
- U16 const* tagTable, BYTE const* base,
863
+ FORCE_INLINE_TEMPLATE
864
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
865
+ U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
866
+ BYTE const* tagTable, BYTE const* base,
899
867
  U32 idx, U32 const hashLog,
900
- U32 const rowLog, U32 const mls)
868
+ U32 const rowLog, U32 const mls,
869
+ U64 const hashSalt)
901
870
  {
902
- U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
871
+ U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
903
872
  U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
904
873
  ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
905
874
  { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
@@ -911,28 +880,29 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
911
880
  /* ZSTD_row_update_internalImpl():
912
881
  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
913
882
  */
914
- FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
915
- U32 updateStartIdx, U32 const updateEndIdx,
916
- U32 const mls, U32 const rowLog,
917
- U32 const rowMask, U32 const useCache)
883
+ FORCE_INLINE_TEMPLATE
884
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
885
+ void ZSTD_row_update_internalImpl(ZSTD_MatchState_t* ms,
886
+ U32 updateStartIdx, U32 const updateEndIdx,
887
+ U32 const mls, U32 const rowLog,
888
+ U32 const rowMask, U32 const useCache)
918
889
  {
919
890
  U32* const hashTable = ms->hashTable;
920
- U16* const tagTable = ms->tagTable;
891
+ BYTE* const tagTable = ms->tagTable;
921
892
  U32 const hashLog = ms->rowHashLog;
922
893
  const BYTE* const base = ms->window.base;
923
894
 
924
895
  DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
925
896
  for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
926
- U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
927
- : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
897
+ U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt)
898
+ : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt);
928
899
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
929
900
  U32* const row = hashTable + relRow;
930
- BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
931
- Explicit cast allows us to get exact desired position within each row */
901
+ BYTE* tagRow = tagTable + relRow;
932
902
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
933
903
 
934
- assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
935
- ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
904
+ assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt));
905
+ tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK;
936
906
  row[pos] = updateStartIdx;
937
907
  }
938
908
  }
@@ -941,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
941
911
  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
942
912
  * Skips sections of long matches as is necessary.
943
913
  */
944
- FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
945
- U32 const mls, U32 const rowLog,
946
- U32 const rowMask, U32 const useCache)
914
+ FORCE_INLINE_TEMPLATE
915
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
916
+ void ZSTD_row_update_internal(ZSTD_MatchState_t* ms, const BYTE* ip,
917
+ U32 const mls, U32 const rowLog,
918
+ U32 const rowMask, U32 const useCache)
947
919
  {
948
920
  U32 idx = ms->nextToUpdate;
949
921
  const BYTE* const base = ms->window.base;
@@ -974,13 +946,41 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const
974
946
  * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
975
947
  * processing.
976
948
  */
977
- void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
949
+ void ZSTD_row_update(ZSTD_MatchState_t* const ms, const BYTE* ip) {
978
950
  const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
979
951
  const U32 rowMask = (1u << rowLog) - 1;
980
952
  const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
981
953
 
982
954
  DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
983
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
955
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
956
+ }
957
+
958
+ /* Returns the width, in bits, of the group that represents each row entry in
959
+ * the match mask. Not all architectures have a cheap movemask instruction, so
960
+ * working with groups of bits makes iterating over the matches easier and faster.
961
+ */
962
+ FORCE_INLINE_TEMPLATE U32
963
+ ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
964
+ {
965
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
966
+ assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
967
+ (void)rowEntries;
968
+ #if defined(ZSTD_ARCH_ARM_NEON)
969
+ /* NEON path only works for little endian */
970
+ if (!MEM_isLittleEndian()) {
971
+ return 1;
972
+ }
973
+ if (rowEntries == 16) {
974
+ return 4;
975
+ }
976
+ if (rowEntries == 32) {
977
+ return 2;
978
+ }
979
+ if (rowEntries == 64) {
980
+ return 1;
981
+ }
982
+ #endif
983
+ return 1;
984
984
  }
985
985
 
986
986
  #if defined(ZSTD_ARCH_X86_SSE2)
@@ -1003,71 +1003,82 @@ ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U
1003
1003
  }
1004
1004
  #endif
1005
1005
 
1006
- /* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
1007
- * the hash at the nth position in a row of the tagTable.
1008
- * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
1009
- * to match up with the actual layout of the entries within the hashTable */
1006
+ #if defined(ZSTD_ARCH_ARM_NEON)
1007
+ FORCE_INLINE_TEMPLATE ZSTD_VecMask
1008
+ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag, const U32 headGrouped)
1009
+ {
1010
+ assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
1011
+ if (rowEntries == 16) {
1012
+ /* vshrn_n_u16 shifts by 4 every u16 and narrows to 8 lower bits.
1013
+ * After that groups of 4 bits represent the equalMask. We lower
1014
+ * all bits except the highest in these groups by doing AND with
1015
+ * 0x88 = 0b10001000.
1016
+ */
1017
+ const uint8x16_t chunk = vld1q_u8(src);
1018
+ const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
1019
+ const uint8x8_t res = vshrn_n_u16(equalMask, 4);
1020
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0);
1021
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x8888888888888888ull;
1022
+ } else if (rowEntries == 32) {
1023
+ /* Same idea as with rowEntries == 16 but doing AND with
1024
+ * 0x55 = 0b01010101.
1025
+ */
1026
+ const uint16x8x2_t chunk = vld2q_u16((const uint16_t*)(const void*)src);
1027
+ const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
1028
+ const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
1029
+ const uint8x16_t dup = vdupq_n_u8(tag);
1030
+ const uint8x8_t t0 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk0, dup)), 6);
1031
+ const uint8x8_t t1 = vshrn_n_u16(vreinterpretq_u16_u8(vceqq_u8(chunk1, dup)), 6);
1032
+ const uint8x8_t res = vsli_n_u8(t0, t1, 4);
1033
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(res), 0) ;
1034
+ return ZSTD_rotateRight_U64(matches, headGrouped) & 0x5555555555555555ull;
1035
+ } else { /* rowEntries == 64 */
1036
+ const uint8x16x4_t chunk = vld4q_u8(src);
1037
+ const uint8x16_t dup = vdupq_n_u8(tag);
1038
+ const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
1039
+ const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
1040
+ const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
1041
+ const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
1042
+
1043
+ const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
1044
+ const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
1045
+ const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
1046
+ const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
1047
+ const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
1048
+ const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
1049
+ return ZSTD_rotateRight_U64(matches, headGrouped);
1050
+ }
1051
+ }
1052
+ #endif
1053
+
1054
+ /* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
1055
+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newly-computed "tag"
1056
+ * matches the hash at the nth position in a row of the tagTable.
1057
+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
1058
+ * must rotate the "matches" bitfield to match up with the actual layout of the
1059
+ * entries within the hashTable */
1010
1060
  FORCE_INLINE_TEMPLATE ZSTD_VecMask
1011
- ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
1061
+ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
1012
1062
  {
1013
- const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
1063
+ const BYTE* const src = tagRow;
1014
1064
  assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
1015
1065
  assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
1066
+ assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
1016
1067
 
1017
1068
  #if defined(ZSTD_ARCH_X86_SSE2)
1018
1069
 
1019
- return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
1070
+ return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
1020
1071
 
1021
1072
  #else /* SW or NEON-LE */
1022
1073
 
1023
1074
  # if defined(ZSTD_ARCH_ARM_NEON)
1024
1075
  /* This NEON path only works for little endian - otherwise use SWAR below */
1025
1076
  if (MEM_isLittleEndian()) {
1026
- if (rowEntries == 16) {
1027
- const uint8x16_t chunk = vld1q_u8(src);
1028
- const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
1029
- const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
1030
- const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
1031
- const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
1032
- const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
1033
- const U16 hi = (U16)vgetq_lane_u8(t3, 8);
1034
- const U16 lo = (U16)vgetq_lane_u8(t3, 0);
1035
- return ZSTD_rotateRight_U16((hi << 8) | lo, head);
1036
- } else if (rowEntries == 32) {
1037
- const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
1038
- const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
1039
- const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
1040
- const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
1041
- const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
1042
- const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
1043
- const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
1044
- const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
1045
- const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
1046
- const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
1047
- const uint8x8x2_t t3 = vuzp_u8(t2, t0);
1048
- const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
1049
- const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
1050
- return ZSTD_rotateRight_U32(matches, head);
1051
- } else { /* rowEntries == 64 */
1052
- const uint8x16x4_t chunk = vld4q_u8(src);
1053
- const uint8x16_t dup = vdupq_n_u8(tag);
1054
- const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
1055
- const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
1056
- const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
1057
- const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
1058
-
1059
- const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
1060
- const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
1061
- const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
1062
- const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
1063
- const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
1064
- const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
1065
- return ZSTD_rotateRight_U64(matches, head);
1066
- }
1077
+ return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
1067
1078
  }
1068
1079
  # endif /* ZSTD_ARCH_ARM_NEON */
1069
1080
  /* SWAR */
1070
- { const size_t chunkSize = sizeof(size_t);
1081
+ { const int chunkSize = sizeof(size_t);
1071
1082
  const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
1072
1083
  const size_t xFF = ~((size_t)0);
1073
1084
  const size_t x01 = xFF / 0xFF;
@@ -1100,11 +1111,11 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
1100
1111
  }
1101
1112
  matches = ~matches;
1102
1113
  if (rowEntries == 16) {
1103
- return ZSTD_rotateRight_U16((U16)matches, head);
1114
+ return ZSTD_rotateRight_U16((U16)matches, headGrouped);
1104
1115
  } else if (rowEntries == 32) {
1105
- return ZSTD_rotateRight_U32((U32)matches, head);
1116
+ return ZSTD_rotateRight_U32((U32)matches, headGrouped);
1106
1117
  } else {
1107
- return ZSTD_rotateRight_U64((U64)matches, head);
1118
+ return ZSTD_rotateRight_U64((U64)matches, headGrouped);
1108
1119
  }
1109
1120
  }
1110
1121
  #endif
@@ -1112,29 +1123,30 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head,
1112
1123
 
1113
1124
  /* The high-level approach of the SIMD row based match finder is as follows:
1114
1125
  * - Figure out where to insert the new entry:
1115
- * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
1116
- * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
1126
+ * - Generate a hash for the current input position and split it into a one-byte tag and `rowHashLog` bits of index.
1127
+ * - The hash is salted by a value that changes on every context reset, so when the same table is used
1128
+ * we will avoid collisions that would otherwise slow us down by introducing phantom matches.
1129
+ * - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
1117
1130
  * which row to insert into.
1118
- * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
1119
- * be considered as a circular buffer with a "head" index that resides in the tagTable.
1120
- * - Also insert the "tag" into the equivalent row and position in the tagTable.
1121
- * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
1122
- * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
1123
- * for alignment/performance reasons, leaving some bytes unused.
1124
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
1131
+ * - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
1132
+ * be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
1133
+ * per row).
1134
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
1125
1135
  * generate a bitfield that we can cycle through to check the collisions in the hash table.
1126
1136
  * - Pick the longest match.
1137
+ * - Insert the tag into the equivalent row and position in the tagTable.
1127
1138
  */
1128
1139
  FORCE_INLINE_TEMPLATE
1140
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1129
1141
  size_t ZSTD_RowFindBestMatch(
1130
- ZSTD_matchState_t* ms,
1142
+ ZSTD_MatchState_t* ms,
1131
1143
  const BYTE* const ip, const BYTE* const iLimit,
1132
1144
  size_t* offsetPtr,
1133
1145
  const U32 mls, const ZSTD_dictMode_e dictMode,
1134
1146
  const U32 rowLog)
1135
1147
  {
1136
1148
  U32* const hashTable = ms->hashTable;
1137
- U16* const tagTable = ms->tagTable;
1149
+ BYTE* const tagTable = ms->tagTable;
1138
1150
  U32* const hashCache = ms->hashCache;
1139
1151
  const U32 hashLog = ms->rowHashLog;
1140
1152
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -1152,11 +1164,14 @@ size_t ZSTD_RowFindBestMatch(
1152
1164
  const U32 rowEntries = (1U << rowLog);
1153
1165
  const U32 rowMask = rowEntries - 1;
1154
1166
  const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
1167
+ const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
1168
+ const U64 hashSalt = ms->hashSalt;
1155
1169
  U32 nbAttempts = 1U << cappedSearchLog;
1156
1170
  size_t ml=4-1;
1171
+ U32 hash;
1157
1172
 
1158
1173
  /* DMS/DDS variables that may be referenced later */
1159
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
1174
+ const ZSTD_MatchState_t* const dms = ms->dictMatchState;
1160
1175
 
1161
1176
  /* Initialize the following variables to satisfy static analyzer */
1162
1177
  size_t ddsIdx = 0;
@@ -1177,7 +1192,7 @@ size_t ZSTD_RowFindBestMatch(
1177
1192
  if (dictMode == ZSTD_dictMatchState) {
1178
1193
  /* Prefetch DMS rows */
1179
1194
  U32* const dmsHashTable = dms->hashTable;
1180
- U16* const dmsTagTable = dms->tagTable;
1195
+ BYTE* const dmsTagTable = dms->tagTable;
1181
1196
  U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
1182
1197
  U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1183
1198
  dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
@@ -1187,23 +1202,34 @@ size_t ZSTD_RowFindBestMatch(
1187
1202
  }
1188
1203
 
1189
1204
  /* Update the hashTable and tagTable up to (but not including) ip */
1190
- ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1205
+ if (!ms->lazySkipping) {
1206
+ ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
1207
+ hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
1208
+ } else {
1209
+ /* Stop inserting every position when in the lazy skipping mode.
1210
+ * The hash cache is also not kept up to date in this mode.
1211
+ */
1212
+ hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt);
1213
+ ms->nextToUpdate = curr;
1214
+ }
1215
+ ms->hashSaltEntropy += hash; /* collect salt entropy */
1216
+
1191
1217
  { /* Get the hash for ip, compute the appropriate row */
1192
- U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
1193
1218
  U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
1194
1219
  U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
1195
1220
  U32* const row = hashTable + relRow;
1196
1221
  BYTE* tagRow = (BYTE*)(tagTable + relRow);
1197
- U32 const head = *tagRow & rowMask;
1222
+ U32 const headGrouped = (*tagRow & rowMask) * groupWidth;
1198
1223
  U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
1199
1224
  size_t numMatches = 0;
1200
1225
  size_t currMatch = 0;
1201
- ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
1226
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries);
1202
1227
 
1203
1228
  /* Cycle through the matches and prefetch */
1204
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1205
- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
1229
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1230
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1206
1231
  U32 const matchIndex = row[matchPos];
1232
+ if(matchPos == 0) continue;
1207
1233
  assert(numMatches < rowEntries);
1208
1234
  if (matchIndex < lowLimit)
1209
1235
  break;
@@ -1213,13 +1239,14 @@ size_t ZSTD_RowFindBestMatch(
1213
1239
  PREFETCH_L1(dictBase + matchIndex);
1214
1240
  }
1215
1241
  matchBuffer[numMatches++] = matchIndex;
1242
+ --nbAttempts;
1216
1243
  }
1217
1244
 
1218
1245
  /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
1219
1246
  in ZSTD_row_update_internal() at the next search. */
1220
1247
  {
1221
1248
  U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
1222
- tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
1249
+ tagRow[pos] = (BYTE)tag;
1223
1250
  row[pos] = ms->nextToUpdate++;
1224
1251
  }
1225
1252
 
@@ -1233,7 +1260,8 @@ size_t ZSTD_RowFindBestMatch(
1233
1260
  if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
1234
1261
  const BYTE* const match = base + matchIndex;
1235
1262
  assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
1236
- if (match[ml] == ip[ml]) /* potentially better */
1263
+ /* read 4B starting from (match + ml + 1 - sizeof(U32)) */
1264
+ if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */
1237
1265
  currentMl = ZSTD_count(ip, match, iLimit);
1238
1266
  } else {
1239
1267
  const BYTE* const match = dictBase + matchIndex;
@@ -1245,7 +1273,7 @@ size_t ZSTD_RowFindBestMatch(
1245
1273
  /* Save best solution */
1246
1274
  if (currentMl > ml) {
1247
1275
  ml = currentMl;
1248
- *offsetPtr = STORE_OFFSET(curr - matchIndex);
1276
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex);
1249
1277
  if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
1250
1278
  }
1251
1279
  }
@@ -1263,19 +1291,21 @@ size_t ZSTD_RowFindBestMatch(
1263
1291
  const U32 dmsSize = (U32)(dmsEnd - dmsBase);
1264
1292
  const U32 dmsIndexDelta = dictLimit - dmsSize;
1265
1293
 
1266
- { U32 const head = *dmsTagRow & rowMask;
1294
+ { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth;
1267
1295
  U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
1268
1296
  size_t numMatches = 0;
1269
1297
  size_t currMatch = 0;
1270
- ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
1298
+ ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries);
1271
1299
 
1272
- for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
1273
- U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
1300
+ for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) {
1301
+ U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask;
1274
1302
  U32 const matchIndex = dmsRow[matchPos];
1303
+ if(matchPos == 0) continue;
1275
1304
  if (matchIndex < dmsLowestIndex)
1276
1305
  break;
1277
1306
  PREFETCH_L1(dmsBase + matchIndex);
1278
1307
  matchBuffer[numMatches++] = matchIndex;
1308
+ --nbAttempts;
1279
1309
  }
1280
1310
 
1281
1311
  /* Return the longest match */
@@ -1294,7 +1324,7 @@ size_t ZSTD_RowFindBestMatch(
1294
1324
  if (currentMl > ml) {
1295
1325
  ml = currentMl;
1296
1326
  assert(curr > matchIndex + dmsIndexDelta);
1297
- *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
1327
+ *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta));
1298
1328
  if (ip+currentMl == iLimit) break;
1299
1329
  }
1300
1330
  }
@@ -1304,17 +1334,13 @@ size_t ZSTD_RowFindBestMatch(
1304
1334
  }
1305
1335
 
1306
1336
 
1307
- typedef size_t (*searchMax_f)(
1308
- ZSTD_matchState_t* ms,
1309
- const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
1310
-
1311
1337
  /**
1312
- * This struct contains the functions necessary for lazy to search.
1313
- * Currently, that is only searchMax. However, it is still valuable to have the
1314
- * VTable because this makes it easier to add more functions to the VTable later.
1338
+ * Generate search functions templated on (dictMode, mls, rowLog).
1339
+ * These functions are outlined for code size & compilation time.
1340
+ * ZSTD_searchMax() dispatches to the correct implementation function.
1315
1341
  *
1316
1342
  * TODO: The start of the search function involves loading and calculating a
1317
- * bunch of constants from the ZSTD_matchState_t. These computations could be
1343
+ * bunch of constants from the ZSTD_MatchState_t. These computations could be
1318
1344
  * done in an initialization function, and saved somewhere in the match state.
1319
1345
  * Then we could pass a pointer to the saved state instead of the match state,
1320
1346
  * and avoid duplicate computations.
@@ -1329,39 +1355,36 @@ typedef size_t (*searchMax_f)(
1329
1355
  * the single segment loop. It should go in searchMax instead of its own
1330
1356
  * function to avoid having multiple virtual function calls per search.
1331
1357
  */
1332
- typedef struct {
1333
- searchMax_f searchMax;
1334
- } ZSTD_LazyVTable;
1335
1358
 
1336
- #define GEN_ZSTD_BT_VTABLE(dictMode, mls) \
1337
- static size_t ZSTD_BtFindBestMatch_##dictMode##_##mls( \
1338
- ZSTD_matchState_t* ms, \
1339
- const BYTE* ip, const BYTE* const iLimit, \
1340
- size_t* offsetPtr) \
1341
- { \
1342
- assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1343
- return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
1344
- } \
1345
- static const ZSTD_LazyVTable ZSTD_BtVTable_##dictMode##_##mls = { \
1346
- ZSTD_BtFindBestMatch_##dictMode##_##mls \
1347
- };
1348
-
1349
- #define GEN_ZSTD_HC_VTABLE(dictMode, mls) \
1350
- static size_t ZSTD_HcFindBestMatch_##dictMode##_##mls( \
1351
- ZSTD_matchState_t* ms, \
1359
+ #define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
1360
+ #define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
1361
+ #define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
1362
+
1363
+ #define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
1364
+
1365
+ #define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \
1366
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \
1367
+ ZSTD_MatchState_t* ms, \
1368
+ const BYTE* ip, const BYTE* const iLimit, \
1369
+ size_t* offBasePtr) \
1370
+ { \
1371
+ assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1372
+ return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
1373
+ } \
1374
+
1375
+ #define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \
1376
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \
1377
+ ZSTD_MatchState_t* ms, \
1352
1378
  const BYTE* ip, const BYTE* const iLimit, \
1353
1379
  size_t* offsetPtr) \
1354
1380
  { \
1355
1381
  assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \
1356
1382
  return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
1357
1383
  } \
1358
- static const ZSTD_LazyVTable ZSTD_HcVTable_##dictMode##_##mls = { \
1359
- ZSTD_HcFindBestMatch_##dictMode##_##mls \
1360
- };
1361
1384
 
1362
- #define GEN_ZSTD_ROW_VTABLE(dictMode, mls, rowLog) \
1363
- static size_t ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog( \
1364
- ZSTD_matchState_t* ms, \
1385
+ #define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \
1386
+ ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \
1387
+ ZSTD_MatchState_t* ms, \
1365
1388
  const BYTE* ip, const BYTE* const iLimit, \
1366
1389
  size_t* offsetPtr) \
1367
1390
  { \
@@ -1369,9 +1392,6 @@ typedef struct {
1369
1392
  assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \
1370
1393
  return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
1371
1394
  } \
1372
- static const ZSTD_LazyVTable ZSTD_RowVTable_##dictMode##_##mls##_##rowLog = { \
1373
- ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog \
1374
- };
1375
1395
 
1376
1396
  #define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
1377
1397
  X(dictMode, mls, 4) \
@@ -1394,87 +1414,107 @@ typedef struct {
1394
1414
  X(__VA_ARGS__, dictMatchState) \
1395
1415
  X(__VA_ARGS__, dedicatedDictSearch)
1396
1416
 
1397
- /* Generate Row VTables for each combination of (dictMode, mls, rowLog) */
1398
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_VTABLE)
1399
- /* Generate Binary Tree VTables for each combination of (dictMode, mls) */
1400
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_VTABLE)
1401
- /* Generate Hash Chain VTables for each combination of (dictMode, mls) */
1402
- ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_VTABLE)
1403
-
1404
- #define GEN_ZSTD_BT_VTABLE_ARRAY(dictMode) \
1405
- { \
1406
- &ZSTD_BtVTable_##dictMode##_4, \
1407
- &ZSTD_BtVTable_##dictMode##_5, \
1408
- &ZSTD_BtVTable_##dictMode##_6 \
1409
- }
1410
-
1411
- #define GEN_ZSTD_HC_VTABLE_ARRAY(dictMode) \
1412
- { \
1413
- &ZSTD_HcVTable_##dictMode##_4, \
1414
- &ZSTD_HcVTable_##dictMode##_5, \
1415
- &ZSTD_HcVTable_##dictMode##_6 \
1416
- }
1417
-
1418
- #define GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, mls) \
1419
- { \
1420
- &ZSTD_RowVTable_##dictMode##_##mls##_4, \
1421
- &ZSTD_RowVTable_##dictMode##_##mls##_5, \
1422
- &ZSTD_RowVTable_##dictMode##_##mls##_6 \
1423
- }
1417
+ /* Generate row search fns for each combination of (dictMode, mls, rowLog) */
1418
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
1419
+ /* Generate binary Tree search fns for each combination of (dictMode, mls) */
1420
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
1421
+ /* Generate hash chain search fns for each combination of (dictMode, mls) */
1422
+ ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
1424
1423
 
1425
- #define GEN_ZSTD_ROW_VTABLE_ARRAY(dictMode) \
1426
- { \
1427
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 4), \
1428
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 5), \
1429
- GEN_ZSTD_ROW_VTABLE_ARRAY_(dictMode, 6) \
1430
- }
1424
+ typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
1431
1425
 
1432
- #define GEN_ZSTD_VTABLE_ARRAY(X) \
1433
- { \
1434
- X(noDict), \
1435
- X(extDict), \
1436
- X(dictMatchState), \
1437
- X(dedicatedDictSearch) \
1426
+ #define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \
1427
+ case mls: \
1428
+ return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1429
+ #define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \
1430
+ case mls: \
1431
+ return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
1432
+ #define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \
1433
+ case rowLog: \
1434
+ return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
1435
+
1436
+ #define ZSTD_SWITCH_MLS(X, dictMode) \
1437
+ switch (mls) { \
1438
+ ZSTD_FOR_EACH_MLS(X, dictMode) \
1438
1439
  }
1439
1440
 
1440
- /* *******************************
1441
- * Common parser - lazy strategy
1442
- *********************************/
1443
- typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
1441
+ #define ZSTD_SWITCH_ROWLOG(dictMode, mls) \
1442
+ case mls: \
1443
+ switch (rowLog) { \
1444
+ ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
1445
+ } \
1446
+ ZSTD_UNREACHABLE; \
1447
+ break;
1448
+
1449
+ #define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \
1450
+ switch (searchMethod) { \
1451
+ case search_hashChain: \
1452
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
1453
+ break; \
1454
+ case search_binaryTree: \
1455
+ ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
1456
+ break; \
1457
+ case search_rowHash: \
1458
+ ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \
1459
+ break; \
1460
+ } \
1461
+ ZSTD_UNREACHABLE;
1444
1462
 
1445
1463
  /**
1446
- * This table is indexed first by the four ZSTD_dictMode_e values, and then
1447
- * by the two searchMethod_e values. NULLs are placed for configurations
1448
- * that should never occur (extDict modes go to the other implementation
1449
- * below and there is no DDSS for binary tree search yet).
1464
+ * Searches for the longest match at @p ip.
1465
+ * Dispatches to the correct implementation function based on the
1466
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
1467
+ * here instead of using an indirect function call through a function
1468
+ * pointer because after Spectre and Meltdown mitigations, indirect
1469
+ * function calls can be very costly, especially in the kernel.
1470
+ *
1471
+ * NOTE: dictMode and searchMethod should be templated, so those switch
1472
+ * statements should be optimized out. Only the mls & rowLog switches
1473
+ * should be left.
1474
+ *
1475
+ * @param ms The match state.
1476
+ * @param ip The position to search at.
1477
+ * @param iend The end of the input data.
1478
+ * @param[out] offsetPtr Stores the match offset into this pointer.
1479
+ * @param mls The minimum search length, in the range [4, 6].
1480
+ * @param rowLog The row log (if applicable), in the range [4, 6].
1481
+ * @param searchMethod The search method to use (templated).
1482
+ * @param dictMode The dictMode (templated).
1483
+ *
1484
+ * @returns The length of the longest match found, or < mls if no match is found.
1485
+ * If a match is found its offset is stored in @p offsetPtr.
1450
1486
  */
1451
-
1452
- static ZSTD_LazyVTable const*
1453
- ZSTD_selectLazyVTable(ZSTD_matchState_t const* ms, searchMethod_e searchMethod, ZSTD_dictMode_e dictMode)
1487
+ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
1488
+ ZSTD_MatchState_t* ms,
1489
+ const BYTE* ip,
1490
+ const BYTE* iend,
1491
+ size_t* offsetPtr,
1492
+ U32 const mls,
1493
+ U32 const rowLog,
1494
+ searchMethod_e const searchMethod,
1495
+ ZSTD_dictMode_e const dictMode)
1454
1496
  {
1455
- /* Fill the Hc/Bt VTable arrays with the right functions for the (dictMode, mls) combination. */
1456
- ZSTD_LazyVTable const* const hcVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_HC_VTABLE_ARRAY);
1457
- ZSTD_LazyVTable const* const btVTables[4][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_BT_VTABLE_ARRAY);
1458
- /* Fill the Row VTable array with the right functions for the (dictMode, mls, rowLog) combination. */
1459
- ZSTD_LazyVTable const* const rowVTables[4][3][3] = GEN_ZSTD_VTABLE_ARRAY(GEN_ZSTD_ROW_VTABLE_ARRAY);
1460
-
1461
- U32 const mls = MAX(4, MIN(6, ms->cParams.minMatch));
1462
- U32 const rowLog = MAX(4, MIN(6, ms->cParams.searchLog));
1463
- switch (searchMethod) {
1464
- case search_hashChain:
1465
- return hcVTables[dictMode][mls - 4];
1466
- case search_binaryTree:
1467
- return btVTables[dictMode][mls - 4];
1468
- case search_rowHash:
1469
- return rowVTables[dictMode][mls - 4][rowLog - 4];
1470
- default:
1471
- return NULL;
1497
+ if (dictMode == ZSTD_noDict) {
1498
+ ZSTD_SWITCH_SEARCH_METHOD(noDict)
1499
+ } else if (dictMode == ZSTD_extDict) {
1500
+ ZSTD_SWITCH_SEARCH_METHOD(extDict)
1501
+ } else if (dictMode == ZSTD_dictMatchState) {
1502
+ ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
1503
+ } else if (dictMode == ZSTD_dedicatedDictSearch) {
1504
+ ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
1472
1505
  }
1506
+ ZSTD_UNREACHABLE;
1507
+ return 0;
1473
1508
  }
1474
1509
 
1475
- FORCE_INLINE_TEMPLATE size_t
1476
- ZSTD_compressBlock_lazy_generic(
1477
- ZSTD_matchState_t* ms, seqStore_t* seqStore,
1510
+ /* *******************************
1511
+ * Common parser - lazy strategy
1512
+ *********************************/
1513
+
1514
+ FORCE_INLINE_TEMPLATE
1515
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1516
+ size_t ZSTD_compressBlock_lazy_generic(
1517
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
1478
1518
  U32 rep[ZSTD_REP_NUM],
1479
1519
  const void* src, size_t srcSize,
1480
1520
  const searchMethod_e searchMethod, const U32 depth,
@@ -1488,14 +1528,16 @@ ZSTD_compressBlock_lazy_generic(
1488
1528
  const BYTE* const base = ms->window.base;
1489
1529
  const U32 prefixLowestIndex = ms->window.dictLimit;
1490
1530
  const BYTE* const prefixLowest = base + prefixLowestIndex;
1531
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
1532
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
1491
1533
 
1492
- searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax;
1493
- U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
1534
+ U32 offset_1 = rep[0], offset_2 = rep[1];
1535
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
1494
1536
 
1495
1537
  const int isDMS = dictMode == ZSTD_dictMatchState;
1496
1538
  const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
1497
1539
  const int isDxS = isDMS || isDDS;
1498
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
1540
+ const ZSTD_MatchState_t* const dms = ms->dictMatchState;
1499
1541
  const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
1500
1542
  const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
1501
1543
  const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
@@ -1505,16 +1547,14 @@ ZSTD_compressBlock_lazy_generic(
1505
1547
  0;
1506
1548
  const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
1507
1549
 
1508
- assert(searchMax != NULL);
1509
-
1510
1550
  DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
1511
1551
  ip += (dictAndPrefixLength == 0);
1512
1552
  if (dictMode == ZSTD_noDict) {
1513
1553
  U32 const curr = (U32)(ip - base);
1514
1554
  U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
1515
1555
  U32 const maxRep = curr - windowLow;
1516
- if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1517
- if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1556
+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
1557
+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
1518
1558
  }
1519
1559
  if (isDxS) {
1520
1560
  /* dictMatchState repCode checks don't currently handle repCode == 0
@@ -1523,11 +1563,11 @@ ZSTD_compressBlock_lazy_generic(
1523
1563
  assert(offset_2 <= dictAndPrefixLength);
1524
1564
  }
1525
1565
 
1566
+ /* Reset the lazy skipping state */
1567
+ ms->lazySkipping = 0;
1568
+
1526
1569
  if (searchMethod == search_rowHash) {
1527
- const U32 rowLog = MAX(4, MIN(6, ms->cParams.searchLog));
1528
- ZSTD_row_fillHashCache(ms, base, rowLog,
1529
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1530
- ms->nextToUpdate, ilimit);
1570
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1531
1571
  }
1532
1572
 
1533
1573
  /* Match Loop */
@@ -1539,7 +1579,7 @@ ZSTD_compressBlock_lazy_generic(
1539
1579
  #endif
1540
1580
  while (ip < ilimit) {
1541
1581
  size_t matchLength=0;
1542
- size_t offcode=STORE_REPCODE_1;
1582
+ size_t offBase = REPCODE1_TO_OFFBASE;
1543
1583
  const BYTE* start=ip+1;
1544
1584
  DEBUGLOG(7, "search baseline (depth 0)");
1545
1585
 
@@ -1550,7 +1590,7 @@ ZSTD_compressBlock_lazy_generic(
1550
1590
  && repIndex < prefixLowestIndex) ?
1551
1591
  dictBase + (repIndex - dictIndexDelta) :
1552
1592
  base + repIndex;
1553
- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1593
+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
1554
1594
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1555
1595
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1556
1596
  matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
@@ -1564,14 +1604,23 @@ ZSTD_compressBlock_lazy_generic(
1564
1604
  }
1565
1605
 
1566
1606
  /* first search (depth 0) */
1567
- { size_t offsetFound = 999999999;
1568
- size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
1607
+ { size_t offbaseFound = 999999999;
1608
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
1569
1609
  if (ml2 > matchLength)
1570
- matchLength = ml2, start = ip, offcode=offsetFound;
1610
+ matchLength = ml2, start = ip, offBase = offbaseFound;
1571
1611
  }
1572
1612
 
1573
1613
  if (matchLength < 4) {
1574
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
1614
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */;
1615
+ ip += step;
1616
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
1617
+ * In this mode we stop inserting every position into our tables, and only insert
1618
+ * positions that we search, which is one in step positions.
1619
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
1620
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
1621
+ * triggered once we've gone 2KB without finding any matches.
1622
+ */
1623
+ ms->lazySkipping = step > kLazySkippingStep;
1575
1624
  continue;
1576
1625
  }
1577
1626
 
@@ -1581,34 +1630,34 @@ ZSTD_compressBlock_lazy_generic(
1581
1630
  DEBUGLOG(7, "search depth 1");
1582
1631
  ip ++;
1583
1632
  if ( (dictMode == ZSTD_noDict)
1584
- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1633
+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1585
1634
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1586
1635
  int const gain2 = (int)(mlRep * 3);
1587
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
1636
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
1588
1637
  if ((mlRep >= 4) && (gain2 > gain1))
1589
- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
1638
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
1590
1639
  }
1591
1640
  if (isDxS) {
1592
1641
  const U32 repIndex = (U32)(ip - base) - offset_1;
1593
1642
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
1594
1643
  dictBase + (repIndex - dictIndexDelta) :
1595
1644
  base + repIndex;
1596
- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1645
+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
1597
1646
  && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1598
1647
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1599
1648
  size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1600
1649
  int const gain2 = (int)(mlRep * 3);
1601
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
1650
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
1602
1651
  if ((mlRep >= 4) && (gain2 > gain1))
1603
- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
1652
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
1604
1653
  }
1605
1654
  }
1606
- { size_t offset2=999999999;
1607
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1608
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */
1609
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
1655
+ { size_t ofbCandidate=999999999;
1656
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
1657
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
1658
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
1610
1659
  if ((ml2 >= 4) && (gain2 > gain1)) {
1611
- matchLength = ml2, offcode = offset2, start = ip;
1660
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
1612
1661
  continue; /* search a better one */
1613
1662
  } }
1614
1663
 
@@ -1617,34 +1666,34 @@ ZSTD_compressBlock_lazy_generic(
1617
1666
  DEBUGLOG(7, "search depth 2");
1618
1667
  ip ++;
1619
1668
  if ( (dictMode == ZSTD_noDict)
1620
- && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1669
+ && (offBase) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1621
1670
  size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1622
1671
  int const gain2 = (int)(mlRep * 4);
1623
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
1672
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
1624
1673
  if ((mlRep >= 4) && (gain2 > gain1))
1625
- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
1674
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
1626
1675
  }
1627
1676
  if (isDxS) {
1628
1677
  const U32 repIndex = (U32)(ip - base) - offset_1;
1629
1678
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
1630
1679
  dictBase + (repIndex - dictIndexDelta) :
1631
1680
  base + repIndex;
1632
- if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1681
+ if ((ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
1633
1682
  && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1634
1683
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1635
1684
  size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1636
1685
  int const gain2 = (int)(mlRep * 4);
1637
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
1686
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
1638
1687
  if ((mlRep >= 4) && (gain2 > gain1))
1639
- matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
1688
+ matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
1640
1689
  }
1641
1690
  }
1642
- { size_t offset2=999999999;
1643
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1644
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */
1645
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
1691
+ { size_t ofbCandidate=999999999;
1692
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
1693
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
1694
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
1646
1695
  if ((ml2 >= 4) && (gain2 > gain1)) {
1647
- matchLength = ml2, offcode = offset2, start = ip;
1696
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
1648
1697
  continue;
1649
1698
  } } }
1650
1699
  break; /* nothing found : store previous solution */
@@ -1655,26 +1704,33 @@ ZSTD_compressBlock_lazy_generic(
1655
1704
  * notably if `value` is unsigned, resulting in a large positive `-value`.
1656
1705
  */
1657
1706
  /* catch up */
1658
- if (STORED_IS_OFFSET(offcode)) {
1707
+ if (OFFBASE_IS_OFFSET(offBase)) {
1659
1708
  if (dictMode == ZSTD_noDict) {
1660
- while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
1661
- && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) ) /* only search for offset within prefix */
1709
+ while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
1710
+ && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) ) /* only search for offset within prefix */
1662
1711
  { start--; matchLength++; }
1663
1712
  }
1664
1713
  if (isDxS) {
1665
- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
1714
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
1666
1715
  const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
1667
1716
  const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
1668
1717
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1669
1718
  }
1670
- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
1719
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
1671
1720
  }
1672
1721
  /* store sequence */
1673
1722
  _storeSequence:
1674
1723
  { size_t const litLength = (size_t)(start - anchor);
1675
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
1724
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
1676
1725
  anchor = ip = start + matchLength;
1677
1726
  }
1727
+ if (ms->lazySkipping) {
1728
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
1729
+ if (searchMethod == search_rowHash) {
1730
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1731
+ }
1732
+ ms->lazySkipping = 0;
1733
+ }
1678
1734
 
1679
1735
  /* check immediate repcode */
1680
1736
  if (isDxS) {
@@ -1684,12 +1740,12 @@ _storeSequence:
1684
1740
  const BYTE* repMatch = repIndex < prefixLowestIndex ?
1685
1741
  dictBase - dictIndexDelta + repIndex :
1686
1742
  base + repIndex;
1687
- if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
1743
+ if ( (ZSTD_index_overlap_check(prefixLowestIndex, repIndex))
1688
1744
  && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1689
1745
  const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
1690
1746
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
1691
- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset_2 <=> offset_1 */
1692
- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
1747
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */
1748
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
1693
1749
  ip += matchLength;
1694
1750
  anchor = ip;
1695
1751
  continue;
@@ -1703,168 +1759,183 @@ _storeSequence:
1703
1759
  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
1704
1760
  /* store sequence */
1705
1761
  matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1706
- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
1707
- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
1762
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */
1763
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
1708
1764
  ip += matchLength;
1709
1765
  anchor = ip;
1710
1766
  continue; /* faster when present ... (?) */
1711
1767
  } } }
1712
1768
 
1713
- /* Save reps for next block */
1714
- rep[0] = offset_1 ? offset_1 : savedOffset;
1715
- rep[1] = offset_2 ? offset_2 : savedOffset;
1769
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
1770
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
1771
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
1772
+
1773
+ /* save reps for next block */
1774
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
1775
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;
1716
1776
 
1717
1777
  /* Return the last literals size */
1718
1778
  return (size_t)(iend - anchor);
1719
1779
  }
1780
+ #endif /* build exclusions */
1720
1781
 
1721
1782
 
1722
- size_t ZSTD_compressBlock_btlazy2(
1723
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1783
+ #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
1784
+ size_t ZSTD_compressBlock_greedy(
1785
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1724
1786
  void const* src, size_t srcSize)
1725
1787
  {
1726
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1788
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
1727
1789
  }
1728
1790
 
1729
- size_t ZSTD_compressBlock_lazy2(
1730
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1791
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
1792
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1731
1793
  void const* src, size_t srcSize)
1732
1794
  {
1733
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
1795
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1734
1796
  }
1735
1797
 
1736
- size_t ZSTD_compressBlock_lazy(
1737
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1798
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1799
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1738
1800
  void const* src, size_t srcSize)
1739
1801
  {
1740
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
1802
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
1741
1803
  }
1742
1804
 
1743
- size_t ZSTD_compressBlock_greedy(
1744
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1805
+ size_t ZSTD_compressBlock_greedy_row(
1806
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1745
1807
  void const* src, size_t srcSize)
1746
1808
  {
1747
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
1809
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
1748
1810
  }
1749
1811
 
1750
- size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1751
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1812
+ size_t ZSTD_compressBlock_greedy_dictMatchState_row(
1813
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1752
1814
  void const* src, size_t srcSize)
1753
1815
  {
1754
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1816
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
1755
1817
  }
1756
1818
 
1757
- size_t ZSTD_compressBlock_lazy2_dictMatchState(
1758
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1819
+ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
1820
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1759
1821
  void const* src, size_t srcSize)
1760
1822
  {
1761
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
1823
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
1762
1824
  }
1825
+ #endif
1763
1826
 
1764
- size_t ZSTD_compressBlock_lazy_dictMatchState(
1765
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1827
+ #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
1828
+ size_t ZSTD_compressBlock_lazy(
1829
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1766
1830
  void const* src, size_t srcSize)
1767
1831
  {
1768
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
1832
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
1769
1833
  }
1770
1834
 
1771
- size_t ZSTD_compressBlock_greedy_dictMatchState(
1772
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1835
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
1836
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1773
1837
  void const* src, size_t srcSize)
1774
1838
  {
1775
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
1839
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
1776
1840
  }
1777
1841
 
1778
-
1779
- size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1780
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1842
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1843
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1781
1844
  void const* src, size_t srcSize)
1782
1845
  {
1783
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1846
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1784
1847
  }
1785
1848
 
1786
- size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
1787
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1849
+ size_t ZSTD_compressBlock_lazy_row(
1850
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1788
1851
  void const* src, size_t srcSize)
1789
1852
  {
1790
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
1853
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
1791
1854
  }
1792
1855
 
1793
- size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
1794
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1856
+ size_t ZSTD_compressBlock_lazy_dictMatchState_row(
1857
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1795
1858
  void const* src, size_t srcSize)
1796
1859
  {
1797
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
1860
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
1798
1861
  }
1799
1862
 
1800
- /* Row-based matchfinder */
1801
- size_t ZSTD_compressBlock_lazy2_row(
1802
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1863
+ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
1864
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1803
1865
  void const* src, size_t srcSize)
1804
1866
  {
1805
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
1867
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
1806
1868
  }
1869
+ #endif
1807
1870
 
1808
- size_t ZSTD_compressBlock_lazy_row(
1809
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1871
+ #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
1872
+ size_t ZSTD_compressBlock_lazy2(
1873
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1810
1874
  void const* src, size_t srcSize)
1811
1875
  {
1812
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
1876
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
1813
1877
  }
1814
1878
 
1815
- size_t ZSTD_compressBlock_greedy_row(
1816
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1879
+ size_t ZSTD_compressBlock_lazy2_dictMatchState(
1880
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1817
1881
  void const* src, size_t srcSize)
1818
1882
  {
1819
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
1883
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
1820
1884
  }
1821
1885
 
1822
- size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
1823
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1886
+ size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
1887
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1824
1888
  void const* src, size_t srcSize)
1825
1889
  {
1826
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
1890
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
1827
1891
  }
1828
1892
 
1829
- size_t ZSTD_compressBlock_lazy_dictMatchState_row(
1830
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1893
+ size_t ZSTD_compressBlock_lazy2_row(
1894
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1831
1895
  void const* src, size_t srcSize)
1832
1896
  {
1833
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
1897
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
1834
1898
  }
1835
1899
 
1836
- size_t ZSTD_compressBlock_greedy_dictMatchState_row(
1837
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1900
+ size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
1901
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1838
1902
  void const* src, size_t srcSize)
1839
1903
  {
1840
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
1904
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
1841
1905
  }
1842
1906
 
1843
-
1844
1907
  size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
1845
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1908
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1846
1909
  void const* src, size_t srcSize)
1847
1910
  {
1848
1911
  return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
1849
1912
  }
1913
+ #endif
1850
1914
 
1851
- size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
1852
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1915
+ #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
1916
+ size_t ZSTD_compressBlock_btlazy2(
1917
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1853
1918
  void const* src, size_t srcSize)
1854
1919
  {
1855
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
1920
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
1856
1921
  }
1857
1922
 
1858
- size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
1859
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1923
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
1924
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1860
1925
  void const* src, size_t srcSize)
1861
1926
  {
1862
- return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
1927
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
1863
1928
  }
1929
+ #endif
1864
1930
 
1931
+ #if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
1932
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
1933
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
1934
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
1865
1935
  FORCE_INLINE_TEMPLATE
1936
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1866
1937
  size_t ZSTD_compressBlock_lazy_extDict_generic(
1867
- ZSTD_matchState_t* ms, seqStore_t* seqStore,
1938
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore,
1868
1939
  U32 rep[ZSTD_REP_NUM],
1869
1940
  const void* src, size_t srcSize,
1870
1941
  const searchMethod_e searchMethod, const U32 depth)
@@ -1881,19 +1952,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1881
1952
  const BYTE* const dictEnd = dictBase + dictLimit;
1882
1953
  const BYTE* const dictStart = dictBase + ms->window.lowLimit;
1883
1954
  const U32 windowLog = ms->cParams.windowLog;
1884
- const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5;
1955
+ const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
1956
+ const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
1885
1957
 
1886
- searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, ZSTD_extDict)->searchMax;
1887
1958
  U32 offset_1 = rep[0], offset_2 = rep[1];
1888
1959
 
1889
1960
  DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
1890
1961
 
1962
+ /* Reset the lazy skipping state */
1963
+ ms->lazySkipping = 0;
1964
+
1891
1965
  /* init */
1892
1966
  ip += (ip == prefixStart);
1893
1967
  if (searchMethod == search_rowHash) {
1894
- ZSTD_row_fillHashCache(ms, base, rowLog,
1895
- MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
1896
- ms->nextToUpdate, ilimit);
1968
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
1897
1969
  }
1898
1970
 
1899
1971
  /* Match Loop */
@@ -1905,7 +1977,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1905
1977
  #endif
1906
1978
  while (ip < ilimit) {
1907
1979
  size_t matchLength=0;
1908
- size_t offcode=STORE_REPCODE_1;
1980
+ size_t offBase = REPCODE1_TO_OFFBASE;
1909
1981
  const BYTE* start=ip+1;
1910
1982
  U32 curr = (U32)(ip-base);
1911
1983
 
@@ -1914,7 +1986,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1914
1986
  const U32 repIndex = (U32)(curr+1 - offset_1);
1915
1987
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1916
1988
  const BYTE* const repMatch = repBase + repIndex;
1917
- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
1989
+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
1918
1990
  & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
1919
1991
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
1920
1992
  /* repcode detected we should take it */
@@ -1924,14 +1996,23 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1924
1996
  } }
1925
1997
 
1926
1998
  /* first search (depth 0) */
1927
- { size_t offsetFound = 999999999;
1928
- size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
1999
+ { size_t ofbCandidate = 999999999;
2000
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
1929
2001
  if (ml2 > matchLength)
1930
- matchLength = ml2, start = ip, offcode=offsetFound;
2002
+ matchLength = ml2, start = ip, offBase = ofbCandidate;
1931
2003
  }
1932
2004
 
1933
2005
  if (matchLength < 4) {
1934
- ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
2006
+ size_t const step = ((size_t)(ip-anchor) >> kSearchStrength);
2007
+ ip += step + 1; /* jump faster over incompressible sections */
2008
+ /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
2009
+ * In this mode we stop inserting every position into our tables, and only insert
2010
+ * positions that we search, which is one in step positions.
2011
+ * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
2012
+ * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
2013
+ * triggered once we've gone 2KB without finding any matches.
2014
+ */
2015
+ ms->lazySkipping = step > kLazySkippingStep;
1935
2016
  continue;
1936
2017
  }
1937
2018
 
@@ -1941,30 +2022,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1941
2022
  ip ++;
1942
2023
  curr++;
1943
2024
  /* check repCode */
1944
- if (offcode) {
2025
+ if (offBase) {
1945
2026
  const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1946
2027
  const U32 repIndex = (U32)(curr - offset_1);
1947
2028
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1948
2029
  const BYTE* const repMatch = repBase + repIndex;
1949
- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2030
+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
1950
2031
  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1951
2032
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1952
2033
  /* repcode detected */
1953
2034
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1954
2035
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1955
2036
  int const gain2 = (int)(repLength * 3);
1956
- int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
2037
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
1957
2038
  if ((repLength >= 4) && (gain2 > gain1))
1958
- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
2039
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
1959
2040
  } }
1960
2041
 
1961
2042
  /* search match, depth 1 */
1962
- { size_t offset2=999999999;
1963
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1964
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */
1965
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
2043
+ { size_t ofbCandidate = 999999999;
2044
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
2045
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
2046
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
1966
2047
  if ((ml2 >= 4) && (gain2 > gain1)) {
1967
- matchLength = ml2, offcode = offset2, start = ip;
2048
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
1968
2049
  continue; /* search a better one */
1969
2050
  } }
1970
2051
 
@@ -1973,50 +2054,57 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1973
2054
  ip ++;
1974
2055
  curr++;
1975
2056
  /* check repCode */
1976
- if (offcode) {
2057
+ if (offBase) {
1977
2058
  const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
1978
2059
  const U32 repIndex = (U32)(curr - offset_1);
1979
2060
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1980
2061
  const BYTE* const repMatch = repBase + repIndex;
1981
- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2062
+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
1982
2063
  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
1983
2064
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1984
2065
  /* repcode detected */
1985
2066
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
1986
2067
  size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
1987
2068
  int const gain2 = (int)(repLength * 4);
1988
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
2069
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
1989
2070
  if ((repLength >= 4) && (gain2 > gain1))
1990
- matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
2071
+ matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip;
1991
2072
  } }
1992
2073
 
1993
2074
  /* search match, depth 2 */
1994
- { size_t offset2=999999999;
1995
- size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1996
- int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2))); /* raw approx */
1997
- int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
2075
+ { size_t ofbCandidate = 999999999;
2076
+ size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict);
2077
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */
2078
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
1998
2079
  if ((ml2 >= 4) && (gain2 > gain1)) {
1999
- matchLength = ml2, offcode = offset2, start = ip;
2080
+ matchLength = ml2, offBase = ofbCandidate, start = ip;
2000
2081
  continue;
2001
2082
  } } }
2002
2083
  break; /* nothing found : store previous solution */
2003
2084
  }
2004
2085
 
2005
2086
  /* catch up */
2006
- if (STORED_IS_OFFSET(offcode)) {
2007
- U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
2087
+ if (OFFBASE_IS_OFFSET(offBase)) {
2088
+ U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
2008
2089
  const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2009
2090
  const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
2010
2091
  while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2011
- offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
2092
+ offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase);
2012
2093
  }
2013
2094
 
2014
2095
  /* store sequence */
2015
2096
  _storeSequence:
2016
2097
  { size_t const litLength = (size_t)(start - anchor);
2017
- ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
2098
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength);
2018
2099
  anchor = ip = start + matchLength;
2019
2100
  }
2101
+ if (ms->lazySkipping) {
2102
+ /* We've found a match, disable lazy skipping mode, and refill the hash cache. */
2103
+ if (searchMethod == search_rowHash) {
2104
+ ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
2105
+ }
2106
+ ms->lazySkipping = 0;
2107
+ }
2020
2108
 
2021
2109
  /* check immediate repcode */
2022
2110
  while (ip <= ilimit) {
@@ -2025,14 +2113,14 @@ _storeSequence:
2025
2113
  const U32 repIndex = repCurrent - offset_2;
2026
2114
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2027
2115
  const BYTE* const repMatch = repBase + repIndex;
2028
- if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
2116
+ if ( (ZSTD_index_overlap_check(dictLimit, repIndex))
2029
2117
  & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
2030
2118
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
2031
2119
  /* repcode detected we should take it */
2032
2120
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2033
2121
  matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
2034
- offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap offset history */
2035
- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
2122
+ offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */
2123
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength);
2036
2124
  ip += matchLength;
2037
2125
  anchor = ip;
2038
2126
  continue; /* faster when present ... (?) */
@@ -2047,58 +2135,65 @@ _storeSequence:
2047
2135
  /* Return the last literals size */
2048
2136
  return (size_t)(iend - anchor);
2049
2137
  }
2138
+ #endif /* build exclusions */
2050
2139
 
2051
-
2140
+ #ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
2052
2141
  size_t ZSTD_compressBlock_greedy_extDict(
2053
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2142
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2054
2143
  void const* src, size_t srcSize)
2055
2144
  {
2056
2145
  return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
2057
2146
  }
2058
2147
 
2059
- size_t ZSTD_compressBlock_lazy_extDict(
2060
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2148
+ size_t ZSTD_compressBlock_greedy_extDict_row(
2149
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2061
2150
  void const* src, size_t srcSize)
2062
-
2063
2151
  {
2064
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
2152
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
2065
2153
  }
2154
+ #endif
2066
2155
 
2067
- size_t ZSTD_compressBlock_lazy2_extDict(
2068
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2156
+ #ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
2157
+ size_t ZSTD_compressBlock_lazy_extDict(
2158
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2069
2159
  void const* src, size_t srcSize)
2070
2160
 
2071
2161
  {
2072
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
2162
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
2073
2163
  }
2074
2164
 
2075
- size_t ZSTD_compressBlock_btlazy2_extDict(
2076
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2165
+ size_t ZSTD_compressBlock_lazy_extDict_row(
2166
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2077
2167
  void const* src, size_t srcSize)
2078
2168
 
2079
2169
  {
2080
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
2170
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
2081
2171
  }
2172
+ #endif
2082
2173
 
2083
- size_t ZSTD_compressBlock_greedy_extDict_row(
2084
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2174
+ #ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
2175
+ size_t ZSTD_compressBlock_lazy2_extDict(
2176
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2085
2177
  void const* src, size_t srcSize)
2178
+
2086
2179
  {
2087
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
2180
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
2088
2181
  }
2089
2182
 
2090
- size_t ZSTD_compressBlock_lazy_extDict_row(
2091
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2183
+ size_t ZSTD_compressBlock_lazy2_extDict_row(
2184
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2092
2185
  void const* src, size_t srcSize)
2093
-
2094
2186
  {
2095
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
2187
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
2096
2188
  }
2189
+ #endif
2097
2190
 
2098
- size_t ZSTD_compressBlock_lazy2_extDict_row(
2099
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2191
+ #ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
2192
+ size_t ZSTD_compressBlock_btlazy2_extDict(
2193
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
2100
2194
  void const* src, size_t srcSize)
2101
2195
 
2102
2196
  {
2103
- return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
2197
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
2104
2198
  }
2199
+ #endif