multi_compress 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +22 -1
  9. data/ext/multi_compress/multi_compress.c +152 -72
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. metadata +29 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,8 +11,46 @@
11
11
  #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
12
12
  #include "zstd_fast.h"
13
13
 
14
+ static
15
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
16
+ void ZSTD_fillHashTableForCDict(ZSTD_MatchState_t* ms,
17
+ const void* const end,
18
+ ZSTD_dictTableLoadMethod_e dtlm)
19
+ {
20
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
21
+ U32* const hashTable = ms->hashTable;
22
+ U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
23
+ U32 const mls = cParams->minMatch;
24
+ const BYTE* const base = ms->window.base;
25
+ const BYTE* ip = base + ms->nextToUpdate;
26
+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
27
+ const U32 fastHashFillStep = 3;
28
+
29
+ /* Currently, we always use ZSTD_dtlm_full for filling CDict tables.
30
+ * Feel free to remove this assert if there's a good reason! */
31
+ assert(dtlm == ZSTD_dtlm_full);
32
+
33
+ /* Always insert every fastHashFillStep position into the hash table.
34
+ * Insert the other positions if their hash entry is empty.
35
+ */
36
+ for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
37
+ U32 const curr = (U32)(ip - base);
38
+ { size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls);
39
+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); }
40
+
41
+ if (dtlm == ZSTD_dtlm_fast) continue;
42
+ /* Only load extra positions for ZSTD_dtlm_full */
43
+ { U32 p;
44
+ for (p = 1; p < fastHashFillStep; ++p) {
45
+ size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls);
46
+ if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */
47
+ ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p);
48
+ } } } }
49
+ }
14
50
 
15
- void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
51
+ static
52
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
53
+ void ZSTD_fillHashTableForCCtx(ZSTD_MatchState_t* ms,
16
54
  const void* const end,
17
55
  ZSTD_dictTableLoadMethod_e dtlm)
18
56
  {
@@ -25,6 +63,10 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
25
63
  const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
26
64
  const U32 fastHashFillStep = 3;
27
65
 
66
+ /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables.
67
+ * Feel free to remove this assert if there's a good reason! */
68
+ assert(dtlm == ZSTD_dtlm_fast);
69
+
28
70
  /* Always insert every fastHashFillStep position into the hash table.
29
71
  * Insert the other positions if their hash entry is empty.
30
72
  */
@@ -42,6 +84,62 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
42
84
  } } } }
43
85
  }
44
86
 
87
+ void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
88
+ const void* const end,
89
+ ZSTD_dictTableLoadMethod_e dtlm,
90
+ ZSTD_tableFillPurpose_e tfp)
91
+ {
92
+ if (tfp == ZSTD_tfp_forCDict) {
93
+ ZSTD_fillHashTableForCDict(ms, end, dtlm);
94
+ } else {
95
+ ZSTD_fillHashTableForCCtx(ms, end, dtlm);
96
+ }
97
+ }
98
+
99
+
100
+ typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
101
+
102
+ static int
103
+ ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
104
+ {
105
+ /* Array of ~random data, should have low probability of matching data.
106
+ * Load from here if the index is invalid.
107
+ * Used to avoid unpredictable branches. */
108
+ static const BYTE dummy[] = {0x12,0x34,0x56,0x78};
109
+
110
+ /* currentIdx >= lowLimit is a (somewhat) unpredictable branch.
111
+ * However expression below compiles into conditional move.
112
+ */
113
+ const BYTE* mvalAddr = ZSTD_selectAddr(matchIdx, idxLowLimit, matchAddress, dummy);
114
+ /* Note: this used to be written as : return test1 && test2;
115
+ * Unfortunately, once inlined, these tests become branches,
116
+ * in which case it becomes critical that they are executed in the right order (test1 then test2).
117
+ * So we have to write these tests in a specific manner to ensure their ordering.
118
+ */
119
+ if (MEM_read32(currentPtr) != MEM_read32(mvalAddr)) return 0;
120
+ /* force ordering of these tests, which matters once the function is inlined, as they become branches */
121
+ #if defined(__GNUC__)
122
+ __asm__("");
123
+ #endif
124
+ return matchIdx >= idxLowLimit;
125
+ }
126
+
127
+ static int
128
+ ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
129
+ {
130
+ /* using a branch instead of a cmov,
131
+ * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
132
+ * aka almost all candidates are within range */
133
+ U32 mval;
134
+ if (matchIdx >= idxLowLimit) {
135
+ mval = MEM_read32(matchAddress);
136
+ } else {
137
+ mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
138
+ }
139
+
140
+ return (MEM_read32(currentPtr) == mval);
141
+ }
142
+
45
143
 
46
144
  /**
47
145
  * If you squint hard enough (and ignore repcodes), the search operation at any
@@ -89,17 +187,17 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
89
187
  *
90
188
  * This is also the work we do at the beginning to enter the loop initially.
91
189
  */
92
- FORCE_INLINE_TEMPLATE size_t
93
- ZSTD_compressBlock_fast_noDict_generic(
94
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
190
+ FORCE_INLINE_TEMPLATE
191
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
192
+ size_t ZSTD_compressBlock_fast_noDict_generic(
193
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
95
194
  void const* src, size_t srcSize,
96
- U32 const mls, U32 const hasStep)
195
+ U32 const mls, int useCmov)
97
196
  {
98
197
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
99
198
  U32* const hashTable = ms->hashTable;
100
199
  U32 const hlog = cParams->hashLog;
101
- /* support stepSize of 0 */
102
- size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
200
+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; /* min 2 */
103
201
  const BYTE* const base = ms->window.base;
104
202
  const BYTE* const istart = (const BYTE*)src;
105
203
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
@@ -117,12 +215,11 @@ ZSTD_compressBlock_fast_noDict_generic(
117
215
 
118
216
  U32 rep_offset1 = rep[0];
119
217
  U32 rep_offset2 = rep[1];
120
- U32 offsetSaved = 0;
218
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
121
219
 
122
220
  size_t hash0; /* hash for ip0 */
123
221
  size_t hash1; /* hash for ip1 */
124
- U32 idx; /* match idx for ip0 */
125
- U32 mval; /* src value at match idx */
222
+ U32 matchIdx; /* match idx for ip0 */
126
223
 
127
224
  U32 offcode;
128
225
  const BYTE* match0;
@@ -135,14 +232,15 @@ ZSTD_compressBlock_fast_noDict_generic(
135
232
  size_t step;
136
233
  const BYTE* nextStep;
137
234
  const size_t kStepIncr = (1 << (kSearchStrength - 1));
235
+ const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
138
236
 
139
237
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
140
238
  ip0 += (ip0 == prefixStart);
141
239
  { U32 const curr = (U32)(ip0 - base);
142
240
  U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
143
241
  U32 const maxRep = curr - windowLow;
144
- if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
145
- if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
242
+ if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
243
+ if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
146
244
  }
147
245
 
148
246
  /* start each op */
@@ -163,7 +261,7 @@ _start: /* Requires: ip0 */
163
261
  hash0 = ZSTD_hashPtr(ip0, hlog, mls);
164
262
  hash1 = ZSTD_hashPtr(ip1, hlog, mls);
165
263
 
166
- idx = hashTable[hash0];
264
+ matchIdx = hashTable[hash0];
167
265
 
168
266
  do {
169
267
  /* load repcode match for ip[2]*/
@@ -180,26 +278,28 @@ _start: /* Requires: ip0 */
180
278
  mLength = ip0[-1] == match0[-1];
181
279
  ip0 -= mLength;
182
280
  match0 -= mLength;
183
- offcode = STORE_REPCODE_1;
281
+ offcode = REPCODE1_TO_OFFBASE;
184
282
  mLength += 4;
283
+
284
+ /* Write next hash table entry: it's already calculated.
285
+ * This write is known to be safe because ip1 is before the
286
+ * repcode (ip2). */
287
+ hashTable[hash1] = (U32)(ip1 - base);
288
+
185
289
  goto _match;
186
290
  }
187
291
 
188
- /* load match for ip[0] */
189
- if (idx >= prefixStartIndex) {
190
- mval = MEM_read32(base + idx);
191
- } else {
192
- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
193
- }
292
+ if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
293
+ /* Write next hash table entry (it's already calculated).
294
+ * This write is known to be safe because the ip1 == ip0 + 1,
295
+ * so searching will resume after ip1 */
296
+ hashTable[hash1] = (U32)(ip1 - base);
194
297
 
195
- /* check match at ip[0] */
196
- if (MEM_read32(ip0) == mval) {
197
- /* found a match! */
198
298
  goto _offset;
199
299
  }
200
300
 
201
301
  /* lookup ip[1] */
202
- idx = hashTable[hash1];
302
+ matchIdx = hashTable[hash1];
203
303
 
204
304
  /* hash ip[2] */
205
305
  hash0 = hash1;
@@ -214,21 +314,19 @@ _start: /* Requires: ip0 */
214
314
  current0 = (U32)(ip0 - base);
215
315
  hashTable[hash0] = current0;
216
316
 
217
- /* load match for ip[0] */
218
- if (idx >= prefixStartIndex) {
219
- mval = MEM_read32(base + idx);
220
- } else {
221
- mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
222
- }
223
-
224
- /* check match at ip[0] */
225
- if (MEM_read32(ip0) == mval) {
226
- /* found a match! */
317
+ if (matchFound(ip0, base + matchIdx, matchIdx, prefixStartIndex)) {
318
+ /* Write next hash table entry, since it's already calculated */
319
+ if (step <= 4) {
320
+ /* Avoid writing an index if it's >= position where search will resume.
321
+ * The minimum possible match has length 4, so search can resume at ip0 + 4.
322
+ */
323
+ hashTable[hash1] = (U32)(ip1 - base);
324
+ }
227
325
  goto _offset;
228
326
  }
229
327
 
230
328
  /* lookup ip[1] */
231
- idx = hashTable[hash1];
329
+ matchIdx = hashTable[hash1];
232
330
 
233
331
  /* hash ip[2] */
234
332
  hash0 = hash1;
@@ -250,13 +348,28 @@ _start: /* Requires: ip0 */
250
348
  } while (ip3 < ilimit);
251
349
 
252
350
  _cleanup:
253
- /* Note that there are probably still a couple positions we could search.
351
+ /* Note that there are probably still a couple positions one could search.
254
352
  * However, it seems to be a meaningful performance hit to try to search
255
353
  * them. So let's not. */
256
354
 
355
+ /* When the repcodes are outside of the prefix, we set them to zero before the loop.
356
+ * When the offsets are still zero, we need to restore them after the block to have a correct
357
+ * repcode history. If only one offset was invalid, it is easy. The tricky case is when both
358
+ * offsets were invalid. We need to figure out which offset to refill with.
359
+ * - If both offsets are zero they are in the same order.
360
+ * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
361
+ * - If only one is zero, we need to decide which offset to restore.
362
+ * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
363
+ * - It is impossible for rep_offset2 to be non-zero.
364
+ *
365
+ * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
366
+ * set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
367
+ */
368
+ offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
369
+
257
370
  /* save reps for next block */
258
- rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
259
- rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
371
+ rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
372
+ rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
260
373
 
261
374
  /* Return the last literals size */
262
375
  return (size_t)(iend - anchor);
@@ -264,10 +377,10 @@ _cleanup:
264
377
  _offset: /* Requires: ip0, idx */
265
378
 
266
379
  /* Compute the offset code. */
267
- match0 = base + idx;
380
+ match0 = base + matchIdx;
268
381
  rep_offset2 = rep_offset1;
269
382
  rep_offset1 = (U32)(ip0-match0);
270
- offcode = STORE_OFFSET(rep_offset1);
383
+ offcode = OFFSET_TO_OFFBASE(rep_offset1);
271
384
  mLength = 4;
272
385
 
273
386
  /* Count the backwards match length. */
@@ -287,11 +400,6 @@ _match: /* Requires: ip0, match0, offcode */
287
400
  ip0 += mLength;
288
401
  anchor = ip0;
289
402
 
290
- /* write next hash table entry */
291
- if (ip1 < ip0) {
292
- hashTable[hash1] = (U32)(ip1 - base);
293
- }
294
-
295
403
  /* Fill table and check for immediate repcode. */
296
404
  if (ip0 <= ilimit) {
297
405
  /* Fill Table */
@@ -306,7 +414,7 @@ _match: /* Requires: ip0, match0, offcode */
306
414
  { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
307
415
  hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
308
416
  ip0 += rLength;
309
- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
417
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
310
418
  anchor = ip0;
311
419
  continue; /* faster when present (confirmed on gcc-8) ... (?) */
312
420
  } } }
@@ -314,12 +422,12 @@ _match: /* Requires: ip0, match0, offcode */
314
422
  goto _start;
315
423
  }
316
424
 
317
- #define ZSTD_GEN_FAST_FN(dictMode, mls, step) \
318
- static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \
319
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
425
+ #define ZSTD_GEN_FAST_FN(dictMode, mml, cmov) \
426
+ static size_t ZSTD_compressBlock_fast_##dictMode##_##mml##_##cmov( \
427
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
320
428
  void const* src, size_t srcSize) \
321
429
  { \
322
- return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
430
+ return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mml, cmov); \
323
431
  }
324
432
 
325
433
  ZSTD_GEN_FAST_FN(noDict, 4, 1)
@@ -333,13 +441,15 @@ ZSTD_GEN_FAST_FN(noDict, 6, 0)
333
441
  ZSTD_GEN_FAST_FN(noDict, 7, 0)
334
442
 
335
443
  size_t ZSTD_compressBlock_fast(
336
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
444
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
337
445
  void const* src, size_t srcSize)
338
446
  {
339
- U32 const mls = ms->cParams.minMatch;
447
+ U32 const mml = ms->cParams.minMatch;
448
+ /* use cmov when "candidate in range" branch is likely unpredictable */
449
+ int const useCmov = ms->cParams.windowLog < 19;
340
450
  assert(ms->dictMatchState == NULL);
341
- if (ms->cParams.targetLength > 1) {
342
- switch(mls)
451
+ if (useCmov) {
452
+ switch(mml)
343
453
  {
344
454
  default: /* includes case 3 */
345
455
  case 4 :
@@ -352,7 +462,8 @@ size_t ZSTD_compressBlock_fast(
352
462
  return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
353
463
  }
354
464
  } else {
355
- switch(mls)
465
+ /* use a branch instead */
466
+ switch(mml)
356
467
  {
357
468
  default: /* includes case 3 */
358
469
  case 4 :
@@ -364,13 +475,13 @@ size_t ZSTD_compressBlock_fast(
364
475
  case 7 :
365
476
  return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
366
477
  }
367
-
368
478
  }
369
479
  }
370
480
 
371
481
  FORCE_INLINE_TEMPLATE
482
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
372
483
  size_t ZSTD_compressBlock_fast_dictMatchState_generic(
373
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
484
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
374
485
  void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
375
486
  {
376
487
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -380,16 +491,16 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
380
491
  U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
381
492
  const BYTE* const base = ms->window.base;
382
493
  const BYTE* const istart = (const BYTE*)src;
383
- const BYTE* ip = istart;
494
+ const BYTE* ip0 = istart;
495
+ const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */
384
496
  const BYTE* anchor = istart;
385
497
  const U32 prefixStartIndex = ms->window.dictLimit;
386
498
  const BYTE* const prefixStart = base + prefixStartIndex;
387
499
  const BYTE* const iend = istart + srcSize;
388
500
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
389
501
  U32 offset_1=rep[0], offset_2=rep[1];
390
- U32 offsetSaved = 0;
391
502
 
392
- const ZSTD_matchState_t* const dms = ms->dictMatchState;
503
+ const ZSTD_MatchState_t* const dms = ms->dictMatchState;
393
504
  const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
394
505
  const U32* const dictHashTable = dms->hashTable;
395
506
  const U32 dictStartIndex = dms->window.dictLimit;
@@ -397,13 +508,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
397
508
  const BYTE* const dictStart = dictBase + dictStartIndex;
398
509
  const BYTE* const dictEnd = dms->window.nextSrc;
399
510
  const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
400
- const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
401
- const U32 dictHLog = dictCParams->hashLog;
511
+ const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart);
512
+ const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
402
513
 
403
514
  /* if a dictionary is still attached, it necessarily means that
404
515
  * it is within window size. So we just check it. */
405
516
  const U32 maxDistance = 1U << cParams->windowLog;
406
- const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
517
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
407
518
  assert(endIndex - prefixStartIndex <= maxDistance);
408
519
  (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
409
520
 
@@ -413,106 +524,154 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
413
524
  * when translating a dict index into a local index */
414
525
  assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
415
526
 
527
+ if (ms->prefetchCDictTables) {
528
+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
529
+ PREFETCH_AREA(dictHashTable, hashTableBytes);
530
+ }
531
+
416
532
  /* init */
417
533
  DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
418
- ip += (dictAndPrefixLength == 0);
534
+ ip0 += (dictAndPrefixLength == 0);
419
535
  /* dictMatchState repCode checks don't currently handle repCode == 0
420
536
  * disabling. */
421
537
  assert(offset_1 <= dictAndPrefixLength);
422
538
  assert(offset_2 <= dictAndPrefixLength);
423
539
 
424
- /* Main Search Loop */
425
- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
540
+ /* Outer search loop */
541
+ assert(stepSize >= 1);
542
+ while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */
426
543
  size_t mLength;
427
- size_t const h = ZSTD_hashPtr(ip, hlog, mls);
428
- U32 const curr = (U32)(ip-base);
429
- U32 const matchIndex = hashTable[h];
430
- const BYTE* match = base + matchIndex;
431
- const U32 repIndex = curr + 1 - offset_1;
432
- const BYTE* repMatch = (repIndex < prefixStartIndex) ?
433
- dictBase + (repIndex - dictIndexDelta) :
434
- base + repIndex;
435
- hashTable[h] = curr; /* update hash table */
436
-
437
- if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
438
- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
439
- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
440
- mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
441
- ip++;
442
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
443
- } else if ( (matchIndex <= prefixStartIndex) ) {
444
- size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
445
- U32 const dictMatchIndex = dictHashTable[dictHash];
446
- const BYTE* dictMatch = dictBase + dictMatchIndex;
447
- if (dictMatchIndex <= dictStartIndex ||
448
- MEM_read32(dictMatch) != MEM_read32(ip)) {
449
- assert(stepSize >= 1);
450
- ip += ((ip-anchor) >> kSearchStrength) + stepSize;
451
- continue;
452
- } else {
453
- /* found a dict match */
454
- U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
455
- mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
456
- while (((ip>anchor) & (dictMatch>dictStart))
457
- && (ip[-1] == dictMatch[-1])) {
458
- ip--; dictMatch--; mLength++;
544
+ size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls);
545
+
546
+ size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls);
547
+ U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS];
548
+ int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0);
549
+
550
+ U32 matchIndex = hashTable[hash0];
551
+ U32 curr = (U32)(ip0 - base);
552
+ size_t step = stepSize;
553
+ const size_t kStepIncr = 1 << kSearchStrength;
554
+ const BYTE* nextStep = ip0 + kStepIncr;
555
+
556
+ /* Inner search loop */
557
+ while (1) {
558
+ const BYTE* match = base + matchIndex;
559
+ const U32 repIndex = curr + 1 - offset_1;
560
+ const BYTE* repMatch = (repIndex < prefixStartIndex) ?
561
+ dictBase + (repIndex - dictIndexDelta) :
562
+ base + repIndex;
563
+ const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls);
564
+ size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls);
565
+ hashTable[hash0] = curr; /* update hash table */
566
+
567
+ if ((ZSTD_index_overlap_check(prefixStartIndex, repIndex))
568
+ && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) {
569
+ const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
570
+ mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4;
571
+ ip0++;
572
+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
573
+ break;
574
+ }
575
+
576
+ if (dictTagsMatch) {
577
+ /* Found a possible dict match */
578
+ const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
579
+ const BYTE* dictMatch = dictBase + dictMatchIndex;
580
+ if (dictMatchIndex > dictStartIndex &&
581
+ MEM_read32(dictMatch) == MEM_read32(ip0)) {
582
+ /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */
583
+ if (matchIndex <= prefixStartIndex) {
584
+ U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta);
585
+ mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4;
586
+ while (((ip0 > anchor) & (dictMatch > dictStart))
587
+ && (ip0[-1] == dictMatch[-1])) {
588
+ ip0--;
589
+ dictMatch--;
590
+ mLength++;
591
+ } /* catch up */
592
+ offset_2 = offset_1;
593
+ offset_1 = offset;
594
+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
595
+ break;
596
+ }
597
+ }
598
+ }
599
+
600
+ if (ZSTD_match4Found_cmov(ip0, match, matchIndex, prefixStartIndex)) {
601
+ /* found a regular match of size >= 4 */
602
+ U32 const offset = (U32) (ip0 - match);
603
+ mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4;
604
+ while (((ip0 > anchor) & (match > prefixStart))
605
+ && (ip0[-1] == match[-1])) {
606
+ ip0--;
607
+ match--;
608
+ mLength++;
459
609
  } /* catch up */
460
610
  offset_2 = offset_1;
461
611
  offset_1 = offset;
462
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
612
+ ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
613
+ break;
463
614
  }
464
- } else if (MEM_read32(match) != MEM_read32(ip)) {
465
- /* it's not a match, and we're not going to check the dictionary */
466
- assert(stepSize >= 1);
467
- ip += ((ip-anchor) >> kSearchStrength) + stepSize;
468
- continue;
469
- } else {
470
- /* found a regular match */
471
- U32 const offset = (U32)(ip-match);
472
- mLength = ZSTD_count(ip+4, match+4, iend) + 4;
473
- while (((ip>anchor) & (match>prefixStart))
474
- && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
475
- offset_2 = offset_1;
476
- offset_1 = offset;
477
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
478
- }
615
+
616
+ /* Prepare for next iteration */
617
+ dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS];
618
+ dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1);
619
+ matchIndex = hashTable[hash1];
620
+
621
+ if (ip1 >= nextStep) {
622
+ step++;
623
+ nextStep += kStepIncr;
624
+ }
625
+ ip0 = ip1;
626
+ ip1 = ip1 + step;
627
+ if (ip1 > ilimit) goto _cleanup;
628
+
629
+ curr = (U32)(ip0 - base);
630
+ hash0 = hash1;
631
+ } /* end inner search loop */
479
632
 
480
633
  /* match found */
481
- ip += mLength;
482
- anchor = ip;
634
+ assert(mLength);
635
+ ip0 += mLength;
636
+ anchor = ip0;
483
637
 
484
- if (ip <= ilimit) {
638
+ if (ip0 <= ilimit) {
485
639
  /* Fill Table */
486
640
  assert(base+curr+2 > istart); /* check base overflow */
487
641
  hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */
488
- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
642
+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
489
643
 
490
644
  /* check immediate repcode */
491
- while (ip <= ilimit) {
492
- U32 const current2 = (U32)(ip-base);
645
+ while (ip0 <= ilimit) {
646
+ U32 const current2 = (U32)(ip0-base);
493
647
  U32 const repIndex2 = current2 - offset_2;
494
648
  const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
495
649
  dictBase - dictIndexDelta + repIndex2 :
496
650
  base + repIndex2;
497
- if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
498
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
651
+ if ( (ZSTD_index_overlap_check(prefixStartIndex, repIndex2))
652
+ && (MEM_read32(repMatch2) == MEM_read32(ip0))) {
499
653
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
500
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
654
+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
501
655
  U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
502
- ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
503
- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
504
- ip += repLength2;
505
- anchor = ip;
656
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
657
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2;
658
+ ip0 += repLength2;
659
+ anchor = ip0;
506
660
  continue;
507
661
  }
508
662
  break;
509
663
  }
510
664
  }
665
+
666
+ /* Prepare for next iteration */
667
+ assert(ip0 == anchor);
668
+ ip1 = ip0 + stepSize;
511
669
  }
512
670
 
671
+ _cleanup:
513
672
  /* save reps for next block */
514
- rep[0] = offset_1 ? offset_1 : offsetSaved;
515
- rep[1] = offset_2 ? offset_2 : offsetSaved;
673
+ rep[0] = offset_1;
674
+ rep[1] = offset_2;
516
675
 
517
676
  /* Return the last literals size */
518
677
  return (size_t)(iend - anchor);
@@ -525,7 +684,7 @@ ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
525
684
  ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
526
685
 
527
686
  size_t ZSTD_compressBlock_fast_dictMatchState(
528
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
687
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
529
688
  void const* src, size_t srcSize)
530
689
  {
531
690
  U32 const mls = ms->cParams.minMatch;
@@ -545,19 +704,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
545
704
  }
546
705
 
547
706
 
548
- static size_t ZSTD_compressBlock_fast_extDict_generic(
549
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
707
+ static
708
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
709
+ size_t ZSTD_compressBlock_fast_extDict_generic(
710
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
550
711
  void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
551
712
  {
552
713
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
553
714
  U32* const hashTable = ms->hashTable;
554
715
  U32 const hlog = cParams->hashLog;
555
716
  /* support stepSize of 0 */
556
- U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
717
+ size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
557
718
  const BYTE* const base = ms->window.base;
558
719
  const BYTE* const dictBase = ms->window.dictBase;
559
720
  const BYTE* const istart = (const BYTE*)src;
560
- const BYTE* ip = istart;
561
721
  const BYTE* anchor = istart;
562
722
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
563
723
  const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
@@ -570,6 +730,28 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
570
730
  const BYTE* const iend = istart + srcSize;
571
731
  const BYTE* const ilimit = iend - 8;
572
732
  U32 offset_1=rep[0], offset_2=rep[1];
733
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
734
+
735
+ const BYTE* ip0 = istart;
736
+ const BYTE* ip1;
737
+ const BYTE* ip2;
738
+ const BYTE* ip3;
739
+ U32 current0;
740
+
741
+
742
+ size_t hash0; /* hash for ip0 */
743
+ size_t hash1; /* hash for ip1 */
744
+ U32 idx; /* match idx for ip0 */
745
+ const BYTE* idxBase; /* base pointer for idx */
746
+
747
+ U32 offcode;
748
+ const BYTE* match0;
749
+ size_t mLength;
750
+ const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */
751
+
752
+ size_t step;
753
+ const BYTE* nextStep;
754
+ const size_t kStepIncr = (1 << (kSearchStrength - 1));
573
755
 
574
756
  (void)hasStep; /* not currently specialized on whether it's accelerated */
575
757
 
@@ -579,75 +761,202 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
579
761
  if (prefixStartIndex == dictStartIndex)
580
762
  return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
581
763
 
582
- /* Search Loop */
583
- while (ip < ilimit) { /* < instead of <=, because (ip+1) */
584
- const size_t h = ZSTD_hashPtr(ip, hlog, mls);
585
- const U32 matchIndex = hashTable[h];
586
- const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
587
- const BYTE* match = matchBase + matchIndex;
588
- const U32 curr = (U32)(ip-base);
589
- const U32 repIndex = curr + 1 - offset_1;
590
- const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
591
- const BYTE* const repMatch = repBase + repIndex;
592
- hashTable[h] = curr; /* update hash table */
593
- DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
594
-
595
- if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
596
- & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
597
- && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
598
- const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
599
- size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
600
- ip++;
601
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
602
- ip += rLength;
603
- anchor = ip;
604
- } else {
605
- if ( (matchIndex < dictStartIndex) ||
606
- (MEM_read32(match) != MEM_read32(ip)) ) {
607
- assert(stepSize >= 1);
608
- ip += ((ip-anchor) >> kSearchStrength) + stepSize;
609
- continue;
764
+ { U32 const curr = (U32)(ip0 - base);
765
+ U32 const maxRep = curr - dictStartIndex;
766
+ if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
767
+ if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
768
+ }
769
+
770
+ /* start each op */
771
+ _start: /* Requires: ip0 */
772
+
773
+ step = stepSize;
774
+ nextStep = ip0 + kStepIncr;
775
+
776
+ /* calculate positions, ip0 - anchor == 0, so we skip step calc */
777
+ ip1 = ip0 + 1;
778
+ ip2 = ip0 + step;
779
+ ip3 = ip2 + 1;
780
+
781
+ if (ip3 >= ilimit) {
782
+ goto _cleanup;
783
+ }
784
+
785
+ hash0 = ZSTD_hashPtr(ip0, hlog, mls);
786
+ hash1 = ZSTD_hashPtr(ip1, hlog, mls);
787
+
788
+ idx = hashTable[hash0];
789
+ idxBase = idx < prefixStartIndex ? dictBase : base;
790
+
791
+ do {
792
+ { /* load repcode match for ip[2] */
793
+ U32 const current2 = (U32)(ip2 - base);
794
+ U32 const repIndex = current2 - offset_1;
795
+ const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
796
+ U32 rval;
797
+ if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */
798
+ & (offset_1 > 0) ) {
799
+ rval = MEM_read32(repBase + repIndex);
800
+ } else {
801
+ rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */
610
802
  }
611
- { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
612
- const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
613
- U32 const offset = curr - matchIndex;
614
- size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
615
- while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
616
- offset_2 = offset_1; offset_1 = offset; /* update offset history */
617
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
618
- ip += mLength;
619
- anchor = ip;
803
+
804
+ /* write back hash table entry */
805
+ current0 = (U32)(ip0 - base);
806
+ hashTable[hash0] = current0;
807
+
808
+ /* check repcode at ip[2] */
809
+ if (MEM_read32(ip2) == rval) {
810
+ ip0 = ip2;
811
+ match0 = repBase + repIndex;
812
+ matchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
813
+ assert((match0 != prefixStart) & (match0 != dictStart));
814
+ mLength = ip0[-1] == match0[-1];
815
+ ip0 -= mLength;
816
+ match0 -= mLength;
817
+ offcode = REPCODE1_TO_OFFBASE;
818
+ mLength += 4;
819
+ goto _match;
620
820
  } }
621
821
 
622
- if (ip <= ilimit) {
623
- /* Fill Table */
624
- hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
625
- hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
626
- /* check immediate repcode */
627
- while (ip <= ilimit) {
628
- U32 const current2 = (U32)(ip-base);
629
- U32 const repIndex2 = current2 - offset_2;
630
- const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
631
- if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
632
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
633
- const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
634
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
635
- { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
636
- ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
637
- hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
638
- ip += repLength2;
639
- anchor = ip;
640
- continue;
641
- }
642
- break;
643
- } } }
822
+ { /* load match for ip[0] */
823
+ U32 const mval = idx >= dictStartIndex ?
824
+ MEM_read32(idxBase + idx) :
825
+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */
826
+
827
+ /* check match at ip[0] */
828
+ if (MEM_read32(ip0) == mval) {
829
+ /* found a match! */
830
+ goto _offset;
831
+ } }
832
+
833
+ /* lookup ip[1] */
834
+ idx = hashTable[hash1];
835
+ idxBase = idx < prefixStartIndex ? dictBase : base;
836
+
837
+ /* hash ip[2] */
838
+ hash0 = hash1;
839
+ hash1 = ZSTD_hashPtr(ip2, hlog, mls);
840
+
841
+ /* advance to next positions */
842
+ ip0 = ip1;
843
+ ip1 = ip2;
844
+ ip2 = ip3;
845
+
846
+ /* write back hash table entry */
847
+ current0 = (U32)(ip0 - base);
848
+ hashTable[hash0] = current0;
849
+
850
+ { /* load match for ip[0] */
851
+ U32 const mval = idx >= dictStartIndex ?
852
+ MEM_read32(idxBase + idx) :
853
+ MEM_read32(ip0) ^ 1; /* guaranteed not to match */
854
+
855
+ /* check match at ip[0] */
856
+ if (MEM_read32(ip0) == mval) {
857
+ /* found a match! */
858
+ goto _offset;
859
+ } }
860
+
861
+ /* lookup ip[1] */
862
+ idx = hashTable[hash1];
863
+ idxBase = idx < prefixStartIndex ? dictBase : base;
864
+
865
+ /* hash ip[2] */
866
+ hash0 = hash1;
867
+ hash1 = ZSTD_hashPtr(ip2, hlog, mls);
868
+
869
+ /* advance to next positions */
870
+ ip0 = ip1;
871
+ ip1 = ip2;
872
+ ip2 = ip0 + step;
873
+ ip3 = ip1 + step;
874
+
875
+ /* calculate step */
876
+ if (ip2 >= nextStep) {
877
+ step++;
878
+ PREFETCH_L1(ip1 + 64);
879
+ PREFETCH_L1(ip1 + 128);
880
+ nextStep += kStepIncr;
881
+ }
882
+ } while (ip3 < ilimit);
883
+
884
+ _cleanup:
885
+ /* Note that there are probably still a couple positions we could search.
886
+ * However, it seems to be a meaningful performance hit to try to search
887
+ * them. So let's not. */
888
+
889
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
890
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
891
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
644
892
 
645
893
  /* save reps for next block */
646
- rep[0] = offset_1;
647
- rep[1] = offset_2;
894
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
895
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;
648
896
 
649
897
  /* Return the last literals size */
650
898
  return (size_t)(iend - anchor);
899
+
900
+ _offset: /* Requires: ip0, idx, idxBase */
901
+
902
+ /* Compute the offset code. */
903
+ { U32 const offset = current0 - idx;
904
+ const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart;
905
+ matchEnd = idx < prefixStartIndex ? dictEnd : iend;
906
+ match0 = idxBase + idx;
907
+ offset_2 = offset_1;
908
+ offset_1 = offset;
909
+ offcode = OFFSET_TO_OFFBASE(offset);
910
+ mLength = 4;
911
+
912
+ /* Count the backwards match length. */
913
+ while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) {
914
+ ip0--;
915
+ match0--;
916
+ mLength++;
917
+ } }
918
+
919
+ _match: /* Requires: ip0, match0, offcode, matchEnd */
920
+
921
+ /* Count the forward length. */
922
+ assert(matchEnd != 0);
923
+ mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart);
924
+
925
+ ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
926
+
927
+ ip0 += mLength;
928
+ anchor = ip0;
929
+
930
+ /* write next hash table entry */
931
+ if (ip1 < ip0) {
932
+ hashTable[hash1] = (U32)(ip1 - base);
933
+ }
934
+
935
+ /* Fill table and check for immediate repcode. */
936
+ if (ip0 <= ilimit) {
937
+ /* Fill Table */
938
+ assert(base+current0+2 > istart); /* check base overflow */
939
+ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
940
+ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
941
+
942
+ while (ip0 <= ilimit) {
943
+ U32 const repIndex2 = (U32)(ip0-base) - offset_2;
944
+ const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
945
+ if ( ((ZSTD_index_overlap_check(prefixStartIndex, repIndex2)) & (offset_2 > 0))
946
+ && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) {
947
+ const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
948
+ size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
949
+ { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */
950
+ ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
951
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
952
+ ip0 += repLength2;
953
+ anchor = ip0;
954
+ continue;
955
+ }
956
+ break;
957
+ } }
958
+
959
+ goto _start;
651
960
  }
652
961
 
653
962
  ZSTD_GEN_FAST_FN(extDict, 4, 0)
@@ -656,10 +965,11 @@ ZSTD_GEN_FAST_FN(extDict, 6, 0)
656
965
  ZSTD_GEN_FAST_FN(extDict, 7, 0)
657
966
 
658
967
  size_t ZSTD_compressBlock_fast_extDict(
659
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
968
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
660
969
  void const* src, size_t srcSize)
661
970
  {
662
971
  U32 const mls = ms->cParams.minMatch;
972
+ assert(ms->dictMatchState == NULL);
663
973
  switch(mls)
664
974
  {
665
975
  default: /* includes case 3 */