multi_compress 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +22 -1
  9. data/ext/multi_compress/multi_compress.c +152 -72
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. metadata +29 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -12,11 +12,14 @@
12
12
  #include "hist.h"
13
13
  #include "zstd_opt.h"
14
14
 
15
+ #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
15
18
 
16
19
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
20
  #define ZSTD_MAX_PRICE (1<<30)
18
21
 
19
- #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
22
+ #define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
20
23
 
21
24
 
22
25
  /*-*************************************
@@ -26,27 +29,35 @@
26
29
  #if 0 /* approximation at bit level (for tests) */
27
30
  # define BITCOST_ACCURACY 0
28
31
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
29
- # define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
32
+ # define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
30
33
  #elif 0 /* fractional bit accuracy (for tests) */
31
34
  # define BITCOST_ACCURACY 8
32
35
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
33
- # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
36
+ # define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
34
37
  #else /* opt==approx, ultra==accurate */
35
38
  # define BITCOST_ACCURACY 8
36
39
  # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
37
- # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
40
+ # define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
38
41
  #endif
39
42
 
43
+ /* ZSTD_bitWeight() :
44
+ * provide estimated "cost" of a stat in full bits only */
40
45
  MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
41
46
  {
42
47
  return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
43
48
  }
44
49
 
50
+ /* ZSTD_fracWeight() :
51
+ * provide fractional-bit "cost" of a stat,
52
+ * using linear interpolation approximation */
45
53
  MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
46
54
  {
47
55
  U32 const stat = rawStat + 1;
48
56
  U32 const hb = ZSTD_highbit32(stat);
49
57
  U32 const BWeight = hb * BITCOST_MULTIPLIER;
58
+ /* Fweight was meant for "Fractional weight"
59
+ * but it's effectively a value between 1 and 2
60
+ * using fixed point arithmetic */
50
61
  U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
51
62
  U32 const weight = BWeight + FWeight;
52
63
  assert(hb + BITCOST_ACCURACY < 31);
@@ -57,7 +68,7 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
57
68
  /* debugging function,
58
69
  * @return price in bytes as fractional value
59
70
  * for debug messages only */
60
- MEM_STATIC double ZSTD_fCost(U32 price)
71
+ MEM_STATIC double ZSTD_fCost(int price)
61
72
  {
62
73
  return (double)price / (BITCOST_MULTIPLIER*8);
63
74
  }
@@ -88,20 +99,26 @@ static U32 sum_u32(const unsigned table[], size_t nbElts)
88
99
  return total;
89
100
  }
90
101
 
91
- static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
102
+ typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
103
+
104
+ static U32
105
+ ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
92
106
  {
93
107
  U32 s, sum=0;
94
- DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
108
+ DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
109
+ (unsigned)lastEltIndex+1, (unsigned)shift );
95
110
  assert(shift < 30);
96
111
  for (s=0; s<lastEltIndex+1; s++) {
97
- table[s] = 1 + (table[s] >> shift);
98
- sum += table[s];
112
+ unsigned const base = base1 ? 1 : (table[s]>0);
113
+ unsigned const newStat = base + (table[s] >> shift);
114
+ sum += newStat;
115
+ table[s] = newStat;
99
116
  }
100
117
  return sum;
101
118
  }
102
119
 
103
120
  /* ZSTD_scaleStats() :
104
- * reduce all elements in table is sum too large
121
+ * reduce all elt frequencies in table if sum too large
105
122
  * return the resulting sum of elements */
106
123
  static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
107
124
  {
@@ -110,7 +127,7 @@ static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
110
127
  DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
111
128
  assert(logTarget < 30);
112
129
  if (factor <= 1) return prevsum;
113
- return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
130
+ return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
114
131
  }
115
132
 
116
133
  /* ZSTD_rescaleFreqs() :
@@ -129,18 +146,22 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
129
146
  DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
130
147
  optPtr->priceType = zop_dynamic;
131
148
 
132
- if (optPtr->litLengthSum == 0) { /* first block : init */
133
- if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
134
- DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
149
+ if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
150
+
151
+ /* heuristic: use pre-defined stats for too small inputs */
152
+ if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
153
+ DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
135
154
  optPtr->priceType = zop_predef;
136
155
  }
137
156
 
138
157
  assert(optPtr->symbolCosts != NULL);
139
158
  if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
140
- /* huffman table presumed generated by dictionary */
159
+
160
+ /* huffman stats covering the full value set : table presumed generated by dictionary */
141
161
  optPtr->priceType = zop_dynamic;
142
162
 
143
163
  if (compressedLiterals) {
164
+ /* generate literals statistics from huffman table */
144
165
  unsigned lit;
145
166
  assert(optPtr->litFreq != NULL);
146
167
  optPtr->litSum = 0;
@@ -188,13 +209,14 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
188
209
  optPtr->offCodeSum += optPtr->offCodeFreq[of];
189
210
  } }
190
211
 
191
- } else { /* not a dictionary */
212
+ } else { /* first block, no dictionary */
192
213
 
193
214
  assert(optPtr->litFreq != NULL);
194
215
  if (compressedLiterals) {
216
+ /* base initial cost of literals on direct frequency within src */
195
217
  unsigned lit = MaxLit;
196
218
  HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
197
- optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
219
+ optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible);
198
220
  }
199
221
 
200
222
  { unsigned const baseLLfreqs[MaxLL+1] = {
@@ -224,10 +246,9 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
224
246
  optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
225
247
  }
226
248
 
227
-
228
249
  }
229
250
 
230
- } else { /* new block : re-use previous statistics, scaled down */
251
+ } else { /* new block : scale down accumulated statistics */
231
252
 
232
253
  if (compressedLiterals)
233
254
  optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
@@ -246,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
246
267
  const optState_t* const optPtr,
247
268
  int optLevel)
248
269
  {
270
+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
249
271
  if (litLength == 0) return 0;
250
272
 
251
273
  if (!ZSTD_compressedLiterals(optPtr))
@@ -255,11 +277,14 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
255
277
  return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
256
278
 
257
279
  /* dynamic statistics */
258
- { U32 price = litLength * optPtr->litSumBasePrice;
280
+ { U32 price = optPtr->litSumBasePrice * litLength;
281
+ U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER;
259
282
  U32 u;
283
+ assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER);
260
284
  for (u=0; u < litLength; u++) {
261
- assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
262
- price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
285
+ U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel);
286
+ if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax;
287
+ price -= litPrice;
263
288
  }
264
289
  return price;
265
290
  }
@@ -272,10 +297,11 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
272
297
  assert(litLength <= ZSTD_BLOCKSIZE_MAX);
273
298
  if (optPtr->priceType == zop_predef)
274
299
  return WEIGHT(litLength, optLevel);
275
- /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
276
- * because it isn't representable in the zstd format. So instead just
277
- * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
278
- * would be all literals.
300
+
301
+ /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
302
+ * because it isn't representable in the zstd format.
303
+ * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1.
304
+ * In such a case, the block would be all literals.
279
305
  */
280
306
  if (litLength == ZSTD_BLOCKSIZE_MAX)
281
307
  return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
@@ -289,24 +315,25 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
289
315
  }
290
316
 
291
317
  /* ZSTD_getMatchPrice() :
292
- * Provides the cost of the match part (offset + matchLength) of a sequence
318
+ * Provides the cost of the match part (offset + matchLength) of a sequence.
293
319
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
294
- * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
320
+ * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq()
295
321
  * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
296
322
  */
297
323
  FORCE_INLINE_TEMPLATE U32
298
- ZSTD_getMatchPrice(U32 const offcode,
324
+ ZSTD_getMatchPrice(U32 const offBase,
299
325
  U32 const matchLength,
300
326
  const optState_t* const optPtr,
301
327
  int const optLevel)
302
328
  {
303
329
  U32 price;
304
- U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
330
+ U32 const offCode = ZSTD_highbit32(offBase);
305
331
  U32 const mlBase = matchLength - MINMATCH;
306
332
  assert(matchLength >= MINMATCH);
307
333
 
308
- if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
309
- return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
334
+ if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */
335
+ return WEIGHT(mlBase, optLevel)
336
+ + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */
310
337
 
311
338
  /* dynamic statistics */
312
339
  price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
@@ -325,10 +352,10 @@ ZSTD_getMatchPrice(U32 const offcode,
325
352
  }
326
353
 
327
354
  /* ZSTD_updateStats() :
328
- * assumption : literals + litLengtn <= iend */
355
+ * assumption : literals + litLength <= iend */
329
356
  static void ZSTD_updateStats(optState_t* const optPtr,
330
357
  U32 litLength, const BYTE* literals,
331
- U32 offsetCode, U32 matchLength)
358
+ U32 offBase, U32 matchLength)
332
359
  {
333
360
  /* literals */
334
361
  if (ZSTD_compressedLiterals(optPtr)) {
@@ -344,8 +371,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
344
371
  optPtr->litLengthSum++;
345
372
  }
346
373
 
347
- /* offset code : expected to follow storeSeq() numeric representation */
348
- { U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
374
+ /* offset code : follows storeSeq() numeric representation */
375
+ { U32 const offCode = ZSTD_highbit32(offBase);
349
376
  assert(offCode <= MaxOff);
350
377
  optPtr->offCodeFreq[offCode]++;
351
378
  optPtr->offCodeSum++;
@@ -379,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
379
406
 
380
407
  /* Update hashTable3 up to ip (excluded)
381
408
  Assumption : always within prefix (i.e. not within extDict) */
382
- static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
383
- U32* nextToUpdate3,
384
- const BYTE* const ip)
409
+ static
410
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
+ U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_MatchState_t* ms,
412
+ U32* nextToUpdate3,
413
+ const BYTE* const ip)
385
414
  {
386
415
  U32* const hashTable3 = ms->hashTable3;
387
416
  U32 const hashLog3 = ms->hashLog3;
@@ -408,8 +437,10 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
408
437
  * @param ip assumed <= iend-8 .
409
438
  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
410
439
  * @return : nb of positions added */
411
- static U32 ZSTD_insertBt1(
412
- const ZSTD_matchState_t* ms,
440
+ static
441
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
+ U32 ZSTD_insertBt1(
443
+ const ZSTD_MatchState_t* ms,
413
444
  const BYTE* const ip, const BYTE* const iend,
414
445
  U32 const target,
415
446
  U32 const mls, const int extDict)
@@ -527,15 +558,16 @@ static U32 ZSTD_insertBt1(
527
558
  }
528
559
 
529
560
  FORCE_INLINE_TEMPLATE
561
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
530
562
  void ZSTD_updateTree_internal(
531
- ZSTD_matchState_t* ms,
563
+ ZSTD_MatchState_t* ms,
532
564
  const BYTE* const ip, const BYTE* const iend,
533
565
  const U32 mls, const ZSTD_dictMode_e dictMode)
534
566
  {
535
567
  const BYTE* const base = ms->window.base;
536
568
  U32 const target = (U32)(ip - base);
537
569
  U32 idx = ms->nextToUpdate;
538
- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
570
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
539
571
  idx, target, dictMode);
540
572
 
541
573
  while(idx < target) {
@@ -548,20 +580,23 @@ void ZSTD_updateTree_internal(
548
580
  ms->nextToUpdate = target;
549
581
  }
550
582
 
551
- void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
583
+ void ZSTD_updateTree(ZSTD_MatchState_t* ms, const BYTE* ip, const BYTE* iend) {
552
584
  ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
553
585
  }
554
586
 
555
587
  FORCE_INLINE_TEMPLATE
556
- U32 ZSTD_insertBtAndGetAllMatches (
557
- ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
558
- ZSTD_matchState_t* ms,
559
- U32* nextToUpdate3,
560
- const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
561
- const U32 rep[ZSTD_REP_NUM],
562
- U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
563
- const U32 lengthToBeat,
564
- U32 const mls /* template */)
588
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589
+ U32
590
+ ZSTD_insertBtAndGetAllMatches (
591
+ ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
592
+ ZSTD_MatchState_t* ms,
593
+ U32* nextToUpdate3,
594
+ const BYTE* const ip, const BYTE* const iLimit,
595
+ const ZSTD_dictMode_e dictMode,
596
+ const U32 rep[ZSTD_REP_NUM],
597
+ const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
598
+ const U32 lengthToBeat,
599
+ const U32 mls /* template */)
565
600
  {
566
601
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
567
602
  U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
@@ -590,7 +625,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
590
625
  U32 mnum = 0;
591
626
  U32 nbCompares = 1U << cParams->searchLog;
592
627
 
593
- const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
628
+ const ZSTD_MatchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
594
629
  const ZSTD_compressionParameters* const dmsCParams =
595
630
  dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
596
631
  const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
@@ -629,13 +664,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
629
664
  assert(curr >= windowLow);
630
665
  if ( dictMode == ZSTD_extDict
631
666
  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */
632
- & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
667
+ & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
633
668
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
634
669
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
635
670
  }
636
671
  if (dictMode == ZSTD_dictMatchState
637
672
  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */
638
- & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
673
+ & (ZSTD_index_overlap_check(dictLimit, repIndex)) )
639
674
  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
640
675
  repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
641
676
  } }
@@ -644,7 +679,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
644
679
  DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
645
680
  repCode, ll0, repOffset, repLen);
646
681
  bestLength = repLen;
647
- matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1); /* expect value between 1 and 3 */
682
+ matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */
648
683
  matches[mnum].len = (U32)repLen;
649
684
  mnum++;
650
685
  if ( (repLen > sufficient_len)
@@ -673,7 +708,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
673
708
  bestLength = mlen;
674
709
  assert(curr > matchIndex3);
675
710
  assert(mnum==0); /* no prior solution */
676
- matches[0].off = STORE_OFFSET(curr - matchIndex3);
711
+ matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3);
677
712
  matches[0].len = (U32)mlen;
678
713
  mnum = 1;
679
714
  if ( (mlen > sufficient_len) |
@@ -706,13 +741,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
706
741
  }
707
742
 
708
743
  if (matchLength > bestLength) {
709
- DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
710
- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
744
+ DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)",
745
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
711
746
  assert(matchEndIdx > matchIndex);
712
747
  if (matchLength > matchEndIdx - matchIndex)
713
748
  matchEndIdx = matchIndex + (U32)matchLength;
714
749
  bestLength = matchLength;
715
- matches[mnum].off = STORE_OFFSET(curr - matchIndex);
750
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
716
751
  matches[mnum].len = (U32)matchLength;
717
752
  mnum++;
718
753
  if ( (matchLength > ZSTD_OPT_NUM)
@@ -754,12 +789,12 @@ U32 ZSTD_insertBtAndGetAllMatches (
754
789
 
755
790
  if (matchLength > bestLength) {
756
791
  matchIndex = dictMatchIndex + dmsIndexDelta;
757
- DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
758
- (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
792
+ DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)",
793
+ (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex));
759
794
  if (matchLength > matchEndIdx - matchIndex)
760
795
  matchEndIdx = matchIndex + (U32)matchLength;
761
796
  bestLength = matchLength;
762
- matches[mnum].off = STORE_OFFSET(curr - matchIndex);
797
+ matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex);
763
798
  matches[mnum].len = (U32)matchLength;
764
799
  mnum++;
765
800
  if ( (matchLength > ZSTD_OPT_NUM)
@@ -784,7 +819,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
784
819
 
785
820
  typedef U32 (*ZSTD_getAllMatchesFn)(
786
821
  ZSTD_match_t*,
787
- ZSTD_matchState_t*,
822
+ ZSTD_MatchState_t*,
788
823
  U32*,
789
824
  const BYTE*,
790
825
  const BYTE*,
@@ -792,9 +827,11 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
792
827
  U32 const ll0,
793
828
  U32 const lengthToBeat);
794
829
 
795
- FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
830
+ FORCE_INLINE_TEMPLATE
831
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832
+ U32 ZSTD_btGetAllMatches_internal(
796
833
  ZSTD_match_t* matches,
797
- ZSTD_matchState_t* ms,
834
+ ZSTD_MatchState_t* ms,
798
835
  U32* nextToUpdate3,
799
836
  const BYTE* ip,
800
837
  const BYTE* const iHighLimit,
@@ -817,7 +854,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
817
854
  #define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \
818
855
  static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \
819
856
  ZSTD_match_t* matches, \
820
- ZSTD_matchState_t* ms, \
857
+ ZSTD_MatchState_t* ms, \
821
858
  U32* nextToUpdate3, \
822
859
  const BYTE* ip, \
823
860
  const BYTE* const iHighLimit, \
@@ -849,7 +886,7 @@ GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
849
886
  }
850
887
 
851
888
  static ZSTD_getAllMatchesFn
852
- ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
889
+ ZSTD_selectBtGetAllMatches(ZSTD_MatchState_t const* ms, ZSTD_dictMode_e const dictMode)
853
890
  {
854
891
  ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
855
892
  ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
@@ -868,7 +905,7 @@ ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const di
868
905
 
869
906
  /* Struct containing info needed to make decision about ldm inclusion */
870
907
  typedef struct {
871
- rawSeqStore_t seqStore; /* External match candidates store for this block */
908
+ RawSeqStore_t seqStore; /* External match candidates store for this block */
872
909
  U32 startPosInBlock; /* Start position of the current match candidate */
873
910
  U32 endPosInBlock; /* End position of the current match candidate */
874
911
  U32 offset; /* Offset of the match candidate */
@@ -878,7 +915,7 @@ typedef struct {
878
915
  * Moves forward in @rawSeqStore by @nbBytes,
879
916
  * which will update the fields 'pos' and 'posInSequence'.
880
917
  */
881
- static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
918
+ static void ZSTD_optLdm_skipRawSeqStoreBytes(RawSeqStore_t* rawSeqStore, size_t nbBytes)
882
919
  {
883
920
  U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
884
921
  while (currPos && rawSeqStore->pos < rawSeqStore->size) {
@@ -935,7 +972,7 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
935
972
  return;
936
973
  }
937
974
 
938
- /* Matches may be < MINMATCH by this process. In that case, we will reject them
975
+ /* Matches may be < minMatch by this process. In that case, we will reject them
939
976
  when we are deciding whether or not to add the ldm */
940
977
  optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
941
978
  optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
@@ -957,25 +994,26 @@ ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock
957
994
  * into 'matches'. Maintains the correct ordering of 'matches'.
958
995
  */
959
996
  static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
960
- const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
997
+ const ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
998
+ U32 minMatch)
961
999
  {
962
1000
  U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
963
- /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
1001
+ /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */
964
1002
  U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
965
1003
 
966
1004
  /* Ensure that current block position is not outside of the match */
967
1005
  if (currPosInBlock < optLdm->startPosInBlock
968
1006
  || currPosInBlock >= optLdm->endPosInBlock
969
- || candidateMatchLength < MINMATCH) {
1007
+ || candidateMatchLength < minMatch) {
970
1008
  return;
971
1009
  }
972
1010
 
973
1011
  if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
974
- U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
975
- DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
976
- candidateOffCode, candidateMatchLength, currPosInBlock);
1012
+ U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset);
1013
+ DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u",
1014
+ candidateOffBase, candidateMatchLength, currPosInBlock);
977
1015
  matches[*nbMatches].len = candidateMatchLength;
978
- matches[*nbMatches].off = candidateOffCode;
1016
+ matches[*nbMatches].off = candidateOffBase;
979
1017
  (*nbMatches)++;
980
1018
  }
981
1019
  }
@@ -986,7 +1024,8 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
986
1024
  static void
987
1025
  ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
988
1026
  ZSTD_match_t* matches, U32* nbMatches,
989
- U32 currPosInBlock, U32 remainingBytes)
1027
+ U32 currPosInBlock, U32 remainingBytes,
1028
+ U32 minMatch)
990
1029
  {
991
1030
  if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
992
1031
  return;
@@ -1003,7 +1042,7 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1003
1042
  }
1004
1043
  ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
1005
1044
  }
1006
- ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
1045
+ ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock, minMatch);
1007
1046
  }
1008
1047
 
1009
1048
 
@@ -1011,11 +1050,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1011
1050
  * Optimal parser
1012
1051
  *********************************/
1013
1052
 
1014
- static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1015
- {
1016
- return sol.litlen + sol.mlen;
1017
- }
1018
-
1019
1053
  #if 0 /* debug */
1020
1054
 
1021
1055
  static void
@@ -1033,9 +1067,15 @@ listStats(const U32* table, int lastEltID)
1033
1067
 
1034
1068
  #endif
1035
1069
 
1036
- FORCE_INLINE_TEMPLATE size_t
1037
- ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1038
- seqStore_t* seqStore,
1070
+ #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1071
+ #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1072
+ #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1073
+
1074
+ FORCE_INLINE_TEMPLATE
1075
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1076
+ size_t
1077
+ ZSTD_compressBlock_opt_generic(ZSTD_MatchState_t* ms,
1078
+ SeqStore_t* seqStore,
1039
1079
  U32 rep[ZSTD_REP_NUM],
1040
1080
  const void* src, size_t srcSize,
1041
1081
  const int optLevel,
@@ -1059,9 +1099,11 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1059
1099
 
1060
1100
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1061
1101
  ZSTD_match_t* const matches = optStatePtr->matchTable;
1062
- ZSTD_optimal_t lastSequence;
1102
+ ZSTD_optimal_t lastStretch;
1063
1103
  ZSTD_optLdm_t optLdm;
1064
1104
 
1105
+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1106
+
1065
1107
  optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1066
1108
  optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
1067
1109
  ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
@@ -1082,103 +1124,140 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1082
1124
  U32 const ll0 = !litlen;
1083
1125
  U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1084
1126
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1085
- (U32)(ip-istart), (U32)(iend - ip));
1086
- if (!nbMatches) { ip++; continue; }
1127
+ (U32)(ip-istart), (U32)(iend-ip),
1128
+ minMatch);
1129
+ if (!nbMatches) {
1130
+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1131
+ ip++;
1132
+ continue;
1133
+ }
1134
+
1135
+ /* Match found: let's store this solution, and eventually find more candidates.
1136
+ * During this forward pass, @opt is used to store stretches,
1137
+ * defined as "a match followed by N literals".
1138
+ * Note how this is different from a Sequence, which is "N literals followed by a match".
1139
+ * Storing stretches allows us to store different match predecessors
1140
+ * for each literal position that is part of a literals run. */
1087
1141
 
1088
1142
  /* initialize opt[0] */
1089
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1090
- opt[0].mlen = 0; /* means is_a_literal */
1143
+ opt[0].mlen = 0; /* there are only literals so far */
1091
1144
  opt[0].litlen = litlen;
1092
- /* We don't need to include the actual price of the literals because
1093
- * it is static for the duration of the forward pass, and is included
1094
- * in every price. We include the literal length to avoid negative
1095
- * prices when we subtract the previous literal length.
1145
+ /* No need to include the actual price of the literals before the first match
1146
+ * because it is static for the duration of the forward pass, and is included
1147
+ * in every subsequent price. But, we include the literal length because
1148
+ * the cost variation of litlen depends on the value of litlen.
1096
1149
  */
1097
- opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1150
+ opt[0].price = LL_PRICE(litlen);
1151
+ ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1152
+ ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1098
1153
 
1099
1154
  /* large match -> immediate encoding */
1100
1155
  { U32 const maxML = matches[nbMatches-1].len;
1101
- U32 const maxOffcode = matches[nbMatches-1].off;
1102
- DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
1103
- nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
1156
+ U32 const maxOffBase = matches[nbMatches-1].off;
1157
+ DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series",
1158
+ nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1104
1159
 
1105
1160
  if (maxML > sufficient_len) {
1106
- lastSequence.litlen = litlen;
1107
- lastSequence.mlen = maxML;
1108
- lastSequence.off = maxOffcode;
1109
- DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1161
+ lastStretch.litlen = 0;
1162
+ lastStretch.mlen = maxML;
1163
+ lastStretch.off = maxOffBase;
1164
+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1110
1165
  maxML, sufficient_len);
1111
1166
  cur = 0;
1112
- last_pos = ZSTD_totalLen(lastSequence);
1167
+ last_pos = maxML;
1113
1168
  goto _shortestPath;
1114
1169
  } }
1115
1170
 
1116
1171
  /* set prices for first matches starting position == 0 */
1117
1172
  assert(opt[0].price >= 0);
1118
- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1119
- U32 pos;
1173
+ { U32 pos;
1120
1174
  U32 matchNb;
1121
1175
  for (pos = 1; pos < minMatch; pos++) {
1122
- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1176
+ opt[pos].price = ZSTD_MAX_PRICE;
1177
+ opt[pos].mlen = 0;
1178
+ opt[pos].litlen = litlen + pos;
1123
1179
  }
1124
1180
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1125
- U32 const offcode = matches[matchNb].off;
1181
+ U32 const offBase = matches[matchNb].off;
1126
1182
  U32 const end = matches[matchNb].len;
1127
1183
  for ( ; pos <= end ; pos++ ) {
1128
- U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
1129
- U32 const sequencePrice = literalsPrice + matchPrice;
1184
+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1185
+ int const sequencePrice = opt[0].price + matchPrice;
1130
1186
  DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1131
1187
  pos, ZSTD_fCost(sequencePrice));
1132
1188
  opt[pos].mlen = pos;
1133
- opt[pos].off = offcode;
1134
- opt[pos].litlen = litlen;
1135
- opt[pos].price = (int)sequencePrice;
1136
- } }
1189
+ opt[pos].off = offBase;
1190
+ opt[pos].litlen = 0; /* end of match */
1191
+ opt[pos].price = sequencePrice + LL_PRICE(0);
1192
+ }
1193
+ }
1137
1194
  last_pos = pos-1;
1195
+ opt[pos].price = ZSTD_MAX_PRICE;
1138
1196
  }
1139
1197
  }
1140
1198
 
1141
1199
  /* check further positions */
1142
1200
  for (cur = 1; cur <= last_pos; cur++) {
1143
1201
  const BYTE* const inr = ip + cur;
1144
- assert(cur < ZSTD_OPT_NUM);
1145
- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1202
+ assert(cur <= ZSTD_OPT_NUM);
1203
+ DEBUGLOG(7, "cPos:%i==rPos:%u", (int)(inr-istart), cur);
1146
1204
 
1147
1205
  /* Fix current position with one literal if cheaper */
1148
- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1206
+ { U32 const litlen = opt[cur-1].litlen + 1;
1149
1207
  int const price = opt[cur-1].price
1150
- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1151
- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1152
- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1208
+ + LIT_PRICE(ip+cur-1)
1209
+ + LL_INCPRICE(litlen);
1153
1210
  assert(price < 1000000000); /* overflow check */
1154
1211
  if (price <= opt[cur].price) {
1155
- DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1156
- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1212
+ ZSTD_optimal_t const prevMatch = opt[cur];
1213
+ DEBUGLOG(7, "cPos:%i==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1214
+ (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1157
1215
  opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1158
- opt[cur].mlen = 0;
1159
- opt[cur].off = 0;
1216
+ opt[cur] = opt[cur-1];
1160
1217
  opt[cur].litlen = litlen;
1161
1218
  opt[cur].price = price;
1219
+ if ( (optLevel >= 1) /* additional check only for higher modes */
1220
+ && (prevMatch.litlen == 0) /* replace a match */
1221
+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1222
+ && LIKELY(ip + cur < iend)
1223
+ ) {
1224
+ /* check next position, in case it would be cheaper */
1225
+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1226
+ int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1227
+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1228
+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1229
+ if ( (with1literal < withMoreLiterals)
1230
+ && (with1literal < opt[cur+1].price) ) {
1231
+ /* update offset history - before it disappears */
1232
+ U32 const prev = cur - prevMatch.mlen;
1233
+ Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1234
+ assert(cur >= prevMatch.mlen);
1235
+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1236
+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1237
+ newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1238
+ opt[cur+1] = prevMatch; /* mlen & offbase */
1239
+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(Repcodes_t));
1240
+ opt[cur+1].litlen = 1;
1241
+ opt[cur+1].price = with1literal;
1242
+ if (last_pos < cur+1) last_pos = cur+1;
1243
+ }
1244
+ }
1162
1245
  } else {
1163
- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1164
- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1165
- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1246
+ DEBUGLOG(7, "cPos:%i==rPos:%u : literal would cost more (%.2f>%.2f)",
1247
+ (int)(inr-istart), cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1166
1248
  }
1167
1249
  }
1168
1250
 
1169
- /* Set the repcodes of the current position. We must do it here
1170
- * because we rely on the repcodes of the 2nd to last sequence being
1171
- * correct to set the next chunks repcodes during the backward
1172
- * traversal.
1251
+ /* Offset history is not updated during match comparison.
1252
+ * Do it here, now that the match is selected and confirmed.
1173
1253
  */
1174
- ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1254
+ ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(Repcodes_t));
1175
1255
  assert(cur >= opt[cur].mlen);
1176
- if (opt[cur].mlen != 0) {
1256
+ if (opt[cur].litlen == 0) {
1257
+ /* just finished a match => alter offset history */
1177
1258
  U32 const prev = cur - opt[cur].mlen;
1178
- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1179
- ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1180
- } else {
1181
- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1259
+ Repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1260
+ ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(Repcodes_t));
1182
1261
  }
1183
1262
 
1184
1263
  /* last match must start at a minimum distance of 8 from oend */
@@ -1188,38 +1267,37 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1188
1267
 
1189
1268
  if ( (optLevel==0) /*static_test*/
1190
1269
  && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1191
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1270
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1192
1271
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1193
1272
  }
1194
1273
 
1195
1274
  assert(opt[cur].price >= 0);
1196
- { U32 const ll0 = (opt[cur].mlen != 0);
1197
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1198
- U32 const previousPrice = (U32)opt[cur].price;
1199
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1275
+ { U32 const ll0 = (opt[cur].litlen == 0);
1276
+ int const previousPrice = opt[cur].price;
1277
+ int const basePrice = previousPrice + LL_PRICE(0);
1200
1278
  U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1201
1279
  U32 matchNb;
1202
1280
 
1203
1281
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1204
- (U32)(inr-istart), (U32)(iend-inr));
1282
+ (U32)(inr-istart), (U32)(iend-inr),
1283
+ minMatch);
1205
1284
 
1206
1285
  if (!nbMatches) {
1207
1286
  DEBUGLOG(7, "rPos:%u : no match found", cur);
1208
1287
  continue;
1209
1288
  }
1210
1289
 
1211
- { U32 const maxML = matches[nbMatches-1].len;
1212
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1213
- inr-istart, cur, nbMatches, maxML);
1214
-
1215
- if ( (maxML > sufficient_len)
1216
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
1217
- lastSequence.mlen = maxML;
1218
- lastSequence.off = matches[nbMatches-1].off;
1219
- lastSequence.litlen = litlen;
1220
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1221
- last_pos = cur + ZSTD_totalLen(lastSequence);
1222
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1290
+ { U32 const longestML = matches[nbMatches-1].len;
1291
+ DEBUGLOG(7, "cPos:%i==rPos:%u, found %u matches, of longest ML=%u",
1292
+ (int)(inr-istart), cur, nbMatches, longestML);
1293
+
1294
+ if ( (longestML > sufficient_len)
1295
+ || (cur + longestML >= ZSTD_OPT_NUM)
1296
+ || (ip + cur + longestML >= iend) ) {
1297
+ lastStretch.mlen = longestML;
1298
+ lastStretch.off = matches[nbMatches-1].off;
1299
+ lastStretch.litlen = 0;
1300
+ last_pos = cur + longestML;
1223
1301
  goto _shortestPath;
1224
1302
  } }
1225
1303
 
@@ -1230,20 +1308,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1230
1308
  U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
1231
1309
  U32 mlen;
1232
1310
 
1233
- DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1234
- matchNb, matches[matchNb].off, lastML, litlen);
1311
+ DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1312
+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1235
1313
 
1236
1314
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1237
1315
  U32 const pos = cur + mlen;
1238
- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1316
+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1239
1317
 
1240
1318
  if ((pos > last_pos) || (price < opt[pos].price)) {
1241
1319
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1242
1320
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1243
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1321
+ while (last_pos < pos) {
1322
+ /* fill empty positions, for future comparisons */
1323
+ last_pos++;
1324
+ opt[last_pos].price = ZSTD_MAX_PRICE;
1325
+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
1326
+ }
1244
1327
  opt[pos].mlen = mlen;
1245
1328
  opt[pos].off = offset;
1246
- opt[pos].litlen = litlen;
1329
+ opt[pos].litlen = 0;
1247
1330
  opt[pos].price = price;
1248
1331
  } else {
1249
1332
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1251,55 +1334,89 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1251
1334
  if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1252
1335
  }
1253
1336
  } } }
1337
+ opt[last_pos+1].price = ZSTD_MAX_PRICE;
1254
1338
  } /* for (cur = 1; cur <= last_pos; cur++) */
1255
1339
 
1256
- lastSequence = opt[last_pos];
1257
- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1258
- assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1340
+ lastStretch = opt[last_pos];
1341
+ assert(cur >= lastStretch.mlen);
1342
+ cur = last_pos - lastStretch.mlen;
1259
1343
 
1260
1344
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1261
1345
  assert(opt[0].mlen == 0);
1346
+ assert(last_pos >= lastStretch.mlen);
1347
+ assert(cur == last_pos - lastStretch.mlen);
1262
1348
 
1263
- /* Set the next chunk's repcodes based on the repcodes of the beginning
1264
- * of the last match, and the last sequence. This avoids us having to
1265
- * update them while traversing the sequences.
1266
- */
1267
- if (lastSequence.mlen != 0) {
1268
- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1269
- ZSTD_memcpy(rep, &reps, sizeof(reps));
1349
+ if (lastStretch.mlen==0) {
1350
+ /* no solution : all matches have been converted into literals */
1351
+ assert(lastStretch.litlen == (ip - anchor) + last_pos);
1352
+ ip += last_pos;
1353
+ continue;
1354
+ }
1355
+ assert(lastStretch.off > 0);
1356
+
1357
+ /* Update offset history */
1358
+ if (lastStretch.litlen == 0) {
1359
+ /* finishing on a match : update offset history */
1360
+ Repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1361
+ ZSTD_memcpy(rep, &reps, sizeof(Repcodes_t));
1270
1362
  } else {
1271
- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1363
+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(Repcodes_t));
1364
+ assert(cur >= lastStretch.litlen);
1365
+ cur -= lastStretch.litlen;
1272
1366
  }
1273
1367
 
1274
- { U32 const storeEnd = cur + 1;
1368
+ /* Let's write the shortest path solution.
1369
+ * It is stored in @opt in reverse order,
1370
+ * starting from @storeEnd (==cur+2),
1371
+ * effectively partially overwriting @opt.
1372
+ * Content is changed too:
1373
+ * - So far, @opt stored stretches, aka a match followed by literals
1374
+ * - Now, it will store sequences, aka literals followed by a match
1375
+ */
1376
+ { U32 const storeEnd = cur + 2;
1275
1377
  U32 storeStart = storeEnd;
1276
- U32 seqPos = cur;
1378
+ U32 stretchPos = cur;
1277
1379
 
1278
1380
  DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1279
1381
  last_pos, cur); (void)last_pos;
1280
- assert(storeEnd < ZSTD_OPT_NUM);
1281
- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1282
- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1283
- opt[storeEnd] = lastSequence;
1284
- while (seqPos > 0) {
1285
- U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1382
+ assert(storeEnd < ZSTD_OPT_SIZE);
1383
+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1384
+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1385
+ if (lastStretch.litlen > 0) {
1386
+ /* last "sequence" is unfinished: just a bunch of literals */
1387
+ opt[storeEnd].litlen = lastStretch.litlen;
1388
+ opt[storeEnd].mlen = 0;
1389
+ storeStart = storeEnd-1;
1390
+ opt[storeStart] = lastStretch;
1391
+ } {
1392
+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
1393
+ storeStart = storeEnd;
1394
+ }
1395
+ while (1) {
1396
+ ZSTD_optimal_t nextStretch = opt[stretchPos];
1397
+ opt[storeStart].litlen = nextStretch.litlen;
1398
+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1399
+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1400
+ if (nextStretch.mlen == 0) {
1401
+ /* reaching beginning of segment */
1402
+ break;
1403
+ }
1286
1404
  storeStart--;
1287
- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1288
- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1289
- opt[storeStart] = opt[seqPos];
1290
- seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1405
+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1406
+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1407
+ stretchPos -= nextStretch.litlen + nextStretch.mlen;
1291
1408
  }
1292
1409
 
1293
1410
  /* save sequences */
1294
- DEBUGLOG(6, "sending selected sequences into seqStore")
1411
+ DEBUGLOG(6, "sending selected sequences into seqStore");
1295
1412
  { U32 storePos;
1296
1413
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1297
1414
  U32 const llen = opt[storePos].litlen;
1298
1415
  U32 const mlen = opt[storePos].mlen;
1299
- U32 const offCode = opt[storePos].off;
1416
+ U32 const offBase = opt[storePos].off;
1300
1417
  U32 const advance = llen + mlen;
1301
- DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1302
- anchor - istart, (unsigned)llen, (unsigned)mlen);
1418
+ DEBUGLOG(6, "considering seq starting at %i, llen=%u, mlen=%u",
1419
+ (int)(anchor - istart), (unsigned)llen, (unsigned)mlen);
1303
1420
 
1304
1421
  if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1305
1422
  assert(storePos == storeEnd); /* must be last sequence */
@@ -1308,11 +1425,14 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1308
1425
  }
1309
1426
 
1310
1427
  assert(anchor + llen <= iend);
1311
- ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1312
- ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
1428
+ ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
1429
+ ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
1313
1430
  anchor += advance;
1314
1431
  ip = anchor;
1315
1432
  } }
1433
+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1434
+
1435
+ /* update all costs */
1316
1436
  ZSTD_setBasePrices(optStatePtr, optLevel);
1317
1437
  }
1318
1438
  } /* while (ip < ilimit) */
@@ -1320,42 +1440,51 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1320
1440
  /* Return the last literals size */
1321
1441
  return (size_t)(iend - anchor);
1322
1442
  }
1443
+ #endif /* build exclusions */
1323
1444
 
1445
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1324
1446
  static size_t ZSTD_compressBlock_opt0(
1325
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1447
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1326
1448
  const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1327
1449
  {
1328
1450
  return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1329
1451
  }
1452
+ #endif
1330
1453
 
1454
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1331
1455
  static size_t ZSTD_compressBlock_opt2(
1332
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1456
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1333
1457
  const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1334
1458
  {
1335
1459
  return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1336
1460
  }
1461
+ #endif
1337
1462
 
1463
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1338
1464
  size_t ZSTD_compressBlock_btopt(
1339
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1465
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1340
1466
  const void* src, size_t srcSize)
1341
1467
  {
1342
1468
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1343
1469
  return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1344
1470
  }
1471
+ #endif
1345
1472
 
1346
1473
 
1347
1474
 
1348
1475
 
1476
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1349
1477
  /* ZSTD_initStats_ultra():
1350
1478
  * make a first compression pass, just to seed stats with more accurate starting values.
1351
1479
  * only works on first block, with no dictionary and no ldm.
1352
- * this function cannot error, hence its contract must be respected.
1480
+ * this function cannot error out, its narrow contract must be respected.
1353
1481
  */
1354
- static void
1355
- ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1356
- seqStore_t* seqStore,
1357
- U32 rep[ZSTD_REP_NUM],
1358
- const void* src, size_t srcSize)
1482
+ static
1483
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1484
+ void ZSTD_initStats_ultra(ZSTD_MatchState_t* ms,
1485
+ SeqStore_t* seqStore,
1486
+ U32 rep[ZSTD_REP_NUM],
1487
+ const void* src, size_t srcSize)
1359
1488
  {
1360
1489
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1361
1490
  ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1368,7 +1497,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1368
1497
 
1369
1498
  ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/
1370
1499
 
1371
- /* invalidate first scan from history */
1500
+ /* invalidate first scan from history, only keep entropy stats */
1372
1501
  ZSTD_resetSeqStore(seqStore);
1373
1502
  ms->window.base -= srcSize;
1374
1503
  ms->window.dictLimit += (U32)srcSize;
@@ -1378,7 +1507,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1378
1507
  }
1379
1508
 
1380
1509
  size_t ZSTD_compressBlock_btultra(
1381
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1510
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1382
1511
  const void* src, size_t srcSize)
1383
1512
  {
1384
1513
  DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
@@ -1386,16 +1515,16 @@ size_t ZSTD_compressBlock_btultra(
1386
1515
  }
1387
1516
 
1388
1517
  size_t ZSTD_compressBlock_btultra2(
1389
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1518
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1390
1519
  const void* src, size_t srcSize)
1391
1520
  {
1392
1521
  U32 const curr = (U32)((const BYTE*)src - ms->window.base);
1393
1522
  DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
1394
1523
 
1395
- /* 2-pass strategy:
1524
+ /* 2-passes strategy:
1396
1525
  * this strategy makes a first pass over first block to collect statistics
1397
- * and seed next round's statistics with it.
1398
- * After 1st pass, function forgets everything, and starts a new block.
1526
+ * in order to seed next round's statistics with it.
1527
+ * After 1st pass, function forgets history, and starts a new block.
1399
1528
  * Consequently, this can only work if no data has been previously loaded in tables,
1400
1529
  * aka, no dictionary, no prefix, no ldm preprocessing.
1401
1530
  * The compression ratio gain is generally small (~0.5% on first block),
@@ -1404,42 +1533,47 @@ size_t ZSTD_compressBlock_btultra2(
1404
1533
  if ( (ms->opt.litLengthSum==0) /* first block */
1405
1534
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1406
1535
  && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
1407
- && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1408
- && (srcSize > ZSTD_PREDEF_THRESHOLD)
1536
+ && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
1537
+ && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */
1409
1538
  ) {
1410
1539
  ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
1411
1540
  }
1412
1541
 
1413
1542
  return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1414
1543
  }
1544
+ #endif
1415
1545
 
1546
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1416
1547
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1417
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1548
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1418
1549
  const void* src, size_t srcSize)
1419
1550
  {
1420
1551
  return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1421
1552
  }
1422
1553
 
1423
- size_t ZSTD_compressBlock_btultra_dictMatchState(
1424
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1554
+ size_t ZSTD_compressBlock_btopt_extDict(
1555
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1425
1556
  const void* src, size_t srcSize)
1426
1557
  {
1427
- return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1558
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1428
1559
  }
1560
+ #endif
1429
1561
 
1430
- size_t ZSTD_compressBlock_btopt_extDict(
1431
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1562
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1563
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
1564
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1432
1565
  const void* src, size_t srcSize)
1433
1566
  {
1434
- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1567
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1435
1568
  }
1436
1569
 
1437
1570
  size_t ZSTD_compressBlock_btultra_extDict(
1438
- ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1571
+ ZSTD_MatchState_t* ms, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1439
1572
  const void* src, size_t srcSize)
1440
1573
  {
1441
1574
  return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1442
1575
  }
1576
+ #endif
1443
1577
 
1444
1578
  /* note : no btultra2 variant for extDict nor dictMatchState,
1445
1579
  * because btultra2 is not meant to work with dictionaries