multi_compress 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +22 -1
  9. data/ext/multi_compress/vendor/.vendored +2 -2
  10. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  11. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  13. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  14. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  18. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  20. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  22. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  23. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  25. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  26. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  28. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  29. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  31. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  34. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  36. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  38. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  42. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  45. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  48. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  50. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  54. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  56. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  60. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  63. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  64. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  66. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  69. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  71. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  72. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  82. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  84. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  85. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  87. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  90. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  91. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  92. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  93. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  94. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  95. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  105. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  110. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  111. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  112. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  113. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  114. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  115. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  116. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  118. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  119. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  120. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  122. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  123. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  125. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  127. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  128. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  129. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  134. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  136. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  140. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  141. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  142. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  145. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  169. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  177. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  181. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  186. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  203. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  205. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  206. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  207. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  208. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  209. data/lib/multi_compress/version.rb +1 -1
  210. metadata +29 -2
@@ -10,11 +10,6 @@
10
10
  #ifndef BROTLI_ENC_HASH_H_
11
11
  #define BROTLI_ENC_HASH_H_
12
12
 
13
- #include <stdlib.h> /* exit */
14
- #include <string.h> /* memcmp, memset */
15
-
16
- #include <brotli/types.h>
17
-
18
13
  #include "../common/constants.h"
19
14
  #include "../common/dictionary.h"
20
15
  #include "../common/platform.h"
@@ -22,7 +17,10 @@
22
17
  #include "encoder_dict.h"
23
18
  #include "fast_log.h"
24
19
  #include "find_match_length.h"
20
+ #include "hash_base.h"
21
+ #include "matching_tag_mask.h"
25
22
  #include "memory.h"
23
+ #include "params.h"
26
24
  #include "quality.h"
27
25
  #include "static_dict.h"
28
26
 
@@ -38,7 +36,7 @@ typedef struct {
38
36
  void* extra[4];
39
37
 
40
38
  /**
41
- * False before the fisrt invocation of HasherSetup (where "extra" memory)
39
+ * False before the first invocation of HasherSetup (where "extra" memory)
42
40
  * is allocated.
43
41
  */
44
42
  BROTLI_BOOL is_setup_;
@@ -71,23 +69,6 @@ typedef struct HasherSearchResult {
71
69
  int len_code_delta; /* == len_code - len */
72
70
  } HasherSearchResult;
73
71
 
74
- /* kHashMul32 multiplier has these properties:
75
- * The multiplier must be odd. Otherwise we may lose the highest bit.
76
- * No long streaks of ones or zeros.
77
- * There is no effort to ensure that it is a prime, the oddity is enough
78
- for this use.
79
- * The number has been tuned heuristically against compression benchmarks. */
80
- static const uint32_t kHashMul32 = 0x1E35A7BD;
81
- static const uint64_t kHashMul64 =
82
- BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
83
-
84
- static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
85
- uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
86
- /* The higher bits contain more mixture from the multiplication,
87
- so we take our results from there. */
88
- return h >> (32 - 14);
89
- }
90
-
91
72
  static BROTLI_INLINE void PrepareDistanceCache(
92
73
  int* BROTLI_RESTRICT distance_cache, const int num_distances) {
93
74
  if (num_distances > 4) {
@@ -297,6 +278,16 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
297
278
  #include "hash_longest_match64_inc.h" /* NOLINT(build/include) */
298
279
  #undef HASHER
299
280
 
281
+ #if defined(BROTLI_MAX_SIMD_QUALITY)
282
+ #define HASHER() H58
283
+ #include "hash_longest_match_simd_inc.h" /* NOLINT(build/include) */
284
+ #undef HASHER
285
+
286
+ #define HASHER() H68
287
+ #include "hash_longest_match64_simd_inc.h" /* NOLINT(build/include) */
288
+ #undef HASHER
289
+ #endif
290
+
300
291
  #define BUCKET_BITS 15
301
292
 
302
293
  #define NUM_LAST_DISTANCES_TO_CHECK 4
@@ -388,7 +379,13 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
388
379
  #undef CAT
389
380
  #undef EXPAND_CAT
390
381
 
391
- #define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
382
+ #if defined(BROTLI_MAX_SIMD_QUALITY)
383
+ #define FOR_SIMPLE_HASHERS(H) \
384
+ H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54) H(58) H(68)
385
+ #else
386
+ #define FOR_SIMPLE_HASHERS(H) \
387
+ H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
388
+ #endif
392
389
  #define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
393
390
  #define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
394
391
  #define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
@@ -523,8 +520,8 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
523
520
  const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
524
521
 
525
522
  const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526
- const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527
- const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
523
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]);
524
+ const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]);
528
525
  const uint8_t* source = NULL;
529
526
 
530
527
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@@ -548,6 +545,8 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
548
545
  source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
549
546
  }
550
547
 
548
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
549
+
551
550
  for (i = 0; i < 4; ++i) {
552
551
  const size_t distance = (size_t)distance_cache[i];
553
552
  size_t offset;
@@ -574,6 +573,11 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
574
573
  }
575
574
  }
576
575
  }
576
+ /* we require matches of len >4, so increase best_len to 3, so we can compare
577
+ * 4 bytes all the time. */
578
+ if (best_len < 3) {
579
+ best_len = 3;
580
+ }
577
581
  while (item == 0) {
578
582
  size_t offset;
579
583
  size_t distance;
@@ -586,9 +590,10 @@ static BROTLI_INLINE void FindCompoundDictionaryMatch(
586
590
  limit = source_size - offset;
587
591
  limit = (limit > max_length) ? max_length : limit;
588
592
  if (distance > max_distance) continue;
589
- if (cur_ix_masked + best_len > ring_buffer_mask ||
590
- best_len >= limit ||
591
- data[cur_ix_masked + best_len] != source[offset + best_len]) {
593
+ if (cur_ix_masked + best_len > ring_buffer_mask || best_len >= limit ||
594
+ /* compare 4 bytes ending at best_len + 1 */
595
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
596
+ BrotliUnalignedRead32(&source[offset + best_len - 3])) {
592
597
  continue;
593
598
  }
594
599
  {
@@ -627,8 +632,8 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
627
632
  const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
628
633
 
629
634
  const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
630
- const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
631
- const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
635
+ const uint16_t* heads = (uint16_t*)(&slot_offsets[(size_t)1u << slot_bits]);
636
+ const uint32_t* items = (uint32_t*)(&heads[(size_t)1u << bucket_bits]);
632
637
  const uint8_t* source = NULL;
633
638
 
634
639
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@@ -651,6 +656,8 @@ static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
651
656
  source = (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
652
657
  }
653
658
 
659
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
660
+
654
661
  while (item == 0) {
655
662
  size_t offset;
656
663
  size_t distance;
@@ -0,0 +1,38 @@
1
+ /* Copyright 2025 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Basic common hash functions / constants. */
8
+
9
+ #ifndef THIRD_PARTY_BROTLI_ENC_HASH_BASE_H_
10
+ #define THIRD_PARTY_BROTLI_ENC_HASH_BASE_H_
11
+
12
+ #include "../common/platform.h"
13
+
14
+ /* kHashMul32 multiplier has these properties:
15
+ * The multiplier must be odd. Otherwise we may lose the highest bit.
16
+ * No long streaks of ones or zeros.
17
+ * There is no effort to ensure that it is a prime, the oddity is enough
18
+ for this use.
19
+ * The number has been tuned heuristically against compression benchmarks. */
20
+ static const uint32_t kHashMul32 = 0x1E35A7BD;
21
+ static const uint64_t kHashMul64 =
22
+ BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
23
+
24
+ static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
25
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
26
+ /* The higher bits contain more mixture from the multiplication,
27
+ so we take our results from there. */
28
+ return h >> (32 - 14);
29
+ }
30
+
31
+ static BROTLI_INLINE uint32_t Hash15(const uint8_t* data) {
32
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33
+ /* The higher bits contain more mixture from the multiplication,
34
+ so we take our results from there. */
35
+ return h >> (32 - 15);
36
+ }
37
+
38
+ #endif // THIRD_PARTY_BROTLI_ENC_HASH_BASE_H_
@@ -212,6 +212,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
212
212
  const uint8_t tiny_hash = (uint8_t)(key);
213
213
  out->len = 0;
214
214
  out->len_code_delta = 0;
215
+
216
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
217
+
215
218
  /* Try last distance first. */
216
219
  for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
217
220
  const size_t backward = (size_t)distance_cache[i];
@@ -241,6 +244,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
241
244
  }
242
245
  }
243
246
  }
247
+ /* we require matches of len >4, so increase best_len to 3, so we can compare
248
+ * 4 bytes all the time. */
249
+ if (best_len < 3) {
250
+ best_len = 3;
251
+ }
244
252
  {
245
253
  const size_t bank = key & (NUM_BANKS - 1);
246
254
  size_t backward = 0;
@@ -257,7 +265,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
257
265
  delta = banks[bank].slots[last].delta;
258
266
  if (cur_ix_masked + best_len > ring_buffer_mask ||
259
267
  prev_ix + best_len > ring_buffer_mask ||
260
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
268
+ /* compare 4 bytes ending at best_len + 1 */
269
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
270
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
261
271
  continue;
262
272
  }
263
273
  {
@@ -170,8 +170,16 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
170
170
  score_t best_score = out->score;
171
171
  size_t best_len = out->len;
172
172
  size_t i;
173
+ /* Precalculate the hash key and prefetch the bucket. */
174
+ const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
175
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
176
+ PREFETCH_L1(bucket);
177
+ if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
173
178
  out->len = 0;
174
179
  out->len_code_delta = 0;
180
+
181
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
182
+
175
183
  /* Try last distance first. */
176
184
  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
177
185
  const size_t backward = (size_t)distance_cache[i];
@@ -184,8 +192,10 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
184
192
  }
185
193
  prev_ix &= ring_buffer_mask;
186
194
 
187
- if (cur_ix_masked + best_len > ring_buffer_mask ||
188
- prev_ix + best_len > ring_buffer_mask ||
195
+ if (cur_ix_masked + best_len > ring_buffer_mask) {
196
+ break;
197
+ }
198
+ if (prev_ix + best_len > ring_buffer_mask ||
189
199
  data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
190
200
  continue;
191
201
  }
@@ -211,9 +221,12 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
211
221
  }
212
222
  }
213
223
  }
224
+ /* we require matches of len >4, so increase best_len to 3, so we can compare
225
+ * 4 bytes all the time. */
226
+ if (best_len < 3) {
227
+ best_len = 3;
228
+ }
214
229
  {
215
- const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
216
- uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
217
230
  const size_t down =
218
231
  (num[key] > self->block_size_) ?
219
232
  (num[key] - self->block_size_) : 0u;
@@ -228,9 +241,13 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
228
241
  break;
229
242
  }
230
243
  prev_ix &= ring_buffer_mask;
231
- if (cur_ix_masked + best_len > ring_buffer_mask ||
232
- prev_ix + best_len > ring_buffer_mask ||
233
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
244
+ if (cur_ix_masked + best_len > ring_buffer_mask) {
245
+ break;
246
+ }
247
+ if (prev_ix + best_len > ring_buffer_mask ||
248
+ /* compare 4 bytes ending at best_len + 1 */
249
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
250
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
234
251
  continue;
235
252
  }
236
253
  current4 = BrotliUnalignedRead32(data + prev_ix);
@@ -0,0 +1,304 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2010 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ /* A (forgetful) hash table to the data seen by the compressor, to
11
+ help create backward references to previous data.
12
+
13
+ This is a hash map of fixed size (bucket_size_) to a ring buffer of
14
+ fixed size (block_size_). The ring buffer contains the last block_size_
15
+ index positions of the given hash key in the compressed data. */
16
+
17
+ #define HashLongestMatch HASHER() /* The struct name is produced by the HASHER() macro, which is defined outside this file; the alias is #undef'd on the last line of this file. */
18
+
19
+ #define TAG_HASH_BITS 8 /* Number of low hash bits kept as a per-slot tag; 8 bits fit the uint8_t tags_ array. */
20
+ #define TAG_HASH_MASK ((1 << TAG_HASH_BITS) - 1) /* 0xFF: extracts the tag from a value returned by FN(HashBytes). */
21
+
22
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; } /* Always 8, which equals the size of the load done by FN(HashBytes). NOTE(review): presumably the number of readable bytes this hasher needs at a position - confirm with callers. */
23
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; } /* Always 8. NOTE(review): presumably how many bytes must be available at a position before FN(Store) may hash it (FN(HashBytes) loads 8) - confirm with callers. */
24
+
25
+ /* HashBytes computes the combined hash of the 8 bytes loaded from |data|. The result has 15 + TAG_HASH_BITS significant bits; callers in this file split it into the bucket key (hash >> TAG_HASH_BITS) and the slot tag (hash & TAG_HASH_MASK). */
26
+ static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
27
+ uint64_t hash_mul) {
28
+ const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul; /* unsigned 64-bit multiply; wraps modulo 2^64 */
29
+ /* The higher bits contain more mixture from the multiplication,
30
+ so we take our results from there. */
31
+ return (size_t)(h >> (64 - 15 - TAG_HASH_BITS)); /* keep the top 15 + TAG_HASH_BITS bits: 15 key bits above TAG_HASH_BITS tag bits */
32
+ }
33
+
34
+ typedef struct HashLongestMatch { /* State of the tagged hasher: scalar settings filled in by FN(Initialize) plus three arrays taken from HasherCommon. */
35
+ /* Number of hash buckets (1 << bucket_bits; FN(Initialize) asserts that bucket_bits is 15). */
36
+ size_t bucket_size_;
37
+ /* Only block_size_ newest backward references are kept,
38
+ and the older are forgotten. */
39
+ size_t block_size_;
40
+ /* Hash multiplier; FN(Initialize) sets it to kHashMul64 << (64 - 5 * 8), which makes the hash depend on 5 input bytes. */
41
+ uint64_t hash_mul_;
42
+ /* Mask for accessing entries in a block (in a ring-buffer manner). */
43
+ uint32_t block_mask_;
44
+
45
+ int block_bits_; /* log2 of block_size_; used to turn a bucket key into an array offset (key << block_bits_) */
46
+ int num_last_distances_to_check_; /* how many distance_cache entries FN(FindLongestMatch) tries before the hash lookup */
47
+
48
+ /* Shortcuts. */
49
+ HasherCommon* common_;
50
+
51
+ /* --- Dynamic size members --- */
52
+
53
+ /* Per-bucket countdown cursor: FN(Prepare) sets it to 65535 and every store decrements it, so 65535 - num_[key] is the number of stores into the bucket (modulo 65536). */
54
+ uint16_t* num_; /* uint16_t[bucket_size]; */
55
+
56
+ uint8_t* tags_; /* One tag byte (hash & TAG_HASH_MASK) per buckets_ slot, written at the same offset as the position; sized bucket_size * block_size in FN(HashMemAllocInBytes). */
57
+
58
+ /* Buckets containing block_size_ of backward references. */
59
+ uint32_t* buckets_; /* uint32_t[bucket_size * block_size]; */
60
+ } HashLongestMatch;
61
+
62
+ static void FN(Initialize)( /* Fills |self| from common->params and binds the three work arrays to common->extra[0..2]. Allocates nothing and does not touch the array contents. */
63
+ HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
64
+ const BrotliEncoderParams* params) {
65
+ self->common_ = common;
66
+
67
+ BROTLI_UNUSED(params); /* every setting below is read from common->params instead */
68
+ self->hash_mul_ = kHashMul64 << (64 - 5 * 8); /* shift by 24 makes the multiplier a multiple of 2^24, so the top 3 bytes of the 8-byte little-endian load in FN(HashBytes) drop out of the product: only data[0..4] affect the hash */
69
+ BROTLI_DCHECK(common->params.bucket_bits == 15); /* FN(HashBytes) hard-codes 15 key bits */
70
+ self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
71
+ self->block_bits_ = common->params.block_bits;
72
+ self->block_size_ = (size_t)1 << common->params.block_bits;
73
+ self->block_mask_ = (uint32_t)(self->block_size_ - 1); /* block_size_ is a power of two, so this masks an index into one block */
74
+ self->num_last_distances_to_check_ =
75
+ common->params.num_last_distances_to_check;
76
+ self->num_ = (uint16_t*)common->extra[0]; /* NOTE(review): presumably the region sized by alloc_size[0] in FN(HashMemAllocInBytes) - confirm in the allocator */
77
+ self->tags_ = (uint8_t*)common->extra[1]; /* presumably alloc_size[1] */
78
+ self->buckets_ = (uint32_t*)common->extra[2]; /* presumably alloc_size[2] */
79
+ }
80
+
81
+ static void FN(Prepare)( /* Resets per-bucket cursors in num_ to 65535, the state in which FN(FindLongestMatch) sees zero stored entries. tags_ and buckets_ are left untouched. */
81
+ HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
82
+ size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
83
+ uint16_t* BROTLI_RESTRICT num = self->num_;
84
+ /* Partial preparation is 100 times slower (per socket [sic - presumably per bucket]), so it is used only for one-shot inputs of at most bucket_size_ / 64 positions. */
85
+ size_t partial_prepare_threshold = self->bucket_size_ >> 6;
86
+ if (one_shot && input_size <= partial_prepare_threshold) {
87
+ size_t i;
88
+ for (i = 0; i < input_size; ++i) { /* reset only the buckets that positions of this input hash to */
89
+ const size_t hash = FN(HashBytes)(&data[i], self->hash_mul_); /* NOTE(review): FN(HashBytes) loads 8 bytes at data[i], so for the last 7 positions the load extends past data[input_size - 1]; assumes the caller's buffer has readable slack - TODO confirm */
90
+ const size_t key = hash >> TAG_HASH_BITS;
91
+ num[key] = 65535;
92
+ }
93
+ } else {
94
+ /* Set all the bytes of num to 255, which makes each uint16_t 65535. */
95
+ memset(num, 255, self->bucket_size_ * sizeof(num[0]));
96
+ }
97
+ }
99
+
100
+ static BROTLI_INLINE void FN(HashMemAllocInBytes)( /* Writes into alloc_size[0..2] the byte sizes of the three arrays this hasher uses. The sizes depend only on params->hasher.bucket_bits and params->hasher.block_bits. */
100
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
101
+ size_t input_size, size_t* alloc_size) {
102
+ size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
103
+ size_t block_size = (size_t)1 << params->hasher.block_bits;
104
+ BROTLI_UNUSED(one_shot);
105
+ BROTLI_UNUSED(input_size);
106
+ alloc_size[0] = sizeof(uint16_t) * bucket_size; /* one uint16_t cursor per bucket; presumably backs num_ (common->extra[0]) - confirm in the allocator */
107
+ alloc_size[1] = sizeof(uint8_t) * bucket_size * block_size; /* one tag byte per slot; presumably backs tags_ (common->extra[1]) */
108
+ alloc_size[2] = sizeof(uint32_t) * bucket_size * block_size; /* one 32-bit position per slot; presumably backs buckets_ (common->extra[2]) */
109
+ }
111
+
112
+ /* Hash the bytes at &data[ix & mask] (FN(HashBytes) does one 8-byte load there).
113
+ The hash selects a bucket; ix and the hash's tag byte are written to that bucket's next ring-buffer slot. */
114
+ static BROTLI_INLINE void FN(Store)(
115
+ HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
116
+ const size_t mask, const size_t ix) {
117
+ uint16_t* BROTLI_RESTRICT num = self->num_;
118
+ uint8_t* BROTLI_RESTRICT tags = self->tags_;
119
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
120
+ const size_t hash = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
121
+ const size_t key = hash >> TAG_HASH_BITS; /* upper bits: bucket index */
122
+ const uint8_t tag = hash & TAG_HASH_MASK; /* lower TAG_HASH_BITS bits: tag stored next to the position */
123
+ const size_t minor_ix = num[key] & self->block_mask_; /* slot inside the bucket, taken from the cursor before it is decremented */
124
+ const size_t offset = minor_ix + (key << self->block_bits_); /* same offset is used for buckets_ and tags_ */
125
+ --num[key]; /* the cursor counts down, so successive stores fill decreasing slot indices (wrapping inside the block) */
126
+ buckets[offset] = (uint32_t)ix; /* position is truncated to 32 bits */
127
+ tags[offset] = tag;
128
+ }
129
+
130
+ static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self, /* Calls FN(Store) for every position in [ix_start, ix_end), in increasing order; does nothing when ix_start >= ix_end. */
130
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask,
131
+ const size_t ix_start, const size_t ix_end) {
132
+ size_t i;
133
+ for (i = ix_start; i < ix_end; ++i) {
134
+ FN(Store)(self, data, mask, i);
135
+ }
136
+ }
138
+
139
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)( /* Stores the three positions just before |position| once enough new bytes are available; otherwise does nothing. */
139
+ HashLongestMatch* BROTLI_RESTRICT self,
140
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
141
+ size_t ringbuffer_mask) {
142
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) { /* i.e. num_bytes >= 7; position >= 3 keeps position - 3 from wrapping below zero */
143
+ /* Prepare the hashes for three last bytes of the last write.
144
+ These could not be calculated before, since they require knowledge
145
+ of both the previous and the current block. */
146
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
147
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
148
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
149
+ }
150
+ }
152
+
153
+ static BROTLI_INLINE void FN(PrepareDistanceCache)( /* Thin wrapper: forwards |distance_cache| and this hasher's num_last_distances_to_check_ to the non-templated PrepareDistanceCache helper, which is defined outside this file. */
153
+ HashLongestMatch* BROTLI_RESTRICT self,
154
+ int* BROTLI_RESTRICT distance_cache) {
155
+ PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
156
+ }
158
+
159
+ /* Find a longest backward match of &data[cur_ix] up to the length of
160
+ max_length and stores the position cur_ix in the hash table.
161
+
162
+ REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
163
+ values; if this method is invoked repeatedly with the same distance
164
+ cache values, it is enough to invoke FN(PrepareDistanceCache) once.
165
+
166
+ Does not look for matches longer than max_length.
167
+ Does not look for matches further away than max_backward.
168
+ Writes the best match into |out|.
169
+ |out|->score is updated only if a better match is found; |out|->len and |out|->len_code_delta are reset to 0 on entry, while the incoming |out|->len and |out|->score are kept locally as the length and score to beat. If no better match is found, the static dictionary is searched. */
170
+ static BROTLI_INLINE void FN(FindLongestMatch)(
171
+ HashLongestMatch* BROTLI_RESTRICT self,
172
+ const BrotliEncoderDictionary* dictionary,
173
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
174
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
175
+ const size_t max_length, const size_t max_backward,
176
+ const size_t dictionary_distance, const size_t max_distance,
177
+ HasherSearchResult* BROTLI_RESTRICT out) {
178
+ uint16_t* BROTLI_RESTRICT num = self->num_;
179
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
180
+ uint8_t* BROTLI_RESTRICT tags = self->tags_;
181
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
182
+ /* Don't accept a short copy from far away. */
183
+ score_t min_score = out->score; /* incoming score; compared with out->score at the end to detect "nothing better found" */
184
+ score_t best_score = out->score;
185
+ size_t best_len = out->len; /* read before out->len is reset below */
186
+ size_t i;
187
+ /* Precalculate the hash key and prefetch the bucket. */
188
+ const size_t hash = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
189
+ const size_t key = hash >> TAG_HASH_BITS;
190
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
191
+ uint8_t* BROTLI_RESTRICT tag_bucket = &tags[key << self->block_bits_];
192
+ PREFETCH_L1(bucket);
193
+ PREFETCH_L1(tag_bucket);
194
+ if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16); /* blocks with more than 16 uint32_t entries span more than 64 bytes; prefetch the next 64 bytes too */
195
+ out->len = 0;
196
+ out->len_code_delta = 0;
197
+
198
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask); /* debug-only check that the candidate range at cur_ix_masked stays within the mask */
199
+
200
+ /* Try last distance first. */
201
+ for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
202
+ const size_t backward = (size_t)distance_cache[i];
203
+ size_t prev_ix = (size_t)(cur_ix - backward);
204
+ if (prev_ix >= cur_ix) { /* backward is 0, or the unsigned subtraction wrapped (backward > cur_ix, which includes negative cache entries cast to size_t) */
205
+ continue;
206
+ }
207
+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
208
+ continue;
209
+ }
210
+ prev_ix &= ring_buffer_mask;
211
+
212
+ if (cur_ix_masked + best_len > ring_buffer_mask) { /* the byte at offset best_len from the current position would lie past the mask: stop checking cached distances */
213
+ break;
214
+ }
215
+ if (prev_ix + best_len > ring_buffer_mask ||
216
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { /* quick reject: a match longer than best_len must agree at offset best_len */
217
+ continue;
218
+ }
219
+ {
220
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
221
+ &data[cur_ix_masked],
222
+ max_length);
223
+ if (len >= 3 || (len == 2 && i < 2)) { /* length-2 matches are considered only for the first two cache entries */
224
+ /* Comparing for >= 2 does not change the semantics, but just saves for
225
+ a few unnecessary binary logarithms in backward reference score,
226
+ since we are not interested in such short matches. */
227
+ score_t score = BackwardReferenceScoreUsingLastDistance(len);
228
+ if (best_score < score) {
229
+ if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i); /* entries other than the first are penalized, then re-compared */
230
+ if (best_score < score) {
231
+ best_score = score;
232
+ best_len = len;
233
+ out->len = best_len;
234
+ out->distance = backward;
235
+ out->score = best_score;
236
+ }
237
+ }
238
+ }
239
+ }
240
+ }
241
+ /* Candidates from the hash table must match at least 4 bytes (see the first4
242
+ * check below), so raise best_len to at least 3; the quick check can then always compare the 4 bytes at offsets best_len - 3 .. best_len. */
243
+ if (best_len < 3) {
244
+ best_len = 3;
245
+ }
246
+ {
247
+ const uint8_t tag = hash & TAG_HASH_MASK;
248
+ const uint32_t first4 = BrotliUnalignedRead32(data + cur_ix_masked);
249
+ const size_t max_length_m4 = max_length - 4; /* NOTE(review): wraps around if max_length < 4; presumably callers always pass a larger max_length - confirm */
250
+ const size_t head = (num[key] + 1) & self->block_mask_; /* slot written by the most recent store into this bucket (the cursor is decremented after each write) */
251
+ uint64_t matches =
252
+ GetMatchingTagMask(self->block_size_ / 16, tag, tag_bucket, head); /* helper defined outside this file; presumably bit j is set when the tag in slot (head + j) equals |tag|, which is how rb_index is derived below - confirm */
253
+ /* Mask off any matches from uninitialized tags. */
254
+ uint16_t n = 65535 - num[key]; /* stores into this bucket since FN(Prepare), modulo 65536 */
255
+ uint64_t block_has_unused_slots = self->block_size_ > n; /* 1 or 0 */
256
+ uint64_t mask = (block_has_unused_slots << (n & (64 - 1))) - 1; /* n < block_size_: (1 << n) - 1 keeps bits 0..n-1; otherwise 0 - 1 keeps all 64 bits */
257
+ matches &= mask;
258
+ for (; matches > 0; matches &= (matches - 1)) { /* visits set bits from lowest to highest; matches & (matches - 1) clears the lowest set bit */
259
+ const size_t rb_index =
260
+ (head + (size_t)BROTLI_TZCNT64(matches)) & self->block_mask_;
261
+ size_t prev_ix = bucket[rb_index];
262
+ uint32_t current4;
263
+ const size_t backward = cur_ix - prev_ix;
264
+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) { /* break rather than continue: remaining candidates are presumably older, hence even farther back */
265
+ break;
266
+ }
267
+ prev_ix &= ring_buffer_mask;
268
+ if (cur_ix_masked + best_len > ring_buffer_mask) {
269
+ break;
270
+ }
271
+ if (prev_ix + best_len > ring_buffer_mask ||
272
+ /* compare 4 bytes ending at best_len + 1 */
273
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
274
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
275
+ continue;
276
+ }
277
+ current4 = BrotliUnalignedRead32(data + prev_ix);
278
+ if (first4 != current4) continue; /* the first 4 bytes must match exactly, so every accepted candidate has len >= 4 */
279
+ {
280
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix + 4],
281
+ &data[cur_ix_masked + 4],
282
+ max_length_m4) + 4; /* the first 4 bytes are already known to match; measure only the remainder */
283
+ const score_t score = BackwardReferenceScore(len, backward);
284
+ if (best_score < score) {
285
+ best_score = score;
286
+ best_len = len;
287
+ out->len = best_len;
288
+ out->distance = backward;
289
+ out->score = best_score;
290
+ }
291
+ }
292
+ }
293
+ bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix; /* record the current position and its tag, as FN(Store) does */
294
+ tag_bucket[num[key] & self->block_mask_] = tag;
295
+ --num[key];
296
+ }
297
+ if (min_score == out->score) { /* no candidate improved on the incoming score: fall back to the static dictionary */
298
+ SearchInStaticDictionary(dictionary,
299
+ self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
300
+ max_distance, out, BROTLI_FALSE);
301
+ }
302
+ }
303
+
304
+ #undef HashLongestMatch /* Drops the alias defined at the top of this file. NOTE(review): TAG_HASH_BITS and TAG_HASH_MASK, also defined at the top, are not #undef'd here - confirm that this is intended. */
@@ -104,11 +104,13 @@ static BROTLI_INLINE void FN(HashMemAllocInBytes)(
104
104
  static BROTLI_INLINE void FN(Store)(
105
105
  HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
106
106
  const size_t mask, const size_t ix) {
107
+ uint16_t* BROTLI_RESTRICT num = self->num_;
108
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
107
109
  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
108
- const size_t minor_ix = self->num_[key] & self->block_mask_;
110
+ const size_t minor_ix = num[key] & self->block_mask_;
109
111
  const size_t offset = minor_ix + (key << self->block_bits_);
110
- self->buckets_[offset] = (uint32_t)ix;
111
- ++self->num_[key];
112
+ ++num[key];
113
+ buckets[offset] = (uint32_t)ix;
112
114
  }
113
115
 
114
116
  static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
@@ -167,8 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
167
169
  score_t best_score = out->score;
168
170
  size_t best_len = out->len;
169
171
  size_t i;
172
+ /* Precalculate the hash key and prefetch the bucket. */
173
+ const uint32_t key =
174
+ FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
175
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
176
+ PREFETCH_L1(bucket);
177
+ if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
170
178
  out->len = 0;
171
179
  out->len_code_delta = 0;
180
+
181
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
182
+
172
183
  /* Try last distance first. */
173
184
  for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
174
185
  const size_t backward = (size_t)distance_cache[i];
@@ -181,8 +192,10 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
181
192
  }
182
193
  prev_ix &= ring_buffer_mask;
183
194
 
184
- if (cur_ix_masked + best_len > ring_buffer_mask ||
185
- prev_ix + best_len > ring_buffer_mask ||
195
+ if (cur_ix_masked + best_len > ring_buffer_mask) {
196
+ break;
197
+ }
198
+ if (prev_ix + best_len > ring_buffer_mask ||
186
199
  data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
187
200
  continue;
188
201
  }
@@ -208,10 +221,12 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
208
221
  }
209
222
  }
210
223
  }
224
+ /* Candidates below must match at least 4 bytes, so raise best_len to at
225
+ * least 3; the quick check can then always compare 4 bytes at once. */
226
+ if (best_len < 3) {
227
+ best_len = 3;
228
+ }
211
229
  {
212
- const uint32_t key =
213
- FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
214
- uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
215
230
  const size_t down =
216
231
  (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
217
232
  for (i = num[key]; i > down;) {
@@ -221,9 +236,13 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
221
236
  break;
222
237
  }
223
238
  prev_ix &= ring_buffer_mask;
224
- if (cur_ix_masked + best_len > ring_buffer_mask ||
225
- prev_ix + best_len > ring_buffer_mask ||
226
- data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
239
+ if (cur_ix_masked + best_len > ring_buffer_mask) {
240
+ break;
241
+ }
242
+ if (prev_ix + best_len > ring_buffer_mask ||
243
+ /* compare 4 bytes ending at best_len + 1 */
244
+ BrotliUnalignedRead32(&data[cur_ix_masked + best_len - 3]) !=
245
+ BrotliUnalignedRead32(&data[prev_ix + best_len - 3])) {
227
246
  continue;
228
247
  }
229
248
  {
@@ -155,6 +155,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
155
155
  uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
156
156
  const size_t best_len_in = out->len;
157
157
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
158
+ /* TODO: compare 4 bytes at once (and set the minimum best len to 4) */
158
159
  int compare_char = data[cur_ix_masked + best_len_in];
159
160
  size_t key = FN(HashBytes)(&data[cur_ix_masked]);
160
161
  size_t key_out;
@@ -163,6 +164,9 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
163
164
  size_t best_len = best_len_in;
164
165
  size_t cached_backward = (size_t)distance_cache[0];
165
166
  size_t prev_ix = cur_ix - cached_backward;
167
+
168
+ BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
169
+
166
170
  out->len_code_delta = 0;
167
171
  if (prev_ix < cur_ix) {
168
172
  prev_ix &= (uint32_t)ring_buffer_mask;