isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,262 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2010 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ /* A (forgetful) hash table to the data seen by the compressor, to
11
+ help create backward references to previous data.
12
+
13
+ This is a hash map of fixed size (bucket_size_) to a ring buffer of
14
+ fixed size (block_size_). The ring buffer contains the last block_size_
15
+ index positions of the given hash key in the compressed data. */
16
+
17
+ #define HashLongestMatch HASHER()
18
+
19
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
20
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
21
+
22
+ /* HashBytes is the function that chooses the bucket to place the address in. */
23
+ static uint32_t FN(HashBytes)(
24
+ const uint8_t* BROTLI_RESTRICT data, const int shift) {
25
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
26
+ /* The higher bits contain more mixture from the multiplication,
27
+ so we take our results from there. */
28
+ return (uint32_t)(h >> shift);
29
+ }
30
+
31
+ typedef struct HashLongestMatch {
32
+ /* Number of hash buckets. */
33
+ size_t bucket_size_;
34
+ /* Only block_size_ newest backward references are kept,
35
+ and the older are forgotten. */
36
+ size_t block_size_;
37
+ /* Right-shift amount (32 - bucket_bits) for computing hash bucket index from hash value. */
38
+ int hash_shift_;
39
+ /* Mask for accessing entries in a block (in a ring-buffer manner). */
40
+ uint32_t block_mask_;
41
+
42
+ int block_bits_;
43
+ int num_last_distances_to_check_;
44
+
45
+ /* Shortcuts. */
46
+ HasherCommon* common_;
47
+
48
+ /* --- Dynamic size members --- */
49
+
50
+ /* Number of entries in a particular bucket. */
51
+ uint16_t* num_; /* uint16_t[bucket_size]; */
52
+
53
+ /* Buckets containing block_size_ of backward references. */
54
+ uint32_t* buckets_; /* uint32_t[bucket_size * block_size]; */
55
+ } HashLongestMatch;
56
+
57
+ static BROTLI_INLINE uint16_t* FN(Num)(void* extra) {
58
+ return (uint16_t*)extra;
59
+ }
60
+
61
+ static void FN(Initialize)(
62
+ HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
63
+ const BrotliEncoderParams* params) {
64
+ self->common_ = common;
65
+
66
+ BROTLI_UNUSED(params);
67
+ self->hash_shift_ = 32 - common->params.bucket_bits;
68
+ self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
69
+ self->block_size_ = (size_t)1 << common->params.block_bits;
70
+ self->block_mask_ = (uint32_t)(self->block_size_ - 1);
71
+ self->num_ = (uint16_t*)common->extra;
72
+ self->buckets_ = (uint32_t*)(&self->num_[self->bucket_size_]);
73
+ self->block_bits_ = common->params.block_bits;
74
+ self->num_last_distances_to_check_ =
75
+ common->params.num_last_distances_to_check;
76
+ }
77
+
78
+ static void FN(Prepare)(
79
+ HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
80
+ size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
81
+ uint16_t* BROTLI_RESTRICT num = self->num_;
82
+ /* Partial preparation is 100 times slower (per bucket). */
83
+ size_t partial_prepare_threshold = self->bucket_size_ >> 6;
84
+ if (one_shot && input_size <= partial_prepare_threshold) {
85
+ size_t i;
86
+ for (i = 0; i < input_size; ++i) {
87
+ const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
88
+ num[key] = 0;
89
+ }
90
+ } else {
91
+ memset(num, 0, self->bucket_size_ * sizeof(num[0]));
92
+ }
93
+ }
94
+
95
+ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
96
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
97
+ size_t input_size) {
98
+ size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
99
+ size_t block_size = (size_t)1 << params->hasher.block_bits;
100
+ BROTLI_UNUSED(one_shot);
101
+ BROTLI_UNUSED(input_size);
102
+ return sizeof(uint16_t) * bucket_size +
103
+ sizeof(uint32_t) * bucket_size * block_size;
104
+ }
105
+
106
+ /* Look at 4 bytes at &data[ix & mask].
107
+ Compute a hash from these, and store the value of ix at that position. */
108
+ static BROTLI_INLINE void FN(Store)(
109
+ HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
110
+ const size_t mask, const size_t ix) {
111
+ const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
112
+ const size_t minor_ix = self->num_[key] & self->block_mask_;
113
+ const size_t offset = minor_ix + (key << self->block_bits_);
114
+ self->buckets_[offset] = (uint32_t)ix;
115
+ ++self->num_[key];
116
+ }
117
+
118
+ static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
119
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask,
120
+ const size_t ix_start, const size_t ix_end) {
121
+ size_t i;
122
+ for (i = ix_start; i < ix_end; ++i) {
123
+ FN(Store)(self, data, mask, i);
124
+ }
125
+ }
126
+
127
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
128
+ HashLongestMatch* BROTLI_RESTRICT self,
129
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
130
+ size_t ringbuffer_mask) {
131
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
132
+ /* Prepare the hashes for three last bytes of the last write.
133
+ These could not be calculated before, since they require knowledge
134
+ of both the previous and the current block. */
135
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
136
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
137
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
138
+ }
139
+ }
140
+
141
+ static BROTLI_INLINE void FN(PrepareDistanceCache)(
142
+ HashLongestMatch* BROTLI_RESTRICT self,
143
+ int* BROTLI_RESTRICT distance_cache) {
144
+ PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
145
+ }
146
+
147
+ /* Find a longest backward match of &data[cur_ix] up to the length of
148
+ max_length and stores the position cur_ix in the hash table.
149
+
150
+ REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
151
+ values; if this method is invoked repeatedly with the same distance
152
+ cache values, it is enough to invoke FN(PrepareDistanceCache) once.
153
+
154
+ Does not look for matches longer than max_length.
155
+ Does not look for matches further away than max_backward.
156
+ Writes the best match into |out|.
157
+ |out|->score is updated only if a better match is found. */
158
+ static BROTLI_INLINE void FN(FindLongestMatch)(
159
+ HashLongestMatch* BROTLI_RESTRICT self,
160
+ const BrotliEncoderDictionary* dictionary,
161
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
162
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
163
+ const size_t max_length, const size_t max_backward,
164
+ const size_t dictionary_distance, const size_t max_distance,
165
+ HasherSearchResult* BROTLI_RESTRICT out) {
166
+ uint16_t* BROTLI_RESTRICT num = self->num_;
167
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
168
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
169
+ /* Don't accept a short copy from far away. */
170
+ score_t min_score = out->score;
171
+ score_t best_score = out->score;
172
+ size_t best_len = out->len;
173
+ size_t i;
174
+ out->len = 0;
175
+ out->len_code_delta = 0;
176
+ /* Try last distance first. */
177
+ for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
178
+ const size_t backward = (size_t)distance_cache[i];
179
+ size_t prev_ix = (size_t)(cur_ix - backward);
180
+ if (prev_ix >= cur_ix) {
181
+ continue;
182
+ }
183
+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
184
+ continue;
185
+ }
186
+ prev_ix &= ring_buffer_mask;
187
+
188
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
189
+ prev_ix + best_len > ring_buffer_mask ||
190
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
191
+ continue;
192
+ }
193
+ {
194
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
195
+ &data[cur_ix_masked],
196
+ max_length);
197
+ if (len >= 3 || (len == 2 && i < 2)) {
198
+ /* Comparing for >= 2 does not change the semantics; it just saves
199
+ a few unnecessary binary logarithms in the backward reference score,
200
+ since we are not interested in such short matches. */
201
+ score_t score = BackwardReferenceScoreUsingLastDistance(len);
202
+ if (best_score < score) {
203
+ if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
204
+ if (best_score < score) {
205
+ best_score = score;
206
+ best_len = len;
207
+ out->len = best_len;
208
+ out->distance = backward;
209
+ out->score = best_score;
210
+ }
211
+ }
212
+ }
213
+ }
214
+ }
215
+ {
216
+ const uint32_t key =
217
+ FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
218
+ uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
219
+ const size_t down =
220
+ (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
221
+ for (i = num[key]; i > down;) {
222
+ size_t prev_ix = bucket[--i & self->block_mask_];
223
+ const size_t backward = cur_ix - prev_ix;
224
+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
225
+ break;
226
+ }
227
+ prev_ix &= ring_buffer_mask;
228
+ if (cur_ix_masked + best_len > ring_buffer_mask ||
229
+ prev_ix + best_len > ring_buffer_mask ||
230
+ data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
231
+ continue;
232
+ }
233
+ {
234
+ const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
235
+ &data[cur_ix_masked],
236
+ max_length);
237
+ if (len >= 4) {
238
+ /* Comparing for >= 3 does not change the semantics; it just saves
239
+ a few unnecessary binary logarithms in the backward reference
240
+ score, since we are not interested in such short matches. */
241
+ score_t score = BackwardReferenceScore(len, backward);
242
+ if (best_score < score) {
243
+ best_score = score;
244
+ best_len = len;
245
+ out->len = best_len;
246
+ out->distance = backward;
247
+ out->score = best_score;
248
+ }
249
+ }
250
+ }
251
+ }
252
+ bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
253
+ ++num[key];
254
+ }
255
+ if (min_score == out->score) {
256
+ SearchInStaticDictionary(dictionary,
257
+ self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
258
+ max_distance, out, BROTLI_FALSE);
259
+ }
260
+ }
261
+
262
+ #undef HashLongestMatch
@@ -0,0 +1,266 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2010 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
9
+ USE_DICTIONARY
10
+ */
11
+
12
+ #define HashLongestMatchQuickly HASHER()
13
+
14
+ #define BUCKET_SIZE (1 << BUCKET_BITS)
15
+ #define BUCKET_MASK (BUCKET_SIZE - 1)
16
+ #define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)
17
+ #define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)
18
+
19
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
20
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
21
+
22
+ /* HashBytes is the function that chooses the bucket to place
23
+ the address in. The HashLongestMatch and HashLongestMatchQuickly
24
+ classes have separate, different implementations of hashing. */
25
+ static uint32_t FN(HashBytes)(const uint8_t* data) {
26
+ const uint64_t h = ((BROTLI_UNALIGNED_LOAD64LE(data) << (64 - 8 * HASH_LEN)) *
27
+ kHashMul64);
28
+ /* The higher bits contain more mixture from the multiplication,
29
+ so we take our results from there. */
30
+ return (uint32_t)(h >> (64 - BUCKET_BITS));
31
+ }
32
+
33
+ /* A (forgetful) hash table to the data seen by the compressor, to
34
+ help create backward references to previous data.
35
+
36
+ This is a hash map of fixed size (BUCKET_SIZE). */
37
+ typedef struct HashLongestMatchQuickly {
38
+ /* Shortcuts. */
39
+ HasherCommon* common;
40
+
41
+ /* --- Dynamic size members --- */
42
+
43
+ uint32_t* buckets_; /* uint32_t[BUCKET_SIZE]; */
44
+ } HashLongestMatchQuickly;
45
+
46
+ static void FN(Initialize)(
47
+ HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
48
+ const BrotliEncoderParams* params) {
49
+ self->common = common;
50
+
51
+ BROTLI_UNUSED(params);
52
+ self->buckets_ = (uint32_t*)common->extra;
53
+ }
54
+
55
+ static void FN(Prepare)(
56
+ HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
57
+ size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
58
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
59
+ /* Partial preparation is 100 times slower (per bucket). */
60
+ size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
61
+ if (one_shot && input_size <= partial_prepare_threshold) {
62
+ size_t i;
63
+ for (i = 0; i < input_size; ++i) {
64
+ const uint32_t key = FN(HashBytes)(&data[i]);
65
+ if (BUCKET_SWEEP == 1) {
66
+ buckets[key] = 0;
67
+ } else {
68
+ uint32_t j;
69
+ for (j = 0; j < BUCKET_SWEEP; ++j) {
70
+ buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
71
+ }
72
+ }
73
+ }
74
+ } else {
75
+ /* It is not strictly necessary to fill this buffer here, but
76
+ not filling will make the results of the compression stochastic
77
+ (but correct). This is because random data would cause the
78
+ system to find accidentally good backward references here and there. */
79
+ memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
80
+ }
81
+ }
82
+
83
+ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
84
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
85
+ size_t input_size) {
86
+ BROTLI_UNUSED(params);
87
+ BROTLI_UNUSED(one_shot);
88
+ BROTLI_UNUSED(input_size);
89
+ return sizeof(uint32_t) * BUCKET_SIZE;
90
+ }
91
+
92
+ /* Look at HASH_LEN bytes at &data[ix & mask] (read with an 8-byte load).
93
+ Compute a hash from these, and store ix in one of the BUCKET_SWEEP
94
+ bucket slots selected by that hash and by ix itself. */
95
+ static BROTLI_INLINE void FN(Store)(
96
+ HashLongestMatchQuickly* BROTLI_RESTRICT self,
97
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
98
+ const uint32_t key = FN(HashBytes)(&data[ix & mask]);
99
+ if (BUCKET_SWEEP == 1) {
100
+ self->buckets_[key] = (uint32_t)ix;
101
+ } else {
102
+ /* Wiggle the value with the bucket sweep range. */
103
+ const uint32_t off = ix & BUCKET_SWEEP_MASK;
104
+ self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
105
+ }
106
+ }
107
+
108
+ static BROTLI_INLINE void FN(StoreRange)(
109
+ HashLongestMatchQuickly* BROTLI_RESTRICT self,
110
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask,
111
+ const size_t ix_start, const size_t ix_end) {
112
+ size_t i;
113
+ for (i = ix_start; i < ix_end; ++i) {
114
+ FN(Store)(self, data, mask, i);
115
+ }
116
+ }
117
+
118
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
119
+ HashLongestMatchQuickly* BROTLI_RESTRICT self,
120
+ size_t num_bytes, size_t position,
121
+ const uint8_t* ringbuffer, size_t ringbuffer_mask) {
122
+ if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
123
+ /* Prepare the hashes for three last bytes of the last write.
124
+ These could not be calculated before, since they require knowledge
125
+ of both the previous and the current block. */
126
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
127
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
128
+ FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
129
+ }
130
+ }
131
+
132
+ static BROTLI_INLINE void FN(PrepareDistanceCache)(
133
+ HashLongestMatchQuickly* BROTLI_RESTRICT self,
134
+ int* BROTLI_RESTRICT distance_cache) {
135
+ BROTLI_UNUSED(self);
136
+ BROTLI_UNUSED(distance_cache);
137
+ }
138
+
139
+ /* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
140
+ up to the length of max_length and stores the position cur_ix in the
141
+ hash table.
142
+
143
+ Does not look for matches longer than max_length.
144
+ Does not look for matches further away than max_backward.
145
+ Writes the best match into |out|.
146
+ |out|->score is updated only if a better match is found. */
147
+ static BROTLI_INLINE void FN(FindLongestMatch)(
148
+ HashLongestMatchQuickly* BROTLI_RESTRICT self,
149
+ const BrotliEncoderDictionary* dictionary,
150
+ const uint8_t* BROTLI_RESTRICT data,
151
+ const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
152
+ const size_t cur_ix, const size_t max_length, const size_t max_backward,
153
+ const size_t dictionary_distance, const size_t max_distance,
154
+ HasherSearchResult* BROTLI_RESTRICT out) {
155
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
156
+ const size_t best_len_in = out->len;
157
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
158
+ int compare_char = data[cur_ix_masked + best_len_in];
159
+ size_t key = FN(HashBytes)(&data[cur_ix_masked]);
160
+ size_t key_out;
161
+ score_t min_score = out->score;
162
+ score_t best_score = out->score;
163
+ size_t best_len = best_len_in;
164
+ size_t cached_backward = (size_t)distance_cache[0];
165
+ size_t prev_ix = cur_ix - cached_backward;
166
+ out->len_code_delta = 0;
167
+ if (prev_ix < cur_ix) {
168
+ prev_ix &= (uint32_t)ring_buffer_mask;
169
+ if (compare_char == data[prev_ix + best_len]) {
170
+ const size_t len = FindMatchLengthWithLimit(
171
+ &data[prev_ix], &data[cur_ix_masked], max_length);
172
+ if (len >= 4) {
173
+ const score_t score = BackwardReferenceScoreUsingLastDistance(len);
174
+ if (best_score < score) {
175
+ out->len = len;
176
+ out->distance = cached_backward;
177
+ out->score = score;
178
+ if (BUCKET_SWEEP == 1) {
179
+ buckets[key] = (uint32_t)cur_ix;
180
+ return;
181
+ } else {
182
+ best_len = len;
183
+ best_score = score;
184
+ compare_char = data[cur_ix_masked + len];
185
+ }
186
+ }
187
+ }
188
+ }
189
+ }
190
+ if (BUCKET_SWEEP == 1) {
191
+ size_t backward;
192
+ size_t len;
193
+ /* Only one to look for, don't bother to prepare for a loop. */
194
+ prev_ix = buckets[key];
195
+ buckets[key] = (uint32_t)cur_ix;
196
+ backward = cur_ix - prev_ix;
197
+ prev_ix &= (uint32_t)ring_buffer_mask;
198
+ if (compare_char != data[prev_ix + best_len_in]) {
199
+ return;
200
+ }
201
+ if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
202
+ return;
203
+ }
204
+ len = FindMatchLengthWithLimit(&data[prev_ix],
205
+ &data[cur_ix_masked],
206
+ max_length);
207
+ if (len >= 4) {
208
+ const score_t score = BackwardReferenceScore(len, backward);
209
+ if (best_score < score) {
210
+ out->len = len;
211
+ out->distance = backward;
212
+ out->score = score;
213
+ return;
214
+ }
215
+ }
216
+ } else {
217
+ size_t keys[BUCKET_SWEEP];
218
+ size_t i;
219
+ for (i = 0; i < BUCKET_SWEEP; ++i) {
220
+ keys[i] = (key + (i << 3)) & BUCKET_MASK;
221
+ }
222
+ key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];
223
+ for (i = 0; i < BUCKET_SWEEP; ++i) {
224
+ size_t len;
225
+ size_t backward;
226
+ prev_ix = buckets[keys[i]];
227
+ backward = cur_ix - prev_ix;
228
+ prev_ix &= (uint32_t)ring_buffer_mask;
229
+ if (compare_char != data[prev_ix + best_len]) {
230
+ continue;
231
+ }
232
+ if (BROTLI_PREDICT_FALSE(backward == 0 || backward > max_backward)) {
233
+ continue;
234
+ }
235
+ len = FindMatchLengthWithLimit(&data[prev_ix],
236
+ &data[cur_ix_masked],
237
+ max_length);
238
+ if (len >= 4) {
239
+ const score_t score = BackwardReferenceScore(len, backward);
240
+ if (best_score < score) {
241
+ best_len = len;
242
+ out->len = len;
243
+ compare_char = data[cur_ix_masked + len];
244
+ best_score = score;
245
+ out->score = score;
246
+ out->distance = backward;
247
+ }
248
+ }
249
+ }
250
+ }
251
+ if (USE_DICTIONARY && min_score == out->score) {
252
+ SearchInStaticDictionary(dictionary,
253
+ self->common, &data[cur_ix_masked], max_length, dictionary_distance,
254
+ max_distance, out, BROTLI_TRUE);
255
+ }
256
+ if (BUCKET_SWEEP != 1) {
257
+ buckets[key_out] = (uint32_t)cur_ix;
258
+ }
259
+ }
260
+
261
+ #undef BUCKET_SWEEP_MASK
262
+ #undef BUCKET_SWEEP
263
+ #undef BUCKET_MASK
264
+ #undef BUCKET_SIZE
265
+
266
+ #undef HashLongestMatchQuickly