isomorfeus-ferret 0.12.5 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +54 -4
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzip_blocksort.c +1094 -0
  94. data/ext/isomorfeus_ferret_ext/bzip_huffman.c +205 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +105 -63
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -3
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  130. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  131. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +67 -91
  132. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  133. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  134. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  135. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  136. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  137. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  138. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +26 -25
  139. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  140. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  141. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  142. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  143. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  144. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  145. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  146. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  147. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  149. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  150. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  151. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  152. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  153. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +21 -109
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  155. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  156. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  157. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  159. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  161. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  163. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  164. data/ext/isomorfeus_ferret_ext/test.c +1 -2
  165. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  166. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  167. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  168. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  169. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  170. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  171. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  172. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  173. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  174. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  175. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  176. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  177. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  178. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  179. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  180. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  181. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  182. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  183. data/ext/isomorfeus_ferret_ext/test_search.c +60 -62
  184. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  185. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  186. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  187. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  188. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  189. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  190. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  192. data/lib/isomorfeus/ferret/version.rb +1 -1
  193. metadata +113 -58
  194. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  195. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  196. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  197. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  198. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  199. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  200. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  201. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  202. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  203. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  204. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  205. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  243. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  245. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  246. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  247. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,212 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2018 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
9
+ /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
10
+ /* JUMP = skip bytes for speedup */
11
+
12
+ /* Rolling hash for long distance long string matches. Stores one position
13
+ per bucket, bucket key is computed over a long region. */
14
+
15
+ #define HashRolling HASHER()
16
+
17
+ static const uint32_t FN(kRollingHashMul32) = 69069;
18
+ static const uint32_t FN(kInvalidPos) = 0xffffffff;
19
+
20
+ /* This hasher uses a longer forward length, but returning a higher value here
21
+ will hurt compression by the main hasher when combined with a composite
22
+ hasher. The hasher tests for forward itself instead. */
23
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
24
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
25
+
26
+ /* Computes a code from a single byte. A lookup table of 256 values could be
27
+ used, but simply adding 1 works about as good. */
28
+ static uint32_t FN(HashByte)(uint8_t byte) {
29
+ return (uint32_t)byte + 1u;
30
+ }
31
+
32
+ static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
33
+ uint32_t factor) {
34
+ return (uint32_t)(factor * state + FN(HashByte)(add));
35
+ }
36
+
37
+ static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
38
+ uint8_t rem, uint32_t factor,
39
+ uint32_t factor_remove) {
40
+ return (uint32_t)(factor * state +
41
+ FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
42
+ }
43
+
/* State of the rolling hasher. */
struct HashRolling {
  /* Current rolling hash value. */
  uint32_t state;
  /* Bucket table; each slot holds a stored position or the invalid-position
     sentinel. Initialize points it at common->extra. */
  uint32_t* table;
  /* Next position from which FindLongestMatch resumes rolling the hash. */
  size_t next_ix;

  /* Not assigned by any function visible in this file. */
  uint32_t chunk_len;
  /* Multiplier applied to the state on each step. */
  uint32_t factor;
  /* Weight used to remove the oldest byte's contribution from the state. */
  uint32_t factor_remove;
};
typedef struct HashRolling HashRolling;
53
+
54
+ static void FN(Initialize)(
55
+ HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
56
+ const BrotliEncoderParams* params) {
57
+ size_t i;
58
+ self->state = 0;
59
+ self->next_ix = 0;
60
+
61
+ self->factor = FN(kRollingHashMul32);
62
+
63
+ /* Compute the factor of the oldest byte to remove: factor**steps modulo
64
+ 0xffffffff (the multiplications rely on 32-bit overflow) */
65
+ self->factor_remove = 1;
66
+ for (i = 0; i < CHUNKLEN; i += JUMP) {
67
+ self->factor_remove *= self->factor;
68
+ }
69
+
70
+ self->table = (uint32_t*)common->extra;
71
+ for (i = 0; i < NUMBUCKETS; i++) {
72
+ self->table[i] = FN(kInvalidPos);
73
+ }
74
+
75
+ BROTLI_UNUSED(params);
76
+ }
77
+
78
+ static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
79
+ size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
80
+ size_t i;
81
+ /* Too small size, cannot use this hasher. */
82
+ if (input_size < CHUNKLEN) return;
83
+ self->state = 0;
84
+ for (i = 0; i < CHUNKLEN; i += JUMP) {
85
+ self->state = FN(HashRollingFunctionInitial)(
86
+ self->state, data[i], self->factor);
87
+ }
88
+ BROTLI_UNUSED(one_shot);
89
+ }
90
+
91
+ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
92
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
93
+ size_t input_size) {
94
+ return NUMBUCKETS * sizeof(uint32_t);
95
+ BROTLI_UNUSED(params);
96
+ BROTLI_UNUSED(one_shot);
97
+ BROTLI_UNUSED(input_size);
98
+ }
99
+
100
+ static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
101
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
102
+ BROTLI_UNUSED(self);
103
+ BROTLI_UNUSED(data);
104
+ BROTLI_UNUSED(mask);
105
+ BROTLI_UNUSED(ix);
106
+ }
107
+
108
+ static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
109
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask,
110
+ const size_t ix_start, const size_t ix_end) {
111
+ BROTLI_UNUSED(self);
112
+ BROTLI_UNUSED(data);
113
+ BROTLI_UNUSED(mask);
114
+ BROTLI_UNUSED(ix_start);
115
+ BROTLI_UNUSED(ix_end);
116
+ }
117
+
118
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
119
+ HashRolling* BROTLI_RESTRICT self,
120
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
121
+ size_t ring_buffer_mask) {
122
+ /* In this case we must re-initialize the hasher from scratch from the
123
+ current position. */
124
+ size_t position_masked;
125
+ size_t available = num_bytes;
126
+ if ((position & (JUMP - 1)) != 0) {
127
+ size_t diff = JUMP - (position & (JUMP - 1));
128
+ available = (diff > available) ? 0 : (available - diff);
129
+ position += diff;
130
+ }
131
+ position_masked = position & ring_buffer_mask;
132
+ /* wrapping around ringbuffer not handled. */
133
+ if (available > ring_buffer_mask - position_masked) {
134
+ available = ring_buffer_mask - position_masked;
135
+ }
136
+
137
+ FN(Prepare)(self, BROTLI_FALSE, available,
138
+ ringbuffer + (position & ring_buffer_mask));
139
+ self->next_ix = position;
140
+ BROTLI_UNUSED(num_bytes);
141
+ }
142
+
143
+ static BROTLI_INLINE void FN(PrepareDistanceCache)(
144
+ HashRolling* BROTLI_RESTRICT self,
145
+ int* BROTLI_RESTRICT distance_cache) {
146
+ BROTLI_UNUSED(self);
147
+ BROTLI_UNUSED(distance_cache);
148
+ }
149
+
150
+ static BROTLI_INLINE void FN(FindLongestMatch)(
151
+ HashRolling* BROTLI_RESTRICT self,
152
+ const BrotliEncoderDictionary* dictionary,
153
+ const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
154
+ const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
155
+ const size_t max_length, const size_t max_backward,
156
+ const size_t dictionary_distance, const size_t max_distance,
157
+ HasherSearchResult* BROTLI_RESTRICT out) {
158
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
159
+ size_t pos;
160
+
161
+ if ((cur_ix & (JUMP - 1)) != 0) return;
162
+
163
+ /* Not enough lookahead */
164
+ if (max_length < CHUNKLEN) return;
165
+
166
+ for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
167
+ uint32_t code = self->state & MASK;
168
+
169
+ uint8_t rem = data[pos & ring_buffer_mask];
170
+ uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
171
+ size_t found_ix = FN(kInvalidPos);
172
+
173
+ self->state = FN(HashRollingFunction)(
174
+ self->state, add, rem, self->factor, self->factor_remove);
175
+
176
+ if (code < NUMBUCKETS) {
177
+ found_ix = self->table[code];
178
+ self->table[code] = (uint32_t)pos;
179
+ if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
180
+ /* The cast to 32-bit makes backward distances up to 4GB work even
181
+ if cur_ix is above 4GB, despite using 32-bit values in the table. */
182
+ size_t backward = (uint32_t)(cur_ix - found_ix);
183
+ if (backward <= max_backward) {
184
+ const size_t found_ix_masked = found_ix & ring_buffer_mask;
185
+ const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
186
+ &data[cur_ix_masked],
187
+ max_length);
188
+ if (len >= 4 && len > out->len) {
189
+ score_t score = BackwardReferenceScore(len, backward);
190
+ if (score > out->score) {
191
+ out->len = len;
192
+ out->distance = backward;
193
+ out->score = score;
194
+ out->len_code_delta = 0;
195
+ }
196
+ }
197
+ }
198
+ }
199
+ }
200
+ }
201
+
202
+ self->next_ix = cur_ix + JUMP;
203
+
204
+ /* NOTE: this hasher does not search in the dictionary. It is used as
205
+ backup-hasher, the main hasher already searches in it. */
206
+ BROTLI_UNUSED(dictionary);
207
+ BROTLI_UNUSED(distance_cache);
208
+ BROTLI_UNUSED(dictionary_distance);
209
+ BROTLI_UNUSED(max_distance);
210
+ }
211
+
212
+ #undef HashRolling
@@ -0,0 +1,329 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2016 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
9
+ MAX_TREE_SEARCH_DEPTH */
10
+
11
+ /* A (forgetful) hash table where each hash bucket contains a binary tree of
12
+ sequences whose first 4 bytes share the same hash code.
13
+ Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
14
+ position in the input data. The binary tree is sorted by the lexicographic
15
+ order of the sequences, and it is also a max-heap with respect to the
16
+ starting positions. */
17
+
18
+ #define HashToBinaryTree HASHER()
19
+
20
+ #define BUCKET_SIZE (1 << BUCKET_BITS)
21
+
22
+ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
23
+ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
24
+ return MAX_TREE_COMP_LENGTH;
25
+ }
26
+
27
+ static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
28
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
29
+ /* The higher bits contain more mixture from the multiplication,
30
+ so we take our results from there. */
31
+ return h >> (32 - BUCKET_BITS);
32
+ }
33
+
/* State of the binary-tree hasher. */
struct HashToBinaryTree {
  /* The window size minus 1. */
  size_t window_mask_;

  /* Hash table mapping the 4-byte hash of a sequence to the last position
     where that hash was found; that position is the root of the binary tree
     of sequences sharing the hash bucket. */
  uint32_t* buckets_;  /* uint32_t[BUCKET_SIZE]; */

  /* Position used to mark a non-existent sequence: a tree is empty if its
     root is at invalid_pos_, and a node is a leaf if both of its children
     are at invalid_pos_. */
  uint32_t invalid_pos_;

  /* --- Dynamic size members --- */

  /* The union of the binary trees of all hash buckets. The root of the tree
     for a hash is the sequence starting at buckets_[hash]; the left and
     right children of the sequence starting at pos are forest_[2 * pos] and
     forest_[2 * pos + 1]. */
  uint32_t* forest_;  /* uint32_t[2 * num_nodes] */
};
typedef struct HashToBinaryTree HashToBinaryTree;
56
+
57
+ static void FN(Initialize)(
58
+ HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
59
+ const BrotliEncoderParams* params) {
60
+ self->buckets_ = (uint32_t*)common->extra;
61
+ self->forest_ = &self->buckets_[BUCKET_SIZE];
62
+
63
+ self->window_mask_ = (1u << params->lgwin) - 1u;
64
+ self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
65
+ }
66
+
67
+ static void FN(Prepare)
68
+ (HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
69
+ size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
70
+ uint32_t invalid_pos = self->invalid_pos_;
71
+ uint32_t i;
72
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
73
+ BROTLI_UNUSED(data);
74
+ BROTLI_UNUSED(one_shot);
75
+ BROTLI_UNUSED(input_size);
76
+ for (i = 0; i < BUCKET_SIZE; i++) {
77
+ buckets[i] = invalid_pos;
78
+ }
79
+ }
80
+
81
+ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
82
+ const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
83
+ size_t input_size) {
84
+ size_t num_nodes = (size_t)1 << params->lgwin;
85
+ if (one_shot && input_size < num_nodes) {
86
+ num_nodes = input_size;
87
+ }
88
+ return sizeof(uint32_t) * BUCKET_SIZE + 2 * sizeof(uint32_t) * num_nodes;
89
+ }
90
+
91
+ static BROTLI_INLINE size_t FN(LeftChildIndex)(
92
+ HashToBinaryTree* BROTLI_RESTRICT self,
93
+ const size_t pos) {
94
+ return 2 * (pos & self->window_mask_);
95
+ }
96
+
97
+ static BROTLI_INLINE size_t FN(RightChildIndex)(
98
+ HashToBinaryTree* BROTLI_RESTRICT self,
99
+ const size_t pos) {
100
+ return 2 * (pos & self->window_mask_) + 1;
101
+ }
102
+
103
+ /* Stores the hash of the next 4 bytes and in a single tree-traversal, the
104
+ hash bucket's binary tree is searched for matches and is re-rooted at the
105
+ current position.
106
+
107
+ If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
108
+ current position is searched for matches, but the state of the hash table
109
+ is not changed, since we can not know the final sorting order of the
110
+ current (incomplete) sequence.
111
+
112
+ This function must be called with increasing cur_ix positions. */
113
+ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
114
+ HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
115
+ const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
116
+ const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
117
+ BackwardMatch* BROTLI_RESTRICT matches) {
118
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
119
+ const size_t max_comp_len =
120
+ BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
121
+ const BROTLI_BOOL should_reroot_tree =
122
+ TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
123
+ const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
124
+ uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
125
+ uint32_t* BROTLI_RESTRICT forest = self->forest_;
126
+ size_t prev_ix = buckets[key];
127
+ /* The forest index of the rightmost node of the left subtree of the new
128
+ root, updated as we traverse and re-root the tree of the hash bucket. */
129
+ size_t node_left = FN(LeftChildIndex)(self, cur_ix);
130
+ /* The forest index of the leftmost node of the right subtree of the new
131
+ root, updated as we traverse and re-root the tree of the hash bucket. */
132
+ size_t node_right = FN(RightChildIndex)(self, cur_ix);
133
+ /* The match length of the rightmost node of the left subtree of the new
134
+ root, updated as we traverse and re-root the tree of the hash bucket. */
135
+ size_t best_len_left = 0;
136
+ /* The match length of the leftmost node of the right subtree of the new
137
+ root, updated as we traverse and re-root the tree of the hash bucket. */
138
+ size_t best_len_right = 0;
139
+ size_t depth_remaining;
140
+ if (should_reroot_tree) {
141
+ buckets[key] = (uint32_t)cur_ix;
142
+ }
143
+ for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
144
+ const size_t backward = cur_ix - prev_ix;
145
+ const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
146
+ if (backward == 0 || backward > max_backward || depth_remaining == 0) {
147
+ if (should_reroot_tree) {
148
+ forest[node_left] = self->invalid_pos_;
149
+ forest[node_right] = self->invalid_pos_;
150
+ }
151
+ break;
152
+ }
153
+ {
154
+ const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
155
+ size_t len;
156
+ BROTLI_DCHECK(cur_len <= MAX_TREE_COMP_LENGTH);
157
+ len = cur_len +
158
+ FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
159
+ &data[prev_ix_masked + cur_len],
160
+ max_length - cur_len);
161
+ BROTLI_DCHECK(
162
+ 0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
163
+ if (matches && len > *best_len) {
164
+ *best_len = len;
165
+ InitBackwardMatch(matches++, backward, len);
166
+ }
167
+ if (len >= max_comp_len) {
168
+ if (should_reroot_tree) {
169
+ forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
170
+ forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
171
+ }
172
+ break;
173
+ }
174
+ if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
175
+ best_len_left = len;
176
+ if (should_reroot_tree) {
177
+ forest[node_left] = (uint32_t)prev_ix;
178
+ }
179
+ node_left = FN(RightChildIndex)(self, prev_ix);
180
+ prev_ix = forest[node_left];
181
+ } else {
182
+ best_len_right = len;
183
+ if (should_reroot_tree) {
184
+ forest[node_right] = (uint32_t)prev_ix;
185
+ }
186
+ node_right = FN(LeftChildIndex)(self, prev_ix);
187
+ prev_ix = forest[node_right];
188
+ }
189
+ }
190
+ }
191
+ return matches;
192
+ }
193
+
194
+ /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
195
+ length of max_length and stores the position cur_ix in the hash table.
196
+
197
+ Sets *num_matches to the number of matches found, and stores the found
198
+ matches in matches[0] to matches[*num_matches - 1]. The matches will be
199
+ sorted by strictly increasing length and (non-strictly) increasing
200
+ distance. */
201
+ static BROTLI_INLINE size_t FN(FindAllMatches)(
202
+ HashToBinaryTree* BROTLI_RESTRICT self,
203
+ const BrotliEncoderDictionary* dictionary,
204
+ const uint8_t* BROTLI_RESTRICT data,
205
+ const size_t ring_buffer_mask, const size_t cur_ix,
206
+ const size_t max_length, const size_t max_backward,
207
+ const size_t dictionary_distance, const BrotliEncoderParams* params,
208
+ BackwardMatch* matches) {
209
+ BackwardMatch* const orig_matches = matches;
210
+ const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
211
+ size_t best_len = 1;
212
+ const size_t short_match_max_backward =
213
+ params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
214
+ size_t stop = cur_ix - short_match_max_backward;
215
+ uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
216
+ size_t i;
217
+ if (cur_ix < short_match_max_backward) { stop = 0; }
218
+ for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
219
+ size_t prev_ix = i;
220
+ const size_t backward = cur_ix - prev_ix;
221
+ if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
222
+ break;
223
+ }
224
+ prev_ix &= ring_buffer_mask;
225
+ if (data[cur_ix_masked] != data[prev_ix] ||
226
+ data[cur_ix_masked + 1] != data[prev_ix + 1]) {
227
+ continue;
228
+ }
229
+ {
230
+ const size_t len =
231
+ FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
232
+ max_length);
233
+ if (len > best_len) {
234
+ best_len = len;
235
+ InitBackwardMatch(matches++, backward, len);
236
+ }
237
+ }
238
+ }
239
+ if (best_len < max_length) {
240
+ matches = FN(StoreAndFindMatches)(self, data, cur_ix,
241
+ ring_buffer_mask, max_length, max_backward, &best_len, matches);
242
+ }
243
+ for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
244
+ dict_matches[i] = kInvalidMatch;
245
+ }
246
+ {
247
+ size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
248
+ if (BrotliFindAllStaticDictionaryMatches(dictionary,
249
+ &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
250
+ size_t maxlen = BROTLI_MIN(
251
+ size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
252
+ size_t l;
253
+ for (l = minlen; l <= maxlen; ++l) {
254
+ uint32_t dict_id = dict_matches[l];
255
+ if (dict_id < kInvalidMatch) {
256
+ size_t distance = dictionary_distance + (dict_id >> 5) + 1;
257
+ if (distance <= params->dist.max_distance) {
258
+ InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
259
+ }
260
+ }
261
+ }
262
+ }
263
+ }
264
+ return (size_t)(matches - orig_matches);
265
+ }
266
+
267
+ /* Stores the hash of the next 4 bytes and re-roots the binary tree at the
268
+ current sequence, without returning any matches.
269
+ REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
270
+ static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
271
+ const uint8_t* BROTLI_RESTRICT data,
272
+ const size_t mask, const size_t ix) {
273
+ /* Maximum distance is window size - 16, see section 9.1. of the spec. */
274
+ const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
275
+ FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
276
+ max_backward, NULL, NULL);
277
+ }
278
+
279
+ static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
280
+ const uint8_t* BROTLI_RESTRICT data, const size_t mask,
281
+ const size_t ix_start, const size_t ix_end) {
282
+ size_t i = ix_start;
283
+ size_t j = ix_start;
284
+ if (ix_start + 63 <= ix_end) {
285
+ i = ix_end - 63;
286
+ }
287
+ if (ix_start + 512 <= i) {
288
+ for (; j < i; j += 8) {
289
+ FN(Store)(self, data, mask, j);
290
+ }
291
+ }
292
+ for (; i < ix_end; ++i) {
293
+ FN(Store)(self, data, mask, i);
294
+ }
295
+ }
296
+
297
+ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
298
+ HashToBinaryTree* BROTLI_RESTRICT self,
299
+ size_t num_bytes, size_t position, const uint8_t* ringbuffer,
300
+ size_t ringbuffer_mask) {
301
+ if (num_bytes >= FN(HashTypeLength)() - 1 &&
302
+ position >= MAX_TREE_COMP_LENGTH) {
303
+ /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
304
+ These could not be calculated before, since they require knowledge
305
+ of both the previous and the current block. */
306
+ const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
307
+ const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
308
+ size_t i;
309
+ for (i = i_start; i < i_end; ++i) {
310
+ /* Maximum distance is window size - 16, see section 9.1. of the spec.
311
+ Furthermore, we have to make sure that we don't look further back
312
+ from the start of the next block than the window size, otherwise we
313
+ could access already overwritten areas of the ring-buffer. */
314
+ const size_t max_backward =
315
+ self->window_mask_ - BROTLI_MAX(size_t,
316
+ BROTLI_WINDOW_GAP - 1,
317
+ position - i);
318
+ /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
319
+ end of the current block and that we have at least
320
+ MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
321
+ FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
322
+ MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
323
+ }
324
+ }
325
+ }
326
+
327
+ #undef BUCKET_SIZE
328
+
329
+ #undef HashToBinaryTree
@@ -0,0 +1,100 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Build per-context histograms of literals, commands and distance codes. */
8
+
9
+ #include "brotli_enc_histogram.h"
10
+
11
+ #include "brotli_common_context.h"
12
+ #include "brotli_enc_block_splitter.h"
13
+ #include "brotli_enc_command.h"
14
+
15
+ #if defined(__cplusplus) || defined(c_plusplus)
16
+ extern "C" {
17
+ #endif
18
+
19
/* Cursor that walks a BlockSplit one symbol at a time. It tracks which block
   the current symbol falls into and how many symbols of that block remain. */
typedef struct BlockSplitIterator {
  const BlockSplit* split_;  /* Not owned. */
  size_t idx_;     /* Index of the current block in split_->types/lengths. */
  size_t type_;    /* Type of the current block (0 until the first advance). */
  size_t length_;  /* Symbols still to be consumed from the current block. */
} BlockSplitIterator;
25
+
26
+ static void InitBlockSplitIterator(BlockSplitIterator* self,
27
+ const BlockSplit* split) {
28
+ self->split_ = split;
29
+ self->idx_ = 0;
30
+ self->type_ = 0;
31
+ self->length_ = split->lengths ? split->lengths[0] : 0;
32
+ }
33
+
34
+ static void BlockSplitIteratorNext(BlockSplitIterator* self) {
35
+ if (self->length_ == 0) {
36
+ ++self->idx_;
37
+ self->type_ = self->split_->types[self->idx_];
38
+ self->length_ = self->split_->lengths[self->idx_];
39
+ }
40
+ --self->length_;
41
+ }
42
+
43
+ void BrotliBuildHistogramsWithContext(
44
+ const Command* cmds, const size_t num_commands,
45
+ const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
46
+ const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
47
+ size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
48
+ const ContextType* context_modes, HistogramLiteral* literal_histograms,
49
+ HistogramCommand* insert_and_copy_histograms,
50
+ HistogramDistance* copy_dist_histograms) {
51
+ size_t pos = start_pos;
52
+ BlockSplitIterator literal_it;
53
+ BlockSplitIterator insert_and_copy_it;
54
+ BlockSplitIterator dist_it;
55
+ size_t i;
56
+
57
+ InitBlockSplitIterator(&literal_it, literal_split);
58
+ InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
59
+ InitBlockSplitIterator(&dist_it, dist_split);
60
+ for (i = 0; i < num_commands; ++i) {
61
+ const Command* cmd = &cmds[i];
62
+ size_t j;
63
+ BlockSplitIteratorNext(&insert_and_copy_it);
64
+ HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
65
+ cmd->cmd_prefix_);
66
+ /* TODO: unwrap iterator blocks. */
67
+ for (j = cmd->insert_len_; j != 0; --j) {
68
+ size_t context;
69
+ BlockSplitIteratorNext(&literal_it);
70
+ context = literal_it.type_;
71
+ if (context_modes) {
72
+ ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
73
+ context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
74
+ BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
75
+ }
76
+ HistogramAddLiteral(&literal_histograms[context],
77
+ ringbuffer[pos & mask]);
78
+ prev_byte2 = prev_byte;
79
+ prev_byte = ringbuffer[pos & mask];
80
+ ++pos;
81
+ }
82
+ pos += CommandCopyLen(cmd);
83
+ if (CommandCopyLen(cmd)) {
84
+ prev_byte2 = ringbuffer[(pos - 2) & mask];
85
+ prev_byte = ringbuffer[(pos - 1) & mask];
86
+ if (cmd->cmd_prefix_ >= 128) {
87
+ size_t context;
88
+ BlockSplitIteratorNext(&dist_it);
89
+ context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
90
+ CommandDistanceContext(cmd);
91
+ HistogramAddDistance(&copy_dist_histograms[context],
92
+ cmd->dist_prefix_ & 0x3FF);
93
+ }
94
+ }
95
+ }
96
+ }
97
+
98
+ #if defined(__cplusplus) || defined(c_plusplus)
99
+ } /* extern "C" */
100
+ #endif
@@ -0,0 +1,63 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Models the histograms of literals, commands and distance codes. */
8
+
9
+ #ifndef BROTLI_ENC_HISTOGRAM_H_
10
+ #define BROTLI_ENC_HISTOGRAM_H_
11
+
12
+ #include <string.h> /* memset */
13
+
14
+ #include "brotli_common_constants.h"
15
+ #include "brotli_common_context.h"
16
+ #include "brotli_common_platform.h"
17
+ #include "brotli_types.h"
18
+ #include "brotli_enc_block_splitter.h"
19
+ #include "brotli_enc_command.h"
20
+
21
+ #if defined(__cplusplus) || defined(c_plusplus)
22
+ extern "C" {
23
+ #endif
24
+
25
+ /* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
26
+ #define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
27
+
28
+ #define FN(X) X ## Literal
29
+ #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
30
+ #define DataType uint8_t
31
+ #include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
32
+ #undef DataType
33
+ #undef DATA_SIZE
34
+ #undef FN
35
+
36
+ #define FN(X) X ## Command
37
+ #define DataType uint16_t
38
+ #define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
39
+ #include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
40
+ #undef DATA_SIZE
41
+ #undef FN
42
+
43
+ #define FN(X) X ## Distance
44
+ #define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
45
+ #include "brotli_enc_histogram_inc.h" /* NOLINT(build/include) */
46
+ #undef DataType
47
+ #undef DATA_SIZE
48
+ #undef FN
49
+
50
/* Builds per-block-type (and, for literals and distances, per-context)
   histograms by replaying `num_commands` commands from `cmds` over
   `ringbuffer`, starting at position `pos` (indexed modulo `mask`).

   `literal_split`, `insert_and_copy_split` and `dist_split` assign a block
   type to each literal, command and distance respectively. `prev_byte` and
   `prev_byte2` are the two bytes preceding `pos`. `context_modes` may be
   NULL, in which case literals are indexed by block type only. The three
   histogram arrays are updated in place; the caller is presumably expected to
   have sized and cleared them beforehand (not checked here). */
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
    const Command* cmds, const size_t num_commands,
    const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
    const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
    size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
    const ContextType* context_modes, HistogramLiteral* literal_histograms,
    HistogramCommand* insert_and_copy_histograms,
    HistogramDistance* copy_dist_histograms);
58
+
59
+ #if defined(__cplusplus) || defined(c_plusplus)
60
+ } /* extern "C" */
61
+ #endif
62
+
63
+ #endif /* BROTLI_ENC_HISTOGRAM_H_ */