isomorfeus-ferret 0.12.6 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -16
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  94. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  130. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  131. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  132. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  133. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  134. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  135. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  136. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  137. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  138. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  139. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  140. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  141. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  142. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  143. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  144. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  145. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  146. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  147. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  149. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  150. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  151. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  152. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  153. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  155. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  156. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  157. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  159. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  161. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  163. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  164. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  165. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  166. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  167. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  168. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  169. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  170. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  171. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  172. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  173. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  174. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  175. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  176. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  177. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  178. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  179. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  180. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  181. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  182. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  183. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  184. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  185. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  186. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  187. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  188. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  189. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  190. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  192. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  193. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  194. data/lib/isomorfeus/ferret/version.rb +1 -1
  195. metadata +113 -58
  196. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  197. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  198. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  199. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  200. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  201. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  202. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  203. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  204. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  205. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  243. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  245. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  246. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  247. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  248. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  249. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,790 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function for fast encoding of an input fragment, independently from the input
8
+ history. This function uses one-pass processing: when we find a backward
9
+ match, we immediately emit the corresponding command and literal codes to
10
+ the bit stream.
11
+
12
+ Adapted from the CompressFragment() function in
13
+ https://github.com/google/snappy/blob/master/snappy.cc */
14
+
15
+ #include "brotli_enc_compress_fragment.h"
16
+
17
+ #include <string.h> /* memcmp, memcpy, memset */
18
+
19
+ #include "brotli_common_constants.h"
20
+ #include "brotli_common_platform.h"
21
+ #include "brotli_types.h"
22
+ #include "brotli_enc_brotli_bit_stream.h"
23
+ #include "brotli_enc_entropy_encode.h"
24
+ #include "brotli_enc_fast_log.h"
25
+ #include "brotli_enc_find_match_length.h"
26
+ #include "brotli_enc_memory.h"
27
+ #include "brotli_enc_write_bits.h"
28
+
29
+ #if defined(__cplusplus) || defined(c_plusplus)
30
+ extern "C" {
31
+ #endif
32
+
33
/* Backward-distance limit for an 18-bit window, converted to long.
   The replacement list is fully parenthesized so that the cast and its
   operand stay one unit wherever the macro is expanded. */
#define MAX_DISTANCE ((long)BROTLI_MAX_BACKWARD_LIMIT(18))
34
+
35
/* Multiplier shared by the hash helpers below. Properties:
    * It is odd; an even multiplier could lose the highest bit.
    * It has no long streaks of ones or zeros.
    * It is not required to be prime; being odd is enough for this use.
    * The value was tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BDu;
42
+
43
+ static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
44
+ const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(p) << 24) * kHashMul32;
45
+ return (uint32_t)(h >> shift);
46
+ }
47
+
48
+ static BROTLI_INLINE uint32_t HashBytesAtOffset(
49
+ uint64_t v, int offset, size_t shift) {
50
+ BROTLI_DCHECK(offset >= 0);
51
+ BROTLI_DCHECK(offset <= 3);
52
+ {
53
+ const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
54
+ return (uint32_t)(h >> shift);
55
+ }
56
+ }
57
+
58
+ static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
59
+ return TO_BROTLI_BOOL(
60
+ BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2) &&
61
+ p1[4] == p2[4]);
62
+ }
63
+
64
+ /* Builds a literal prefix code into "depths" and "bits" based on the statistics
65
+ of the "input" string and stores it into the bit stream.
66
+ Note that the prefix code here is built from the pre-LZ77 input, therefore
67
+ we can only approximate the statistics of the actual literal stream.
68
+ Moreover, for long inputs we build a histogram from a sample of the input
69
+ and thus have to assign a non-zero depth for each literal.
70
+ Returns estimated compression ratio millibytes/char for encoding given input
71
+ with generated code. */
72
+ static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
73
+ const uint8_t* input,
74
+ const size_t input_size,
75
+ uint8_t depths[256],
76
+ uint16_t bits[256],
77
+ size_t* storage_ix,
78
+ uint8_t* storage) {
79
+ uint32_t histogram[256] = { 0 };
80
+ size_t histogram_total;
81
+ size_t i;
82
+ if (input_size < (1 << 15)) {
83
+ for (i = 0; i < input_size; ++i) {
84
+ ++histogram[input[i]];
85
+ }
86
+ histogram_total = input_size;
87
+ for (i = 0; i < 256; ++i) {
88
+ /* We weigh the first 11 samples with weight 3 to account for the
89
+ balancing effect of the LZ77 phase on the histogram. */
90
+ const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
91
+ histogram[i] += adjust;
92
+ histogram_total += adjust;
93
+ }
94
+ } else {
95
+ static const size_t kSampleRate = 29;
96
+ for (i = 0; i < input_size; i += kSampleRate) {
97
+ ++histogram[input[i]];
98
+ }
99
+ histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
100
+ for (i = 0; i < 256; ++i) {
101
+ /* We add 1 to each population count to avoid 0 bit depths (since this is
102
+ only a sample and we don't know if the symbol appears or not), and we
103
+ weigh the first 11 samples with weight 3 to account for the balancing
104
+ effect of the LZ77 phase on the histogram (more frequent symbols are
105
+ more likely to be in backward references instead as literals). */
106
+ const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
107
+ histogram[i] += adjust;
108
+ histogram_total += adjust;
109
+ }
110
+ }
111
+ BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
112
+ /* max_bits = */ 8,
113
+ depths, bits, storage_ix, storage);
114
+ if (BROTLI_IS_OOM(m)) return 0;
115
+ {
116
+ size_t literal_ratio = 0;
117
+ for (i = 0; i < 256; ++i) {
118
+ if (histogram[i]) literal_ratio += histogram[i] * depths[i];
119
+ }
120
+ /* Estimated encoding ratio, millibytes per symbol. */
121
+ return (literal_ratio * 125) / histogram_total;
122
+ }
123
+ }
124
+
125
+ /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
126
+ "bits" based on "histogram" and stores it into the bit stream. */
127
+ static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
128
+ uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
129
+ uint8_t* storage) {
130
+ /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
131
+ HuffmanTree tree[129];
132
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
133
+ uint16_t cmd_bits[64];
134
+
135
+ BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
136
+ BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
137
+ /* We have to jump through a few hoops here in order to compute
138
+ the command bits because the symbols are in a different order than in
139
+ the full alphabet. This looks complicated, but having the symbols
140
+ in this order in the command bits saves a few branches in the Emit*
141
+ functions. */
142
+ memcpy(cmd_depth, depth, 24);
143
+ memcpy(cmd_depth + 24, depth + 40, 8);
144
+ memcpy(cmd_depth + 32, depth + 24, 8);
145
+ memcpy(cmd_depth + 40, depth + 48, 8);
146
+ memcpy(cmd_depth + 48, depth + 32, 8);
147
+ memcpy(cmd_depth + 56, depth + 56, 8);
148
+ BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
149
+ memcpy(bits, cmd_bits, 48);
150
+ memcpy(bits + 24, cmd_bits + 32, 16);
151
+ memcpy(bits + 32, cmd_bits + 48, 16);
152
+ memcpy(bits + 40, cmd_bits + 24, 16);
153
+ memcpy(bits + 48, cmd_bits + 40, 16);
154
+ memcpy(bits + 56, cmd_bits + 56, 16);
155
+ BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
156
+ {
157
+ /* Create the bit length array for the full command alphabet. */
158
+ size_t i;
159
+ memset(cmd_depth, 0, 64); /* only 64 first values were used */
160
+ memcpy(cmd_depth, depth, 8);
161
+ memcpy(cmd_depth + 64, depth + 8, 8);
162
+ memcpy(cmd_depth + 128, depth + 16, 8);
163
+ memcpy(cmd_depth + 192, depth + 24, 8);
164
+ memcpy(cmd_depth + 384, depth + 32, 8);
165
+ for (i = 0; i < 8; ++i) {
166
+ cmd_depth[128 + 8 * i] = depth[40 + i];
167
+ cmd_depth[256 + 8 * i] = depth[48 + i];
168
+ cmd_depth[448 + 8 * i] = depth[56 + i];
169
+ }
170
+ BrotliStoreHuffmanTree(
171
+ cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
172
+ }
173
+ BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
174
+ }
175
+
176
+ /* REQUIRES: insertlen < 6210 */
177
+ static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
178
+ const uint8_t depth[128],
179
+ const uint16_t bits[128],
180
+ uint32_t histo[128],
181
+ size_t* storage_ix,
182
+ uint8_t* storage) {
183
+ if (insertlen < 6) {
184
+ const size_t code = insertlen + 40;
185
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
186
+ ++histo[code];
187
+ } else if (insertlen < 130) {
188
+ const size_t tail = insertlen - 2;
189
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
190
+ const size_t prefix = tail >> nbits;
191
+ const size_t inscode = (nbits << 1) + prefix + 42;
192
+ BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
193
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
194
+ ++histo[inscode];
195
+ } else if (insertlen < 2114) {
196
+ const size_t tail = insertlen - 66;
197
+ const uint32_t nbits = Log2FloorNonZero(tail);
198
+ const size_t code = nbits + 50;
199
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
200
+ BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
201
+ ++histo[code];
202
+ } else {
203
+ BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
204
+ BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
205
+ ++histo[61];
206
+ }
207
+ }
208
+
209
+ static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
210
+ const uint8_t depth[128],
211
+ const uint16_t bits[128],
212
+ uint32_t histo[128],
213
+ size_t* storage_ix,
214
+ uint8_t* storage) {
215
+ if (insertlen < 22594) {
216
+ BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
217
+ BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
218
+ ++histo[62];
219
+ } else {
220
+ BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
221
+ BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
222
+ ++histo[63];
223
+ }
224
+ }
225
+
226
+ static BROTLI_INLINE void EmitCopyLen(size_t copylen,
227
+ const uint8_t depth[128],
228
+ const uint16_t bits[128],
229
+ uint32_t histo[128],
230
+ size_t* storage_ix,
231
+ uint8_t* storage) {
232
+ if (copylen < 10) {
233
+ BrotliWriteBits(
234
+ depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
235
+ ++histo[copylen + 14];
236
+ } else if (copylen < 134) {
237
+ const size_t tail = copylen - 6;
238
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
239
+ const size_t prefix = tail >> nbits;
240
+ const size_t code = (nbits << 1) + prefix + 20;
241
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
242
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
243
+ ++histo[code];
244
+ } else if (copylen < 2118) {
245
+ const size_t tail = copylen - 70;
246
+ const uint32_t nbits = Log2FloorNonZero(tail);
247
+ const size_t code = nbits + 28;
248
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
249
+ BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
250
+ ++histo[code];
251
+ } else {
252
+ BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
253
+ BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
254
+ ++histo[39];
255
+ }
256
+ }
257
+
258
+ static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
259
+ const uint8_t depth[128],
260
+ const uint16_t bits[128],
261
+ uint32_t histo[128],
262
+ size_t* storage_ix,
263
+ uint8_t* storage) {
264
+ if (copylen < 12) {
265
+ BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
266
+ ++histo[copylen - 4];
267
+ } else if (copylen < 72) {
268
+ const size_t tail = copylen - 8;
269
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1;
270
+ const size_t prefix = tail >> nbits;
271
+ const size_t code = (nbits << 1) + prefix + 4;
272
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
273
+ BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
274
+ ++histo[code];
275
+ } else if (copylen < 136) {
276
+ const size_t tail = copylen - 8;
277
+ const size_t code = (tail >> 5) + 30;
278
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
279
+ BrotliWriteBits(5, tail & 31, storage_ix, storage);
280
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
281
+ ++histo[code];
282
+ ++histo[64];
283
+ } else if (copylen < 2120) {
284
+ const size_t tail = copylen - 72;
285
+ const uint32_t nbits = Log2FloorNonZero(tail);
286
+ const size_t code = nbits + 28;
287
+ BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
288
+ BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
289
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
290
+ ++histo[code];
291
+ ++histo[64];
292
+ } else {
293
+ BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
294
+ BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
295
+ BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
296
+ ++histo[39];
297
+ ++histo[64];
298
+ }
299
+ }
300
+
301
+ static BROTLI_INLINE void EmitDistance(size_t distance,
302
+ const uint8_t depth[128],
303
+ const uint16_t bits[128],
304
+ uint32_t histo[128],
305
+ size_t* storage_ix, uint8_t* storage) {
306
+ const size_t d = distance + 3;
307
+ const uint32_t nbits = Log2FloorNonZero(d) - 1u;
308
+ const size_t prefix = (d >> nbits) & 1;
309
+ const size_t offset = (2 + prefix) << nbits;
310
+ const size_t distcode = 2 * (nbits - 1) + prefix + 80;
311
+ BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
312
+ BrotliWriteBits(nbits, d - offset, storage_ix, storage);
313
+ ++histo[distcode];
314
+ }
315
+
316
+ static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
317
+ const uint8_t depth[256],
318
+ const uint16_t bits[256],
319
+ size_t* storage_ix, uint8_t* storage) {
320
+ size_t j;
321
+ for (j = 0; j < len; j++) {
322
+ const uint8_t lit = input[j];
323
+ BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
324
+ }
325
+ }
326
+
327
+ /* REQUIRES: len <= 1 << 24. */
328
+ static void BrotliStoreMetaBlockHeader(
329
+ size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
330
+ uint8_t* storage) {
331
+ size_t nibbles = 6;
332
+ /* ISLAST */
333
+ BrotliWriteBits(1, 0, storage_ix, storage);
334
+ if (len <= (1U << 16)) {
335
+ nibbles = 4;
336
+ } else if (len <= (1U << 20)) {
337
+ nibbles = 5;
338
+ }
339
+ BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
340
+ BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
341
+ /* ISUNCOMPRESSED */
342
+ BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
343
+ }
344
+
345
/* Overwrites "n_bits" bits of "array", starting at bit position "pos", with
   the low "n_bits" bits of "bits" (least significant bit first). Bits outside
   that range are left untouched. The work is done one byte at a time. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t* array) {
  while (n_bits > 0) {
    const size_t index = pos >> 3;
    /* Low bits of this byte that lie before "pos" and must survive. */
    const size_t keep_low = pos & 7;
    const size_t room = 8 - keep_low;
    const size_t take = (n_bits < room) ? n_bits : room;
    const size_t upto = keep_low + take;
    const uint32_t keep_mask =
        (~((1u << upto) - 1u)) | ((1u << keep_low) - 1u);
    const uint32_t kept = array[index] & keep_mask;
    const uint32_t fresh = bits & ((1u << take) - 1u);
    array[index] = (uint8_t)((fresh << keep_low) | kept);
    n_bits -= take;
    bits >>= take;
    pos += take;
  }
}
363
+
364
/* Moves the write position back to bit "new_storage_ix". In the byte that
   contains the new position, the bits at and above it are cleared; the bits
   below it are kept. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  uint8_t* const cell = &storage[new_storage_ix >> 3];
  const size_t bits_to_keep = new_storage_ix & 7;
  const size_t keep_mask = (1u << bits_to_keep) - 1;
  *cell = (uint8_t)(*cell & (uint8_t)keep_mask);
  *storage_ix = new_storage_ix;
}
371
+
372
+ static BROTLI_BOOL ShouldMergeBlock(
373
+ const uint8_t* data, size_t len, const uint8_t* depths) {
374
+ size_t histo[256] = { 0 };
375
+ static const size_t kSampleRate = 43;
376
+ size_t i;
377
+ for (i = 0; i < len; i += kSampleRate) {
378
+ ++histo[data[i]];
379
+ }
380
+ {
381
+ const size_t total = (len + kSampleRate - 1) / kSampleRate;
382
+ double r = (FastLog2(total) + 0.5) * (double)total + 200;
383
+ for (i = 0; i < 256; ++i) {
384
+ r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
385
+ }
386
+ return TO_BROTLI_BOOL(r >= 0.0);
387
+ }
388
+ }
389
+
390
+ /* Acceptable compression loss in exchange for the speed-up on incompressible data is 2%. */
391
+ #define MIN_RATIO 980
392
+
393
+ static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
394
+ const uint8_t* metablock_start, const uint8_t* next_emit,
395
+ const size_t insertlen, const size_t literal_ratio) {
396
+ const size_t compressed = (size_t)(next_emit - metablock_start);
397
+ if (compressed * 50 > insertlen) {
398
+ return BROTLI_FALSE;
399
+ } else {
400
+ return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
401
+ }
402
+ }
403
+
/* Replaces everything written since bit position storage_ix_start with one
   uncompressed meta-block holding the bytes [begin, end).

   Steps: rewind the bit cursor to storage_ix_start, store a meta-block header
   with the is_uncompressed flag set, advance the cursor to the next byte
   boundary, copy the raw bytes, and finally zero the byte at the new cursor
   position.

   begin / end         first byte and one-past-last byte of the raw data.
   storage_ix_start    bit position at which the meta-block header starts.
   storage_ix          in/out bit cursor into storage.
   storage             output buffer; the caller must provide room for the
                       header, the raw bytes and one trailing byte. */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Round up to a byte boundary. The mask is built in size_t: "~7u" is a
     32-bit unsigned value that, once converted to size_t, would also clear
     bits 32..63 of the cursor on targets with a 64-bit size_t. */
  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}
415
+
/* Initial counts for the 128-entry command/distance histogram. It is copied
   into the working histogram before each block is processed and is never
   written to, hence const. */
static const uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
424
+
425
/* Compresses input[0 .. input_size) into the bit stream at storage /
   *storage_ix, emitting one or more meta-blocks.

   The input is processed in blocks: the first block of a meta-block is at
   most kFirstBlockSize bytes, follow-up blocks are at most kMergeBlockSize
   bytes. After each block the code either extends the current meta-block
   (patching its MLEN field in place via UpdateBits), flushes the remaining
   literals, or rewrites the whole meta-block as uncompressed.

   m                 memory manager; handed to BuildAndStoreLiteralPrefixCode
                     and checked with BROTLI_IS_OOM, in which case the
                     function returns early.
   input, input_size data to compress.
   is_last           when false, the command prefix code gathered from this
                     call is stored into cmd_code / *cmd_code_numbits at the
                     end.
   table             hash table; entries are written as offsets relative to
                     the start of the input (base_ip). Presumably sized
                     1 << table_bits by the caller -- TODO confirm.
   table_bits        determines the hash shift (64 - table_bits).
   cmd_depth,
   cmd_bits          command prefix code used for emitting commands; rebuilt
                     by BuildAndStoreCommandPrefixCode.
   cmd_code,
   cmd_code_numbits  pre-serialized command prefix code written at the start
                     of the first meta-block.
   storage_ix,
   storage           in/out bit cursor and output buffer. */
+ static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
426
+ MemoryManager* m, const uint8_t* input, size_t input_size,
427
+ BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
428
+ uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
429
+ size_t* storage_ix, uint8_t* storage) {
430
+ uint32_t cmd_histo[128];
431
+ const uint8_t* ip_end;
432
+
433
+ /* "next_emit" is a pointer to the first byte that is not covered by a
434
+ previous copy. Bytes between "next_emit" and the start of the next copy or
435
+ the end of the input will be emitted as literal bytes. */
436
+ const uint8_t* next_emit = input;
437
+ /* Save the start of the first block for position and distance computations.
438
+ */
439
+ const uint8_t* base_ip = input;
440
+
441
+ static const size_t kFirstBlockSize = 3 << 15;
442
+ static const size_t kMergeBlockSize = 1 << 16;
443
+
444
+ const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
445
+ const size_t kMinMatchLen = 5;
446
+
447
+ const uint8_t* metablock_start = input;
448
+ size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
449
+ size_t total_block_size = block_size;
450
+ /* Save the bit position of the MLEN field of the meta-block header, so that
451
+ we can update it later if we decide to extend this meta-block. */
452
+ size_t mlen_storage_ix = *storage_ix + 3;
453
+
454
+ uint8_t lit_depth[256];
455
+ uint16_t lit_bits[256];
456
+
457
+ size_t literal_ratio;
458
+
459
+ const uint8_t* ip;
460
+ int last_distance;
461
+
462
+ const size_t shift = 64u - table_bits;
463
+
464
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
465
+ /* No block splits, no contexts. */
466
+ BrotliWriteBits(13, 0, storage_ix, storage);
467
+
468
+ literal_ratio = BuildAndStoreLiteralPrefixCode(
469
+ m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
470
+ if (BROTLI_IS_OOM(m)) return;
471
+
472
+ {
473
+ /* Store the pre-compressed command and distance prefix codes. */
474
+ size_t i;
475
+ for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
476
+ BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
477
+ }
478
+ }
479
+ BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
480
+ storage_ix, storage);
481
+
/* Entry point for every block: reached by fall-through for the first block,
   and by goto both when a block is merged into the current meta-block and
   when a new meta-block has been started. */
482
+ emit_commands:
483
+ /* Initialize the command and distance histograms. We will gather
484
+ statistics of command and distance codes during the processing
485
+ of this block and use it to update the command and distance
486
+ prefix codes for the next block. */
487
+ memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
488
+
489
+ /* "ip" is the input pointer. */
490
+ ip = input;
491
+ last_distance = -1;
492
+ ip_end = input + block_size;
493
+
494
+ if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
495
+ /* For the last block, we need to keep a 16 bytes margin so that we can be
496
+ sure that all distances are at most window size - 16.
497
+ For all other blocks, we only need to keep a margin of 5 bytes so that
498
+ we don't go over the block size with a copy. */
499
+ const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
500
+ input_size - kInputMarginBytes);
501
+ const uint8_t* ip_limit = input + len_limit;
502
+
503
+ uint32_t next_hash;
504
+ for (next_hash = Hash(++ip, shift); ; ) {
505
+ /* Step 1: Scan forward in the input looking for a 5-byte-long match.
506
+ If we get close to exhausting the input then goto emit_remainder.
507
+
508
+ Heuristic match skipping: If 32 bytes are scanned with no matches
509
+ found, start looking only at every other byte. If 32 more bytes are
510
+ scanned, look at every third byte, etc.. When a match is found,
511
+ immediately go back to looking at every byte. This is a small loss
512
+ (~5% performance, ~0.1% density) for compressible data due to more
513
+ bookkeeping, but for non-compressible data (such as JPEG) it's a huge
514
+ win since the compressor quickly "realizes" the data is incompressible
515
+ and doesn't bother looking for matches everywhere.
516
+
517
+ The "skip" variable keeps track of how many bytes there are since the
518
+ last match; dividing it by 32 (i.e. right-shifting by five) gives the
519
+ number of bytes to move ahead for each iteration. */
520
+ uint32_t skip = 32;
521
+
522
+ const uint8_t* next_ip = ip;
523
+ const uint8_t* candidate;
524
+ BROTLI_DCHECK(next_emit < ip);
525
+ trawl:
526
+ do {
527
+ uint32_t hash = next_hash;
528
+ uint32_t bytes_between_hash_lookups = skip++ >> 5;
529
+ BROTLI_DCHECK(hash == Hash(next_ip, shift));
530
+ ip = next_ip;
531
+ next_ip = ip + bytes_between_hash_lookups;
532
+ if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
533
+ goto emit_remainder;
534
+ }
535
+ next_hash = Hash(next_ip, shift);
536
+ candidate = ip - last_distance;
537
+ if (IsMatch(ip, candidate)) {
538
+ if (BROTLI_PREDICT_TRUE(candidate < ip)) {
539
+ table[hash] = (int)(ip - base_ip);
540
+ break;
541
+ }
542
+ }
543
+ candidate = base_ip + table[hash];
544
+ BROTLI_DCHECK(candidate >= base_ip);
545
+ BROTLI_DCHECK(candidate < ip);
546
+
547
+ table[hash] = (int)(ip - base_ip);
548
+ } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
549
+
550
+ /* Check copy distance. If candidate is not feasible, continue search.
551
+ Checking is done outside of hot loop to reduce overhead. */
552
+ if (ip - candidate > MAX_DISTANCE) goto trawl;
553
+
554
+ /* Step 2: Emit the found match together with the literal bytes from
555
+ "next_emit" to the bit stream, and then see if we can find a next match
556
+ immediately afterwards. Repeat until we find no match for the input
557
+ without emitting some literal bytes. */
558
+
559
+ {
560
+ /* We have a 5-byte match at ip, and we need to emit bytes in
561
+ [next_emit, ip). */
562
+ const uint8_t* base = ip;
563
+ size_t matched = 5 + FindMatchLengthWithLimit(
564
+ candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
565
+ int distance = (int)(base - candidate); /* > 0 */
566
+ size_t insert = (size_t)(base - next_emit);
567
+ ip += matched;
568
+ BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
569
+ if (BROTLI_PREDICT_TRUE(insert < 6210)) {
570
+ EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
571
+ storage_ix, storage);
572
+ } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
573
+ literal_ratio)) {
/* mlen_storage_ix - 3 is the bit position at which this meta-block's header
   was started, so the meta-block written so far is replaced by an
   uncompressed one covering [metablock_start, base). Compression then
   resumes at base with a fresh meta-block. */
574
+ EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
575
+ storage_ix, storage);
576
+ input_size -= (size_t)(base - input);
577
+ input = base;
578
+ next_emit = input;
579
+ goto next_block;
580
+ } else {
581
+ EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
582
+ storage_ix, storage);
583
+ }
584
+ EmitLiterals(next_emit, insert, lit_depth, lit_bits,
585
+ storage_ix, storage);
586
+ if (distance == last_distance) {
587
+ BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
588
+ ++cmd_histo[64];
589
+ } else {
590
+ EmitDistance((size_t)distance, cmd_depth, cmd_bits,
591
+ cmd_histo, storage_ix, storage);
592
+ last_distance = distance;
593
+ }
594
+ EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
595
+ storage_ix, storage);
596
+
597
+ next_emit = ip;
598
+ if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
599
+ goto emit_remainder;
600
+ }
601
+ /* We could immediately start working at ip now, but to improve
602
+ compression we first update "table" with the hashes of some positions
603
+ within the last copy. */
604
+ {
605
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
606
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
607
+ uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
608
+ table[prev_hash] = (int)(ip - base_ip - 3);
609
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
610
+ table[prev_hash] = (int)(ip - base_ip - 2);
611
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
612
+ table[prev_hash] = (int)(ip - base_ip - 1);
613
+
614
+ candidate = base_ip + table[cur_hash];
615
+ table[cur_hash] = (int)(ip - base_ip);
616
+ }
617
+ }
618
+
619
+ while (IsMatch(ip, candidate)) {
620
+ /* We have a 5-byte match at ip, and no need to emit any literal bytes
621
+ prior to ip. */
622
+ const uint8_t* base = ip;
623
+ size_t matched = 5 + FindMatchLengthWithLimit(
624
+ candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
625
+ if (ip - candidate > MAX_DISTANCE) break;
626
+ ip += matched;
627
+ last_distance = (int)(base - candidate); /* > 0 */
628
+ BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
629
+ EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
630
+ storage_ix, storage);
631
+ EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
632
+ cmd_histo, storage_ix, storage);
633
+
634
+ next_emit = ip;
635
+ if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
636
+ goto emit_remainder;
637
+ }
638
+ /* We could immediately start working at ip now, but to improve
639
+ compression we first update "table" with the hashes of some positions
640
+ within the last copy. */
641
+ {
642
+ uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
643
+ uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
644
+ uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
645
+ table[prev_hash] = (int)(ip - base_ip - 3);
646
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
647
+ table[prev_hash] = (int)(ip - base_ip - 2);
648
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
649
+ table[prev_hash] = (int)(ip - base_ip - 1);
650
+
651
+ candidate = base_ip + table[cur_hash];
652
+ table[cur_hash] = (int)(ip - base_ip);
653
+ }
654
+ }
655
+
656
+ next_hash = Hash(++ip, shift);
657
+ }
658
+ }
659
+
/* The current block is finished (or was too short to scan). Consume it, size
   the next block, and then either merge that block into the current
   meta-block or flush the pending literals of this one. */
660
+ emit_remainder:
661
+ BROTLI_DCHECK(next_emit <= ip_end);
662
+ input += block_size;
663
+ input_size -= block_size;
664
+ block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
665
+
666
+ /* Decide if we want to continue this meta-block instead of emitting the
667
+ last insert-only command. */
668
+ if (input_size > 0 &&
669
+ total_block_size + block_size <= (1 << 20) &&
670
+ ShouldMergeBlock(input, block_size, lit_depth)) {
671
+ BROTLI_DCHECK(total_block_size > (1 << 16));
672
+ /* Update the size of the current meta-block and continue emitting commands.
673
+ We can do this because the current size and the new size both have 5
674
+ nibbles. */
675
+ total_block_size += block_size;
676
+ UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
677
+ goto emit_commands;
678
+ }
679
+
680
+ /* Emit the remaining bytes as literals. */
681
+ if (next_emit < ip_end) {
682
+ const size_t insert = (size_t)(ip_end - next_emit);
683
+ if (BROTLI_PREDICT_TRUE(insert < 6210)) {
684
+ EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
685
+ storage_ix, storage);
686
+ EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
687
+ } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
688
+ literal_ratio)) {
689
+ EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
690
+ storage_ix, storage);
691
+ } else {
692
+ EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
693
+ storage_ix, storage);
694
+ EmitLiterals(next_emit, insert, lit_depth, lit_bits,
695
+ storage_ix, storage);
696
+ }
697
+ }
698
+ next_emit = ip_end;
699
+
700
+ next_block:
701
+ /* If we have more data, write a new meta-block header and prefix codes and
702
+ then continue emitting commands. */
703
+ if (input_size > 0) {
704
+ metablock_start = input;
705
+ block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
706
+ total_block_size = block_size;
707
+ /* Save the bit position of the MLEN field of the meta-block header, so that
708
+ we can update it later if we decide to extend this meta-block. */
709
+ mlen_storage_ix = *storage_ix + 3;
710
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
711
+ /* No block splits, no contexts. */
712
+ BrotliWriteBits(13, 0, storage_ix, storage);
713
+ literal_ratio = BuildAndStoreLiteralPrefixCode(
714
+ m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
715
+ if (BROTLI_IS_OOM(m)) return;
716
+ BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
717
+ storage_ix, storage);
718
+ goto emit_commands;
719
+ }
720
+
721
+ if (!is_last) {
722
+ /* If this is not the last block, update the command and distance prefix
723
+ codes for the next block and store the compressed forms. */
724
+ cmd_code[0] = 0;
725
+ *cmd_code_numbits = 0;
726
+ BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
727
+ cmd_code_numbits, cmd_code);
728
+ }
729
+ }
730
+
731
+ #define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
732
+
733
+ #define BAKE_METHOD_PARAM_(B) \
734
+ static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
735
+ MemoryManager* m, const uint8_t* input, size_t input_size, \
736
+ BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128], \
737
+ uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code, \
738
+ size_t* storage_ix, uint8_t* storage) { \
739
+ BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B, \
740
+ cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
741
+ }
742
+ FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
743
+ #undef BAKE_METHOD_PARAM_
744
+
745
+ void BrotliCompressFragmentFast(
746
+ MemoryManager* m, const uint8_t* input, size_t input_size,
747
+ BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
748
+ uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
749
+ size_t* storage_ix, uint8_t* storage) {
750
+ const size_t initial_storage_ix = *storage_ix;
751
+ const size_t table_bits = Log2FloorNonZero(table_size);
752
+
753
+ if (input_size == 0) {
754
+ BROTLI_DCHECK(is_last);
755
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
756
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
757
+ *storage_ix = (*storage_ix + 7u) & ~7u;
758
+ return;
759
+ }
760
+
761
+ switch (table_bits) {
762
+ #define CASE_(B) \
763
+ case B: \
764
+ BrotliCompressFragmentFastImpl ## B( \
765
+ m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
766
+ cmd_code_numbits, cmd_code, storage_ix, storage); \
767
+ break;
768
+ FOR_TABLE_BITS_(CASE_)
769
+ #undef CASE_
770
+ default: BROTLI_DCHECK(0); break;
771
+ }
772
+
773
+ /* If output is larger than single uncompressed block, rewrite it. */
774
+ if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
775
+ EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
776
+ storage_ix, storage);
777
+ }
778
+
779
+ if (is_last) {
780
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
781
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
782
+ *storage_ix = (*storage_ix + 7u) & ~7u;
783
+ }
784
+ }
785
+
786
+ #undef FOR_TABLE_BITS_
787
+
788
+ #if defined(__cplusplus) || defined(c_plusplus)
789
+ } /* extern "C" */
790
+ #endif