isomorfeus-ferret 0.12.6 → 0.13.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (249) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +101 -19
  3. data/README.md +85 -16
  4. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
  5. data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
  6. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
  7. data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
  8. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  91. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  92. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  93. data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
  94. data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
  95. data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
  96. data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
  97. data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
  98. data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
  99. data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
  100. data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
  101. data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
  102. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
  103. data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
  104. data/ext/isomorfeus_ferret_ext/frb_index.c +513 -464
  105. data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
  106. data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
  107. data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
  108. data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
  109. data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
  110. data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
  111. data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
  112. data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
  113. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
  114. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
  115. data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
  116. data/ext/isomorfeus_ferret_ext/frt_document.h +10 -9
  117. data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
  118. data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
  119. data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
  120. data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
  121. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +2 -0
  122. data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
  123. data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
  124. data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
  125. data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
  126. data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
  127. data/ext/isomorfeus_ferret_ext/frt_index.c +714 -384
  128. data/ext/isomorfeus_ferret_ext/frt_index.h +274 -290
  129. data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
  130. data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
  131. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
  132. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
  133. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
  134. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
  135. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
  136. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
  137. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
  138. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
  139. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
  140. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
  141. data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
  142. data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
  143. data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
  144. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
  145. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +46 -84
  146. data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
  147. data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
  148. data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
  149. data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
  150. data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
  151. data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
  152. data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
  153. data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
  154. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
  155. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
  156. data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
  157. data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
  158. data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
  159. data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
  160. data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
  161. data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
  162. data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
  163. data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
  164. data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
  165. data/ext/isomorfeus_ferret_ext/test.c +0 -17
  166. data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
  167. data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
  168. data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
  169. data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
  170. data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
  171. data/ext/isomorfeus_ferret_ext/test_fields.c +111 -100
  172. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
  173. data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
  174. data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
  175. data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
  176. data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
  177. data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
  178. data/ext/isomorfeus_ferret_ext/test_index.c +373 -363
  179. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
  180. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
  181. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
  182. data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
  183. data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
  184. data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
  185. data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
  186. data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
  187. data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
  188. data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
  189. data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
  190. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
  191. data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
  192. data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
  193. data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
  194. data/lib/isomorfeus/ferret/version.rb +1 -1
  195. metadata +113 -58
  196. data/ext/isomorfeus_ferret_ext/email.rl +0 -21
  197. data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
  198. data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
  199. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
  200. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
  201. data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
  202. data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
  203. data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
  204. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
  205. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
  206. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
  207. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
  208. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
  209. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
  210. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
  211. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
  212. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
  213. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
  214. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
  215. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
  216. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
  217. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
  218. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
  219. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
  220. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
  221. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
  222. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
  223. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
  224. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
  225. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
  226. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
  227. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
  228. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
  229. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
  230. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
  231. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
  232. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
  233. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
  234. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
  235. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
  236. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
  237. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
  238. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
  239. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
  240. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
  241. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
  242. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
  243. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
  244. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
  245. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
  246. data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
  247. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
  248. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
  249. data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -0,0 +1,194 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Block split point selection utilities. */
8
+
9
+ #include "brotli_enc_block_splitter.h"
10
+
11
+ #include <string.h> /* memcpy, memset */
12
+
13
+ #include "brotli_common_platform.h"
14
+ #include "brotli_enc_bit_cost.h"
15
+ #include "brotli_enc_cluster.h"
16
+ #include "brotli_enc_command.h"
17
+ #include "brotli_enc_fast_log.h"
18
+ #include "brotli_enc_histogram.h"
19
+ #include "brotli_enc_memory.h"
20
+ #include "brotli_enc_quality.h"
21
+
22
+ #if defined(__cplusplus) || defined(c_plusplus)
23
+ extern "C" {
24
+ #endif
25
+
26
+ static const size_t kMaxLiteralHistograms = 100;
27
+ static const size_t kMaxCommandHistograms = 50;
28
+ static const double kLiteralBlockSwitchCost = 28.1;
29
+ static const double kCommandBlockSwitchCost = 13.5;
30
+ static const double kDistanceBlockSwitchCost = 14.6;
31
+ static const size_t kLiteralStrideLength = 70;
32
+ static const size_t kCommandStrideLength = 40;
33
+ static const size_t kSymbolsPerLiteralHistogram = 544;
34
+ static const size_t kSymbolsPerCommandHistogram = 530;
35
+ static const size_t kSymbolsPerDistanceHistogram = 544;
36
+ static const size_t kMinLengthForBlockSplitting = 128;
37
+ static const size_t kIterMulForRefining = 2;
38
+ static const size_t kMinItersForRefining = 100;
39
+
40
/* Returns the sum of insert_len_ over cmds[0..num_commands).
   BrotliSplitBlock uses the result as the size of its literal buffer. */
+ static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
41
+ /* Accumulate the insert length of every command. */
42
+ size_t total_length = 0;
43
+ size_t i;
44
+ for (i = 0; i < num_commands; ++i) {
45
+ total_length += cmds[i].insert_len_;
46
+ }
47
+ return total_length;
48
+ }
49
+
50
/* Gathers the inserted bytes of every command into one contiguous array.
   For each command, insert_len_ bytes are read from `data` at the current
   read position and appended to `literals`; the read position then skips
   the bytes covered by the command's copy (CommandCopyLen).
   `data` is indexed modulo (mask + 1): the read position is always masked,
   and an insert run that would pass index `mask` is copied in two pieces.
   In total, the sum of insert_len_ over all commands is written to
   `literals`, so the caller must provide at least that much space. */
+ static void CopyLiteralsToByteArray(const Command* cmds,
51
+ const size_t num_commands,
52
+ const uint8_t* data,
53
+ const size_t offset,
54
+ const size_t mask,
55
+ uint8_t* literals) {
56
+ size_t pos = 0; /* write cursor into literals */
57
+ size_t from_pos = offset & mask; /* read cursor into data, always <= mask */
58
+ size_t i;
59
+ for (i = 0; i < num_commands; ++i) {
60
+ size_t insert_len = cmds[i].insert_len_;
61
+ if (from_pos + insert_len > mask) { /* run reaches past the last index */
62
+ size_t head_size = mask + 1 - from_pos; /* bytes left before the wrap */
63
+ memcpy(literals + pos, data + from_pos, head_size);
64
+ from_pos = 0; /* continue from the start of data */
65
+ pos += head_size;
66
+ insert_len -= head_size; /* what remains after the wrap (may be 0) */
67
+ }
68
+ if (insert_len > 0) {
69
+ memcpy(literals + pos, data + from_pos, insert_len);
70
+ pos += insert_len;
71
+ }
72
+ from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; /* skip the copied bytes */
73
+ }
74
+ }
75
+
76
/* Pseudo-random step: multiplies *seed by 16807 in place (uint32_t
   arithmetic, so the product wraps modulo 2^32) and returns the new value.
   The sequence is fully determined by the initial seed. */
+ static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
77
+ /* Initial seed should be 7. In this case, loop length is (1 << 29). */
78
+ *seed *= 16807U;
79
+ return *seed;
80
+ }
81
+
82
/* Returns FastLog2(count), except that a zero count yields -2.0.
   FindBlocks subtracts this value from the log2 of a histogram's total, so
   a symbol with zero count ends up costing log2(total) + 2. */
+ static BROTLI_INLINE double BitCost(size_t count) {
83
+ return count == 0 ? -2.0 : FastLog2(count);
84
+ }
85
+
86
+ #define HISTOGRAMS_PER_BATCH 64
87
+ #define CLUSTERS_PER_BATCH 16
88
+
89
+ #define FN(X) X ## Literal
90
+ #define DataType uint8_t
91
+ /* NOLINTNEXTLINE(build/include) */
92
+ #include "brotli_enc_block_splitter_inc.h"
93
+ #undef DataType
94
+ #undef FN
95
+
96
+ #define FN(X) X ## Command
97
+ #define DataType uint16_t
98
+ /* NOLINTNEXTLINE(build/include) */
99
+ #include "brotli_enc_block_splitter_inc.h"
100
+ #undef FN
101
+
102
+ #define FN(X) X ## Distance
103
+ /* NOLINTNEXTLINE(build/include) */
104
+ #include "brotli_enc_block_splitter_inc.h"
105
+ #undef DataType
106
+ #undef FN
107
+
108
/* Puts *self into the empty state: both counters and both allocation sizes
   are set to zero and both array pointers to null. Nothing is allocated. */
+ void BrotliInitBlockSplit(BlockSplit* self) {
109
+ self->num_types = 0;
110
+ self->num_blocks = 0;
111
+ self->types = 0; /* null pointer */
112
+ self->lengths = 0; /* null pointer */
113
+ self->types_alloc_size = 0;
114
+ self->lengths_alloc_size = 0;
115
+ }
116
+
117
/* Releases the `types` and `lengths` arrays of *self through memory manager
   `m`. The counters and allocation sizes are not modified here.
   NOTE(review): whether BROTLI_FREE also nulls the pointers is defined in
   the memory header, not visible here -- confirm before reusing *self. */
+ void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
118
+ BROTLI_FREE(m, self->types);
119
+ BROTLI_FREE(m, self->lengths);
120
+ }
121
+
122
/* Computes three independent block splits for one sequence of commands:
     1. literal_split         -- over the literal bytes inserted by the commands,
     2. insert_and_copy_split -- over each command's cmd_prefix_,
     3. dist_split            -- over the low 10 bits of dist_prefix_ of the
                                 commands that have a non-zero copy length and
                                 cmd_prefix_ >= 128.
   Each stage builds a temporary array with BROTLI_ALLOC, hands it to the
   matching SplitByteVector* instantiation and frees it afterwards.
   `data` is addressed modulo (mask + 1), starting at `pos`; the prototype in
   the header names this parameter `offset`.
   NOTE(review): every OOM early return after an allocation leaves the
   function without the matching BROTLI_FREE; presumably the memory manager
   reclaims the buffer on that path -- confirm.
   NOTE(review): the distance stage passes kMaxCommandHistograms and
   kCommandStrideLength rather than distance-specific constants (none are
   defined in this file) -- presumably intentional; confirm. */
+ void BrotliSplitBlock(MemoryManager* m,
123
+ const Command* cmds,
124
+ const size_t num_commands,
125
+ const uint8_t* data,
126
+ const size_t pos,
127
+ const size_t mask,
128
+ const BrotliEncoderParams* params,
129
+ BlockSplit* literal_split,
130
+ BlockSplit* insert_and_copy_split,
131
+ BlockSplit* dist_split) {
132
+ {
133
+ size_t literals_count = CountLiterals(cmds, num_commands);
134
+ uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
135
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
136
+ /* Create a continuous array of literals. */
137
+ CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
138
+ /* Create the block split on the array of literals.
139
+ Literal histograms have alphabet size 256. */
140
+ SplitByteVectorLiteral(
141
+ m, literals, literals_count,
142
+ kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
143
+ kLiteralStrideLength, kLiteralBlockSwitchCost, params,
144
+ literal_split);
145
+ if (BROTLI_IS_OOM(m)) return; /* `literals` is not freed on this path */
146
+ BROTLI_FREE(m, literals);
147
+ }
148
+
149
+ {
150
+ /* Compute prefix codes for commands. */
151
+ uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
152
+ size_t i;
153
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
154
+ for (i = 0; i < num_commands; ++i) {
155
+ insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
156
+ }
157
+ /* Create the block split on the array of command prefixes. */
158
+ SplitByteVectorCommand(
159
+ m, insert_and_copy_codes, num_commands,
160
+ kSymbolsPerCommandHistogram, kMaxCommandHistograms,
161
+ kCommandStrideLength, kCommandBlockSwitchCost, params,
162
+ insert_and_copy_split);
163
+ if (BROTLI_IS_OOM(m)) return; /* `insert_and_copy_codes` is not freed on this path */
164
+ /* TODO: reuse for distances? */
165
+ BROTLI_FREE(m, insert_and_copy_codes);
166
+ }
167
+
168
+ {
169
+ /* Create a continuous array of distance prefixes. */
170
+ uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
171
+ size_t j = 0; /* number of distance prefixes collected so far */
172
+ size_t i;
173
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
174
+ for (i = 0; i < num_commands; ++i) {
175
+ const Command* cmd = &cmds[i];
176
+ if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { /* other commands contribute no distance symbol */
177
+ distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; /* keep the low 10 bits only */
178
+ }
179
+ }
180
+ /* Create the block split on the array of distance prefixes. */
181
+ SplitByteVectorDistance(
182
+ m, distance_prefixes, j,
183
+ kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
184
+ kCommandStrideLength, kDistanceBlockSwitchCost, params,
185
+ dist_split);
186
+ if (BROTLI_IS_OOM(m)) return; /* `distance_prefixes` is not freed on this path */
187
+ BROTLI_FREE(m, distance_prefixes);
188
+ }
189
+ }
190
+
191
+
192
+ #if defined(__cplusplus) || defined(c_plusplus)
193
+ } /* extern "C" */
194
+ #endif
@@ -0,0 +1,51 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Block split point selection utilities. */
8
+
9
+ #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
10
+ #define BROTLI_ENC_BLOCK_SPLITTER_H_
11
+
12
+ #include "brotli_common_platform.h"
13
+ #include "brotli_types.h"
14
+ #include "brotli_enc_command.h"
15
+ #include "brotli_enc_memory.h"
16
+ #include "brotli_enc_quality.h"
17
+
18
+ #if defined(__cplusplus) || defined(c_plusplus)
19
+ extern "C" {
20
+ #endif
21
+
22
/* Result of a block split: two parallel arrays, `types` and `lengths`, that
   each hold num_blocks values, together with their bookkeeping sizes. */
+ typedef struct BlockSplit {
23
+ size_t num_types; /* Number of distinct types */
24
+ size_t num_blocks; /* Number of values in types and lengths */
25
+ uint8_t* types; /* freed by BrotliDestroyBlockSplit */
26
+ uint32_t* lengths; /* freed by BrotliDestroyBlockSplit */
27
+
28
+ size_t types_alloc_size; /* presumably the allocated capacity of types -- TODO confirm */
29
+ size_t lengths_alloc_size; /* presumably the allocated capacity of lengths -- TODO confirm */
30
+ } BlockSplit;
31
+
32
/* BrotliInitBlockSplit zeroes every field of *self.
   BrotliDestroyBlockSplit releases self->types and self->lengths through
   memory manager m. */
+ BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
33
+ BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
34
+ BlockSplit* self);
35
+
36
/* Fills the three output splits (literals, insert-and-copy command
   prefixes, distance prefixes) for the commands cmds[0..num_commands).
   `data` is read modulo (mask + 1), starting at `offset`; the definition
   names this parameter `pos`. */
+ BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
37
+ const Command* cmds,
38
+ const size_t num_commands,
39
+ const uint8_t* data,
40
+ const size_t offset,
41
+ const size_t mask,
42
+ const BrotliEncoderParams* params,
43
+ BlockSplit* literal_split,
44
+ BlockSplit* insert_and_copy_split,
45
+ BlockSplit* dist_split);
46
+
47
+ #if defined(__cplusplus) || defined(c_plusplus)
48
+ } /* extern "C" */
49
+ #endif
50
+
51
+ #endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
@@ -0,0 +1,440 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, DataType */
9
+
10
+ #define HistogramType FN(Histogram)
11
+
12
/* Seeds num_histograms histograms from `data`. All histograms are cleared,
   then histogram i receives `stride` consecutive symbols starting near
   length * i / num_histograms; for every i except 0 the start is moved
   forward by a pseudo-random amount in [0, block_length). The random
   sequence is fixed (seed 7), so the result is deterministic.
   Preconditions implied by the arithmetic below:
     - length > stride, otherwise `length - stride - 1` wraps around;
     - length >= num_histograms when num_histograms > 1, otherwise
       block_length is 0 and the modulo divides by zero.
   NOTE(review): the caller is outside this view -- confirm it guarantees
   both. */
+ static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
13
+ size_t stride,
14
+ size_t num_histograms,
15
+ HistogramType* histograms) {
16
+ uint32_t seed = 7;
17
+ size_t block_length = length / num_histograms;
18
+ size_t i;
19
+ FN(ClearHistograms)(histograms, num_histograms);
20
+ for (i = 0; i < num_histograms; ++i) {
21
+ size_t pos = length * i / num_histograms; /* start of the i-th equal slice */
22
+ if (i != 0) {
23
+ pos += MyRand(&seed) % block_length; /* random offset within one slice length */
24
+ }
25
+ if (pos + stride >= length) {
26
+ pos = length - stride - 1; /* clamp so the sample stays inside data */
27
+ }
28
+ FN(HistogramAddVector)(&histograms[i], data + pos, stride);
29
+ }
30
+ }
31
+
32
/* Adds one run of consecutive symbols from `data` to *sample.
   If stride >= length the whole of data[0..length) is added; otherwise
   `stride` symbols are added starting at a pseudo-random position in
   [0, length - stride]. *seed is advanced only in the second case.
   *sample is not cleared here. */
+ static void FN(RandomSample)(uint32_t* seed,
33
+ const DataType* data,
34
+ size_t length,
35
+ size_t stride,
36
+ HistogramType* sample) {
37
+ size_t pos = 0;
38
+ if (stride >= length) {
39
+ stride = length; /* shrink the sample to the available data */
40
+ } else {
41
+ pos = MyRand(seed) % (length - stride + 1);
42
+ }
43
+ FN(HistogramAddVector)(sample, data + pos, stride);
44
+ }
45
+
46
/* Adds further random samples to the existing histograms.
   The iteration count starts as
   kIterMulForRefining * length / stride + kMinItersForRefining and is then
   rounded up to a multiple of num_histograms. Samples are handed out
   round-robin (iter % num_histograms), so every histogram receives the same
   number of samples. The random sequence is fixed (seed 7).
   `stride` and `num_histograms` are used as divisors and must be non-zero. */
+ static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
47
+ size_t stride,
48
+ size_t num_histograms,
49
+ HistogramType* histograms) {
50
+ size_t iters =
51
+ kIterMulForRefining * length / stride + kMinItersForRefining;
52
+ uint32_t seed = 7;
53
+ size_t iter;
54
+ iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; /* round up to a multiple of num_histograms */
55
+ for (iter = 0; iter < iters; ++iter) {
56
+ HistogramType sample;
57
+ FN(HistogramClear)(&sample);
58
+ FN(RandomSample)(&seed, data, length, stride, &sample);
59
+ FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
60
+ }
61
+ }
62
+
63
+ /* Assigns a block id from the range [0, num_histograms) to each data element
64
+ in data[0..length) and fills in block_id[0..length) with the assigned values.
65
+ Returns the number of blocks, i.e. one plus the number of block switches. */
66
/* Scratch buffers supplied by the caller (sizes follow from the memsets and
   indexing below):
     insert_cost   -- data_size * num_histograms doubles, indexed as
                      [symbol * num_histograms + k];
     cost          -- num_histograms doubles;
     switch_signal -- length * bitmaplen bytes, one bitmap of num_histograms
                      bits per position, where bitmaplen is num_histograms
                      rounded up to whole bytes;
     block_id      -- length bytes; this is also the output.
   With num_histograms <= 1 every position gets id 0 and the scratch buffers
   are not touched. Otherwise length must be at least 1, because the
   trace-back starts at index length - 1. */
+ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
67
+ const double block_switch_bitcost,
68
+ const size_t num_histograms,
69
+ const HistogramType* histograms,
70
+ double* insert_cost,
71
+ double* cost,
72
+ uint8_t* switch_signal,
73
+ uint8_t* block_id) {
74
+ const size_t data_size = FN(HistogramDataSize)();
75
+ const size_t bitmaplen = (num_histograms + 7) >> 3; /* bytes needed for one bit per histogram */
76
+ size_t num_blocks = 1;
77
+ size_t i;
78
+ size_t j;
79
+ BROTLI_DCHECK(num_histograms <= 256);
80
+ if (num_histograms <= 1) {
81
+ for (i = 0; i < length; ++i) {
82
+ block_id[i] = 0;
83
+ }
84
+ return 1;
85
+ }
86
+ memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
87
+ for (i = 0; i < num_histograms; ++i) {
88
+ insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); /* row 0 temporarily holds log2 of each total */
89
+ }
90
+ for (i = data_size; i != 0;) { /* highest symbol first: row 0 is still needed as input until the final pass */
91
+ --i;
92
+ for (j = 0; j < num_histograms; ++j) {
93
+ insert_cost[i * num_histograms + j] =
94
+ insert_cost[j] - BitCost(histograms[j].data_[i]);
95
+ }
96
+ }
97
+ memset(cost, 0, sizeof(cost[0]) * num_histograms);
98
+ memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
99
+ /* After each iteration of this loop, cost[k] will contain the difference
100
+ between the minimum cost of arriving at the current byte position using
101
+ entropy code k, and the minimum cost of arriving at the current byte
102
+ position. This difference is capped at the block switch cost, and if it
103
+ reaches block switch cost, it means that when we trace back from the last
104
+ position, we need to switch here. */
105
+ for (i = 0; i < length; ++i) {
106
+ const size_t byte_ix = i;
107
+ size_t ix = byte_ix * bitmaplen; /* start of this position's bitmap */
108
+ size_t insert_cost_ix = data[byte_ix] * num_histograms; /* start of this symbol's cost row */
109
+ double min_cost = 1e99;
110
+ double block_switch_cost = block_switch_bitcost;
111
+ size_t k;
112
+ for (k = 0; k < num_histograms; ++k) {
113
+ /* We are coding the symbol in data[byte_ix] with entropy code k. */
114
+ cost[k] += insert_cost[insert_cost_ix + k];
115
+ if (cost[k] < min_cost) {
116
+ min_cost = cost[k];
117
+ block_id[byte_ix] = (uint8_t)k; /* cheapest code at this position (lowest k on ties) */
118
+ }
119
+ }
120
+ /* More blocks for the beginning. */
121
+ if (byte_ix < 2000) {
122
+ block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; /* factor grows from 0.77 towards 0.84 */
123
+ }
124
+ for (k = 0; k < num_histograms; ++k) {
125
+ cost[k] -= min_cost;
126
+ if (cost[k] >= block_switch_cost) {
127
+ const uint8_t mask = (uint8_t)(1u << (k & 7));
128
+ cost[k] = block_switch_cost;
129
+ BROTLI_DCHECK((k >> 3) < bitmaplen);
130
+ switch_signal[ix + (k >> 3)] |= mask; /* mark: code k is capped at this position */
131
+ }
132
+ }
133
+ }
134
+ { /* Trace back from the last position and switch at the marked places. */
135
+ size_t byte_ix = length - 1;
136
+ size_t ix = byte_ix * bitmaplen;
137
+ uint8_t cur_id = block_id[byte_ix];
138
+ while (byte_ix > 0) {
139
+ const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
140
+ BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen);
141
+ --byte_ix;
142
+ ix -= bitmaplen;
143
+ if (switch_signal[ix + (cur_id >> 3)] & mask) { /* current code was capped at the previous position */
144
+ if (cur_id != block_id[byte_ix]) {
145
+ cur_id = block_id[byte_ix]; /* adopt that position's cheapest code */
146
+ ++num_blocks;
147
+ }
148
+ }
149
+ block_id[byte_ix] = cur_id; /* overwrite the per-position best with the traced id */
150
+ }
151
+ }
152
+ return num_blocks;
153
+ }
154
+
155
/* Renumbers the ids in block_ids[0..length) in place so that they become
   0, 1, 2, ... in order of first appearance, and returns how many distinct
   ids occur. `new_id` is scratch space with at least num_histograms
   entries; on return new_id[old] holds the new id of each old id that
   occurred, and kInvalidId for the others. */
+ static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
156
+ uint16_t* new_id, const size_t num_histograms) {
157
+ static const uint16_t kInvalidId = 256; /* outside the uint8_t id range */
158
+ uint16_t next_id = 0;
159
+ size_t i;
160
+ for (i = 0; i < num_histograms; ++i) {
161
+ new_id[i] = kInvalidId;
162
+ }
163
+ for (i = 0; i < length; ++i) {
164
+ BROTLI_DCHECK(block_ids[i] < num_histograms);
165
+ if (new_id[block_ids[i]] == kInvalidId) {
166
+ new_id[block_ids[i]] = next_id++; /* first occurrence takes the next free id */
167
+ }
168
+ }
169
+ for (i = 0; i < length; ++i) {
170
+ block_ids[i] = (uint8_t)new_id[block_ids[i]];
171
+ BROTLI_DCHECK(block_ids[i] < num_histograms);
172
+ }
173
+ BROTLI_DCHECK(next_id <= num_histograms);
174
+ return next_id;
175
+ }
176
+
177
/* Rebuilds the histograms from a block assignment: clears
   histograms[0..num_histograms), then adds each data[i] to the histogram
   selected by block_ids[i]. Every block id must be below num_histograms. */
+ static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
178
+ const uint8_t* block_ids,
179
+ const size_t num_histograms,
180
+ HistogramType* histograms) {
181
+ size_t i;
182
+ FN(ClearHistograms)(histograms, num_histograms);
183
+ for (i = 0; i < length; ++i) {
184
+ FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
185
+ }
186
+ }
187
+
188
+ static void FN(ClusterBlocks)(MemoryManager* m,
189
+ const DataType* data, const size_t length,
190
+ const size_t num_blocks,
191
+ uint8_t* block_ids,
192
+ BlockSplit* split) {
193
+ uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
194
+ uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
195
+ const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
196
+ (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
197
+ size_t all_histograms_size = 0;
198
+ size_t all_histograms_capacity = expected_num_clusters;
199
+ HistogramType* all_histograms =
200
+ BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
201
+ size_t cluster_size_size = 0;
202
+ size_t cluster_size_capacity = expected_num_clusters;
203
+ uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
204
+ size_t num_clusters = 0;
205
+ HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
206
+ BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
207
+ size_t max_num_pairs =
208
+ HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
209
+ size_t pairs_capacity = max_num_pairs + 1;
210
+ HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
211
+ size_t pos = 0;
212
+ uint32_t* clusters;
213
+ size_t num_final_clusters;
214
+ static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
215
+ uint32_t* new_index;
216
+ size_t i;
217
+ uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 };
218
+ uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 };
219
+ uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 };
220
+ uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 };
221
+
222
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histogram_symbols) ||
223
+ BROTLI_IS_NULL(block_lengths) || BROTLI_IS_NULL(all_histograms) ||
224
+ BROTLI_IS_NULL(cluster_size) || BROTLI_IS_NULL(histograms) ||
225
+ BROTLI_IS_NULL(pairs)) {
226
+ return;
227
+ }
228
+
229
+ memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
230
+
231
+ {
232
+ size_t block_idx = 0;
233
+ for (i = 0; i < length; ++i) {
234
+ BROTLI_DCHECK(block_idx < num_blocks);
235
+ ++block_lengths[block_idx];
236
+ if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
237
+ ++block_idx;
238
+ }
239
+ }
240
+ BROTLI_DCHECK(block_idx == num_blocks);
241
+ }
242
+
243
+ for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
244
+ const size_t num_to_combine =
245
+ BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
246
+ size_t num_new_clusters;
247
+ size_t j;
248
+ for (j = 0; j < num_to_combine; ++j) {
249
+ size_t k;
250
+ FN(HistogramClear)(&histograms[j]);
251
+ for (k = 0; k < block_lengths[i + j]; ++k) {
252
+ FN(HistogramAdd)(&histograms[j], data[pos++]);
253
+ }
254
+ histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
255
+ new_clusters[j] = (uint32_t)j;
256
+ symbols[j] = (uint32_t)j;
257
+ sizes[j] = 1;
258
+ }
259
+ num_new_clusters = FN(BrotliHistogramCombine)(
260
+ histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
261
+ num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
262
+ BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
263
+ all_histograms_capacity, all_histograms_size + num_new_clusters);
264
+ BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
265
+ cluster_size_capacity, cluster_size_size + num_new_clusters);
266
+ if (BROTLI_IS_OOM(m)) return;
267
+ for (j = 0; j < num_new_clusters; ++j) {
268
+ all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
269
+ cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
270
+ remap[new_clusters[j]] = (uint32_t)j;
271
+ }
272
+ for (j = 0; j < num_to_combine; ++j) {
273
+ histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
274
+ }
275
+ num_clusters += num_new_clusters;
276
+ BROTLI_DCHECK(num_clusters == cluster_size_size);
277
+ BROTLI_DCHECK(num_clusters == all_histograms_size);
278
+ }
279
+ BROTLI_FREE(m, histograms);
280
+
281
+ max_num_pairs =
282
+ BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
283
+ if (pairs_capacity < max_num_pairs + 1) {
284
+ BROTLI_FREE(m, pairs);
285
+ pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
286
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(pairs)) return;
287
+ }
288
+
289
+ clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
290
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(clusters)) return;
291
+ for (i = 0; i < num_clusters; ++i) {
292
+ clusters[i] = (uint32_t)i;
293
+ }
294
+ num_final_clusters = FN(BrotliHistogramCombine)(
295
+ all_histograms, cluster_size, histogram_symbols, clusters, pairs,
296
+ num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
297
+ max_num_pairs);
298
+ BROTLI_FREE(m, pairs);
299
+ BROTLI_FREE(m, cluster_size);
300
+
301
+ new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
302
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return;
303
+ for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
304
+ pos = 0;
305
+ {
306
+ uint32_t next_index = 0;
307
+ for (i = 0; i < num_blocks; ++i) {
308
+ HistogramType histo;
309
+ size_t j;
310
+ uint32_t best_out;
311
+ double best_bits;
312
+ FN(HistogramClear)(&histo);
313
+ for (j = 0; j < block_lengths[i]; ++j) {
314
+ FN(HistogramAdd)(&histo, data[pos++]);
315
+ }
316
+ best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
317
+ best_bits =
318
+ FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
319
+ for (j = 0; j < num_final_clusters; ++j) {
320
+ const double cur_bits = FN(BrotliHistogramBitCostDistance)(
321
+ &histo, &all_histograms[clusters[j]]);
322
+ if (cur_bits < best_bits) {
323
+ best_bits = cur_bits;
324
+ best_out = clusters[j];
325
+ }
326
+ }
327
+ histogram_symbols[i] = best_out;
328
+ if (new_index[best_out] == kInvalidIndex) {
329
+ new_index[best_out] = next_index++;
330
+ }
331
+ }
332
+ }
333
+ BROTLI_FREE(m, clusters);
334
+ BROTLI_FREE(m, all_histograms);
335
+ BROTLI_ENSURE_CAPACITY(
336
+ m, uint8_t, split->types, split->types_alloc_size, num_blocks);
337
+ BROTLI_ENSURE_CAPACITY(
338
+ m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
339
+ if (BROTLI_IS_OOM(m)) return;
340
+ {
341
+ uint32_t cur_length = 0;
342
+ size_t block_idx = 0;
343
+ uint8_t max_type = 0;
344
+ for (i = 0; i < num_blocks; ++i) {
345
+ cur_length += block_lengths[i];
346
+ if (i + 1 == num_blocks ||
347
+ histogram_symbols[i] != histogram_symbols[i + 1]) {
348
+ const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
349
+ split->types[block_idx] = id;
350
+ split->lengths[block_idx] = cur_length;
351
+ max_type = BROTLI_MAX(uint8_t, max_type, id);
352
+ cur_length = 0;
353
+ ++block_idx;
354
+ }
355
+ }
356
+ split->num_blocks = block_idx;
357
+ split->num_types = (size_t)max_type + 1;
358
+ }
359
+ BROTLI_FREE(m, new_index);
360
+ BROTLI_FREE(m, block_lengths);
361
+ BROTLI_FREE(m, histogram_symbols);
362
+ }
363
+
364
+ static void FN(SplitByteVector)(MemoryManager* m,
365
+ const DataType* data, const size_t length,
366
+ const size_t literals_per_histogram,
367
+ const size_t max_histograms,
368
+ const size_t sampling_stride_length,
369
+ const double block_switch_cost,
370
+ const BrotliEncoderParams* params,
371
+ BlockSplit* split) {
372
+ const size_t data_size = FN(HistogramDataSize)();
373
+ size_t num_histograms = length / literals_per_histogram + 1;
374
+ HistogramType* histograms;
375
+ if (num_histograms > max_histograms) {
376
+ num_histograms = max_histograms;
377
+ }
378
+ if (length == 0) {
379
+ split->num_types = 1;
380
+ return;
381
+ } else if (length < kMinLengthForBlockSplitting) {
382
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
383
+ split->types, split->types_alloc_size, split->num_blocks + 1);
384
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
385
+ split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
386
+ if (BROTLI_IS_OOM(m)) return;
387
+ split->num_types = 1;
388
+ split->types[split->num_blocks] = 0;
389
+ split->lengths[split->num_blocks] = (uint32_t)length;
390
+ split->num_blocks++;
391
+ return;
392
+ }
393
+ histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
394
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(histograms)) return;
395
+ /* Find good entropy codes. */
396
+ FN(InitialEntropyCodes)(data, length,
397
+ sampling_stride_length,
398
+ num_histograms, histograms);
399
+ FN(RefineEntropyCodes)(data, length,
400
+ sampling_stride_length,
401
+ num_histograms, histograms);
402
+ {
403
+ /* Find a good path through literals with the good entropy codes. */
404
+ uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
405
+ size_t num_blocks = 0;
406
+ const size_t bitmaplen = (num_histograms + 7) >> 3;
407
+ double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
408
+ double* cost = BROTLI_ALLOC(m, double, num_histograms);
409
+ uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
410
+ uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
411
+ const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10;
412
+ size_t i;
413
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(block_ids) ||
414
+ BROTLI_IS_NULL(insert_cost) || BROTLI_IS_NULL(cost) ||
415
+ BROTLI_IS_NULL(switch_signal) || BROTLI_IS_NULL(new_id)) {
416
+ return;
417
+ }
418
+ for (i = 0; i < iters; ++i) {
419
+ num_blocks = FN(FindBlocks)(data, length,
420
+ block_switch_cost,
421
+ num_histograms, histograms,
422
+ insert_cost, cost, switch_signal,
423
+ block_ids);
424
+ num_histograms = FN(RemapBlockIds)(block_ids, length,
425
+ new_id, num_histograms);
426
+ FN(BuildBlockHistograms)(data, length, block_ids,
427
+ num_histograms, histograms);
428
+ }
429
+ BROTLI_FREE(m, insert_cost);
430
+ BROTLI_FREE(m, cost);
431
+ BROTLI_FREE(m, switch_signal);
432
+ BROTLI_FREE(m, new_id);
433
+ BROTLI_FREE(m, histograms);
434
+ FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
435
+ if (BROTLI_IS_OOM(m)) return;
436
+ BROTLI_FREE(m, block_ids);
437
+ }
438
+ }
439
+
440
+ #undef HistogramType