isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,105 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Algorithms for distributing the literals and commands of a metablock between
8
+ block types and contexts. */
9
+
10
+ #ifndef BROTLI_ENC_METABLOCK_H_
11
+ #define BROTLI_ENC_METABLOCK_H_
12
+
13
+ #include "brotli_common_context.h"
14
+ #include "brotli_common_platform.h"
15
+ #include "brotli_types.h"
16
+ #include "brotli_enc_block_splitter.h"
17
+ #include "brotli_enc_command.h"
18
+ #include "brotli_enc_histogram.h"
19
+ #include "brotli_enc_memory.h"
20
+ #include "brotli_enc_quality.h"
21
+
22
+ #if defined(__cplusplus) || defined(c_plusplus)
23
+ extern "C" {
24
+ #endif
25
+
26
+ typedef struct MetaBlockSplit {
27
+ BlockSplit literal_split;
28
+ BlockSplit command_split;
29
+ BlockSplit distance_split;
30
+ uint32_t* literal_context_map;
31
+ size_t literal_context_map_size;
32
+ uint32_t* distance_context_map;
33
+ size_t distance_context_map_size;
34
+ HistogramLiteral* literal_histograms;
35
+ size_t literal_histograms_size;
36
+ HistogramCommand* command_histograms;
37
+ size_t command_histograms_size;
38
+ HistogramDistance* distance_histograms;
39
+ size_t distance_histograms_size;
40
+ } MetaBlockSplit;
41
+
42
+ static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
43
+ BrotliInitBlockSplit(&mb->literal_split);
44
+ BrotliInitBlockSplit(&mb->command_split);
45
+ BrotliInitBlockSplit(&mb->distance_split);
46
+ mb->literal_context_map = 0;
47
+ mb->literal_context_map_size = 0;
48
+ mb->distance_context_map = 0;
49
+ mb->distance_context_map_size = 0;
50
+ mb->literal_histograms = 0;
51
+ mb->literal_histograms_size = 0;
52
+ mb->command_histograms = 0;
53
+ mb->command_histograms_size = 0;
54
+ mb->distance_histograms = 0;
55
+ mb->distance_histograms_size = 0;
56
+ }
57
+
58
+ static BROTLI_INLINE void DestroyMetaBlockSplit(
59
+ MemoryManager* m, MetaBlockSplit* mb) {
60
+ BrotliDestroyBlockSplit(m, &mb->literal_split);
61
+ BrotliDestroyBlockSplit(m, &mb->command_split);
62
+ BrotliDestroyBlockSplit(m, &mb->distance_split);
63
+ BROTLI_FREE(m, mb->literal_context_map);
64
+ BROTLI_FREE(m, mb->distance_context_map);
65
+ BROTLI_FREE(m, mb->literal_histograms);
66
+ BROTLI_FREE(m, mb->command_histograms);
67
+ BROTLI_FREE(m, mb->distance_histograms);
68
+ }
69
+
70
+ /* Uses the slow shortest-path block splitter and does context clustering.
71
+ The distance parameters are dynamically selected based on the commands
72
+ which get recomputed under the new distance parameters. The new distance
73
+ parameters are stored into *params. */
74
+ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
75
+ const uint8_t* ringbuffer,
76
+ const size_t pos,
77
+ const size_t mask,
78
+ BrotliEncoderParams* params,
79
+ uint8_t prev_byte,
80
+ uint8_t prev_byte2,
81
+ Command* cmds,
82
+ size_t num_commands,
83
+ ContextType literal_context_mode,
84
+ MetaBlockSplit* mb);
85
+
86
+ /* Uses a fast greedy block splitter that tries to merge current block with the
87
+ last or the second last block and uses a static context clustering which
88
+ is the same for all block types. */
89
+ BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
90
+ MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
91
+ uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
92
+ size_t num_contexts, const uint32_t* static_context_map,
93
+ const Command* commands, size_t n_commands, MetaBlockSplit* mb);
94
+
95
+ BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
96
+ MetaBlockSplit* mb);
97
+
98
+ BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliEncoderParams* params,
99
+ uint32_t npostfix, uint32_t ndirect);
100
+
101
+ #if defined(__cplusplus) || defined(c_plusplus)
102
+ } /* extern "C" */
103
+ #endif
104
+
105
+ #endif /* BROTLI_ENC_METABLOCK_H_ */
@@ -0,0 +1,183 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2015 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ #define HistogramType FN(Histogram)
11
+
12
+ /* Greedy block splitter for one block category (literal, command or distance).
13
+ */
14
+ typedef struct FN(BlockSplitter) {
15
+ /* Alphabet size of particular block category. */
16
+ size_t alphabet_size_;
17
+ /* We collect at least this many symbols for each block. */
18
+ size_t min_block_size_;
19
+ /* We merge histograms A and B if
20
+ entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
21
+ where A is the current histogram and B is the histogram of the last or the
22
+ second last block type. */
23
+ double split_threshold_;
24
+
25
+ size_t num_blocks_;
26
+ BlockSplit* split_; /* not owned */
27
+ HistogramType* histograms_; /* not owned */
28
+ size_t* histograms_size_; /* not owned */
29
+
30
+ /* The number of symbols that we want to collect before deciding on whether
31
+ or not to merge the block with a previous one or emit a new block. */
32
+ size_t target_block_size_;
33
+ /* The number of symbols in the current histogram. */
34
+ size_t block_size_;
35
+ /* Offset of the current histogram. */
36
+ size_t curr_histogram_ix_;
37
+ /* Offset of the histograms of the previous two block types. */
38
+ size_t last_histogram_ix_[2];
39
+ /* Entropy of the previous two block types. */
40
+ double last_entropy_[2];
41
+ /* The number of times we merged the current block with the last one. */
42
+ size_t merge_last_count_;
43
+ } FN(BlockSplitter);
44
+
45
+ static void FN(InitBlockSplitter)(
46
+ MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
47
+ size_t min_block_size, double split_threshold, size_t num_symbols,
48
+ BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
49
+ size_t max_num_blocks = num_symbols / min_block_size + 1;
50
+ /* We have to allocate one more histogram than the maximum number of block
51
+ types for the current histogram when the meta-block is too big. */
52
+ size_t max_num_types =
53
+ BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
54
+ self->alphabet_size_ = alphabet_size;
55
+ self->min_block_size_ = min_block_size;
56
+ self->split_threshold_ = split_threshold;
57
+ self->num_blocks_ = 0;
58
+ self->split_ = split;
59
+ self->histograms_size_ = histograms_size;
60
+ self->target_block_size_ = min_block_size;
61
+ self->block_size_ = 0;
62
+ self->curr_histogram_ix_ = 0;
63
+ self->merge_last_count_ = 0;
64
+ BROTLI_ENSURE_CAPACITY(m, uint8_t,
65
+ split->types, split->types_alloc_size, max_num_blocks);
66
+ BROTLI_ENSURE_CAPACITY(m, uint32_t,
67
+ split->lengths, split->lengths_alloc_size, max_num_blocks);
68
+ if (BROTLI_IS_OOM(m)) return;
69
+ self->split_->num_blocks = max_num_blocks;
70
+ BROTLI_DCHECK(*histograms == 0);
71
+ *histograms_size = max_num_types;
72
+ *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
73
+ self->histograms_ = *histograms;
74
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(*histograms)) return;
75
+ /* Clear only current histogram. */
76
+ FN(HistogramClear)(&self->histograms_[0]);
77
+ self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
78
+ }
79
+
80
+ /* Does one of three things:
81
+ (1) emits the current block with a new block type;
82
+ (2) emits the current block with the type of the second last block;
83
+ (3) merges the current block with the last block. */
84
+ static void FN(BlockSplitterFinishBlock)(
85
+ FN(BlockSplitter)* self, BROTLI_BOOL is_final) {
86
+ BlockSplit* split = self->split_;
87
+ double* last_entropy = self->last_entropy_;
88
+ HistogramType* histograms = self->histograms_;
89
+ self->block_size_ =
90
+ BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
91
+ if (self->num_blocks_ == 0) {
92
+ /* Create first block. */
93
+ split->lengths[0] = (uint32_t)self->block_size_;
94
+ split->types[0] = 0;
95
+ last_entropy[0] =
96
+ BitsEntropy(histograms[0].data_, self->alphabet_size_);
97
+ last_entropy[1] = last_entropy[0];
98
+ ++self->num_blocks_;
99
+ ++split->num_types;
100
+ ++self->curr_histogram_ix_;
101
+ if (self->curr_histogram_ix_ < *self->histograms_size_)
102
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
103
+ self->block_size_ = 0;
104
+ } else if (self->block_size_ > 0) {
105
+ double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
106
+ self->alphabet_size_);
107
+ HistogramType combined_histo[2];
108
+ double combined_entropy[2];
109
+ double diff[2];
110
+ size_t j;
111
+ for (j = 0; j < 2; ++j) {
112
+ size_t last_histogram_ix = self->last_histogram_ix_[j];
113
+ combined_histo[j] = histograms[self->curr_histogram_ix_];
114
+ FN(HistogramAddHistogram)(&combined_histo[j],
115
+ &histograms[last_histogram_ix]);
116
+ combined_entropy[j] = BitsEntropy(
117
+ &combined_histo[j].data_[0], self->alphabet_size_);
118
+ diff[j] = combined_entropy[j] - entropy - last_entropy[j];
119
+ }
120
+
121
+ if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
122
+ diff[0] > self->split_threshold_ &&
123
+ diff[1] > self->split_threshold_) {
124
+ /* Create new block. */
125
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
126
+ split->types[self->num_blocks_] = (uint8_t)split->num_types;
127
+ self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
128
+ self->last_histogram_ix_[0] = (uint8_t)split->num_types;
129
+ last_entropy[1] = last_entropy[0];
130
+ last_entropy[0] = entropy;
131
+ ++self->num_blocks_;
132
+ ++split->num_types;
133
+ ++self->curr_histogram_ix_;
134
+ if (self->curr_histogram_ix_ < *self->histograms_size_)
135
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
136
+ self->block_size_ = 0;
137
+ self->merge_last_count_ = 0;
138
+ self->target_block_size_ = self->min_block_size_;
139
+ } else if (diff[1] < diff[0] - 20.0) {
140
+ /* Combine this block with second last block. */
141
+ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
142
+ split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
143
+ BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
144
+ histograms[self->last_histogram_ix_[0]] = combined_histo[1];
145
+ last_entropy[1] = last_entropy[0];
146
+ last_entropy[0] = combined_entropy[1];
147
+ ++self->num_blocks_;
148
+ self->block_size_ = 0;
149
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
150
+ self->merge_last_count_ = 0;
151
+ self->target_block_size_ = self->min_block_size_;
152
+ } else {
153
+ /* Combine this block with last block. */
154
+ split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
155
+ histograms[self->last_histogram_ix_[0]] = combined_histo[0];
156
+ last_entropy[0] = combined_entropy[0];
157
+ if (split->num_types == 1) {
158
+ last_entropy[1] = last_entropy[0];
159
+ }
160
+ self->block_size_ = 0;
161
+ FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
162
+ if (++self->merge_last_count_ > 1) {
163
+ self->target_block_size_ += self->min_block_size_;
164
+ }
165
+ }
166
+ }
167
+ if (is_final) {
168
+ *self->histograms_size_ = split->num_types;
169
+ split->num_blocks = self->num_blocks_;
170
+ }
171
+ }
172
+
173
+ /* Adds the next symbol to the current histogram. When the current histogram
174
+ reaches the target size, decides on merging the block. */
175
+ static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
176
+ FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
177
+ ++self->block_size_;
178
+ if (self->block_size_ == self->target_block_size_) {
179
+ FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE);
180
+ }
181
+ }
182
+
183
+ #undef HistogramType
@@ -0,0 +1,46 @@
1
+ /* Copyright 2017 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Parameters for the Brotli encoder with chosen quality levels. */
8
+
9
+ #ifndef BROTLI_ENC_PARAMS_H_
10
+ #define BROTLI_ENC_PARAMS_H_
11
+
12
+ #include "brotli_encode.h"
13
+ #include "brotli_enc_encoder_dict.h"
14
+
15
+ typedef struct BrotliHasherParams {
16
+ int type;
17
+ int bucket_bits;
18
+ int block_bits;
19
+ int hash_len;
20
+ int num_last_distances_to_check;
21
+ } BrotliHasherParams;
22
+
23
+ typedef struct BrotliDistanceParams {
24
+ uint32_t distance_postfix_bits;
25
+ uint32_t num_direct_distance_codes;
26
+ uint32_t alphabet_size_max;
27
+ uint32_t alphabet_size_limit;
28
+ size_t max_distance;
29
+ } BrotliDistanceParams;
30
+
31
+ /* Encoding parameters */
32
+ typedef struct BrotliEncoderParams {
33
+ BrotliEncoderMode mode;
34
+ int quality;
35
+ int lgwin;
36
+ int lgblock;
37
+ size_t stream_offset;
38
+ size_t size_hint;
39
+ BROTLI_BOOL disable_literal_context_modeling;
40
+ BROTLI_BOOL large_window;
41
+ BrotliHasherParams hasher;
42
+ BrotliDistanceParams dist;
43
+ BrotliEncoderDictionary dictionary;
44
+ } BrotliEncoderParams;
45
+
46
+ #endif /* BROTLI_ENC_PARAMS_H_ */
@@ -0,0 +1,53 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions for encoding of integers into prefix codes, the amount of extra
8
+ bits, and the actual values of the extra bits. */
9
+
10
+ #ifndef BROTLI_ENC_PREFIX_H_
11
+ #define BROTLI_ENC_PREFIX_H_
12
+
13
+ #include "brotli_common_constants.h"
14
+ #include "brotli_common_platform.h"
15
+ #include "brotli_types.h"
16
+ #include "brotli_enc_fast_log.h"
17
+
18
+ #if defined(__cplusplus) || defined(c_plusplus)
19
+ extern "C" {
20
+ #endif
21
+
22
+ /* Here distance_code is an intermediate code, i.e. one of the special codes or
23
+ the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
24
+ static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
25
+ size_t num_direct_codes,
26
+ size_t postfix_bits,
27
+ uint16_t* code,
28
+ uint32_t* extra_bits) {
29
+ if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
30
+ *code = (uint16_t)distance_code;
31
+ *extra_bits = 0;
32
+ return;
33
+ } else {
34
+ size_t dist = ((size_t)1 << (postfix_bits + 2u)) +
35
+ (distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
36
+ size_t bucket = Log2FloorNonZero(dist) - 1;
37
+ size_t postfix_mask = (1u << postfix_bits) - 1;
38
+ size_t postfix = dist & postfix_mask;
39
+ size_t prefix = (dist >> bucket) & 1;
40
+ size_t offset = (2 + prefix) << bucket;
41
+ size_t nbits = bucket - postfix_bits;
42
+ *code = (uint16_t)((nbits << 10) |
43
+ (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
44
+ ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
45
+ *extra_bits = (uint32_t)((dist - offset) >> postfix_bits);
46
+ }
47
+ }
48
+
49
+ #if defined(__cplusplus) || defined(c_plusplus)
50
+ } /* extern "C" */
51
+ #endif
52
+
53
+ #endif /* BROTLI_ENC_PREFIX_H_ */
@@ -0,0 +1,165 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Constants and formulas that affect speed-ratio trade-offs and thus define
8
+ quality levels. */
9
+
10
+ #ifndef BROTLI_ENC_QUALITY_H_
11
+ #define BROTLI_ENC_QUALITY_H_
12
+
13
+ #include "brotli_common_platform.h"
14
+ #include "brotli_encode.h"
15
+ #include "brotli_enc_params.h"
16
+
17
+ #define FAST_ONE_PASS_COMPRESSION_QUALITY 0 /* MaxHashTableSize returns 1 << 15 for this quality */
18
+ #define FAST_TWO_PASS_COMPRESSION_QUALITY 1 /* with quality 0, makes ComputeLgBlock return lgwin */
19
+ #define ZOPFLIFICATION_QUALITY 10
20
+ #define HQ_ZOPFLIFICATION_QUALITY 11
21
+
22
+ #define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2 /* SanitizeParams clears large_window at or below this quality */
23
+ #define MIN_QUALITY_FOR_BLOCK_SPLIT 4 /* below this quality ComputeLgBlock returns a fixed 14 (qualities 0 and 1 excepted) */
24
+ #define MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS 4
25
+ #define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
26
+ #define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5
27
+ #define MIN_QUALITY_FOR_CONTEXT_MODELING 5
28
+ #define MIN_QUALITY_FOR_HQ_CONTEXT_MODELING 7
29
+ #define MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING 10
30
+
31
+ /* For quality below MIN_QUALITY_FOR_BLOCK_SPLIT there is no block splitting,
32
+ so we buffer at most this many literals and commands. */
33
+ #define MAX_NUM_DELAYED_SYMBOLS 0x2FFF
34
+
35
+ /* Returns hash-table size for quality levels 0 and 1: 1 << 15 when quality equals FAST_ONE_PASS_COMPRESSION_QUALITY, 1 << 17 for any other value passed in. */
36
+ static BROTLI_INLINE size_t MaxHashTableSize(int quality) {
37
+ return quality == FAST_ONE_PASS_COMPRESSION_QUALITY ? 1 << 15 : 1 << 17;
38
+ }
39
+
40
+ /* The maximum length for which the zopflification uses distinct distances. */
41
+ #define MAX_ZOPFLI_LEN_QUALITY_10 150
42
+ #define MAX_ZOPFLI_LEN_QUALITY_11 325
43
+
44
+ /* Do not thoroughly search when a long copy is found. */
45
+ #define BROTLI_LONG_COPY_QUICK_STEP 16384
46
+
47
+ static BROTLI_INLINE size_t MaxZopfliLen(const BrotliEncoderParams* params) { /* Selects the Zopfli length limit from params->quality. */
48
+ return params->quality <= 10 ? /* quality 10 or lower takes the QUALITY_10 limit */
49
+ MAX_ZOPFLI_LEN_QUALITY_10 :
50
+ MAX_ZOPFLI_LEN_QUALITY_11; /* any higher quality takes the QUALITY_11 limit */
51
+ }
52
+
53
+ /* Number of best candidates to evaluate to expand Zopfli chain: 1 when params->quality is 10 or lower, 5 otherwise. */
54
+ static BROTLI_INLINE size_t MaxZopfliCandidates(
55
+ const BrotliEncoderParams* params) {
56
+ return params->quality <= 10 ? 1 : 5;
57
+ }
58
+
59
+ static BROTLI_INLINE void SanitizeParams(BrotliEncoderParams* params) { /* Adjusts quality, large_window and lgwin in place so they fall within supported ranges. */
60
+ params->quality = BROTLI_MIN(int, BROTLI_MAX_QUALITY, /* clamp into [BROTLI_MIN_QUALITY, BROTLI_MAX_QUALITY], assuming BROTLI_MIN/BROTLI_MAX are typed min/max */
61
+ BROTLI_MAX(int, BROTLI_MIN_QUALITY, params->quality));
62
+ if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
63
+ params->large_window = BROTLI_FALSE; /* large window is switched off for these low qualities */
64
+ }
65
+ if (params->lgwin < BROTLI_MIN_WINDOW_BITS) {
66
+ params->lgwin = BROTLI_MIN_WINDOW_BITS; /* raise to the lower bound */
67
+ } else {
68
+ int max_lgwin = params->large_window ? BROTLI_LARGE_MAX_WINDOW_BITS : /* upper bound depends on whether large_window is still set */
69
+ BROTLI_MAX_WINDOW_BITS;
70
+ if (params->lgwin > max_lgwin) params->lgwin = max_lgwin;
71
+ }
72
+ }
73
+
74
+ /* Returns optimized lg_block value, derived from params->quality, params->lgwin and params->lgblock. params itself is not modified. */
75
+ static BROTLI_INLINE int ComputeLgBlock(const BrotliEncoderParams* params) {
76
+ int lgblock = params->lgblock;
77
+ if (params->quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
78
+ params->quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
79
+ lgblock = params->lgwin; /* the two fast qualities use the window size, ignoring params->lgblock */
80
+ } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
81
+ lgblock = 14; /* fixed value below the block-split threshold, ignoring params->lgblock */
82
+ } else if (lgblock == 0) { /* a zero lgblock is replaced by a computed default */
83
+ lgblock = 16;
84
+ if (params->quality >= 9 && params->lgwin > lgblock) {
85
+ lgblock = BROTLI_MIN(int, 18, params->lgwin); /* quality 9+ with lgwin above 16: the smaller of 18 and lgwin */
86
+ }
87
+ } else { /* non-zero lgblock: confine it to the input block bit limits */
88
+ lgblock = BROTLI_MIN(int, BROTLI_MAX_INPUT_BLOCK_BITS,
89
+ BROTLI_MAX(int, BROTLI_MIN_INPUT_BLOCK_BITS, lgblock));
90
+ }
91
+ return lgblock;
92
+ }
93
+
94
+ /* Returns log2 of the size of main ring buffer area.
95
+ Allocate at least lgwin + 1 bits for the ring buffer so that the newly
96
+ added block fits there completely and we still get lgwin bits and at least
97
+ read_block_size_bits + 1 bits because the copy tail length needs to be
98
+ smaller than ring-buffer size. The value computed is 1 + the larger of params->lgwin and params->lgblock; read_block_size_bits above presumably refers to params->lgblock. */
99
+ static BROTLI_INLINE int ComputeRbBits(const BrotliEncoderParams* params) {
100
+ return 1 + BROTLI_MAX(int, params->lgwin, params->lgblock);
101
+ }
102
+
103
+ static BROTLI_INLINE size_t MaxMetablockSize(
104
+ const BrotliEncoderParams* params) { /* Returns 1 << bits, where bits is ComputeRbBits(params) limited to BROTLI_MAX_INPUT_BLOCK_BITS. */
105
+ int bits =
106
+ BROTLI_MIN(int, ComputeRbBits(params), BROTLI_MAX_INPUT_BLOCK_BITS);
107
+ return (size_t)1 << bits; /* cast first so the shift is done in size_t */
108
+ }
109
+
110
+ /* When searching for backward references and have not seen matches for a long
111
+ time, we can skip some match lookups. Unsuccessful match lookups are very
112
+ expensive and this kind of a heuristic speeds up compression quite a lot.
113
+ At first 8 byte strides are taken and every second byte is put to hasher.
114
+ After 4x more literals stride by 16 bytes, and every 4-th byte is put to hasher.
115
+ Applied only to qualities 2 to 9. Returns 64 when params->quality is below 9 and 512 otherwise. */
116
+ static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
117
+ const BrotliEncoderParams* params) {
118
+ return params->quality < 9 ? 64 : 512;
119
+ }
120
+
121
+ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
122
+ BrotliHasherParams* hparams) { /* Fills *hparams from params->quality, params->lgwin and params->size_hint; only some branches set fields other than type. */
123
+ if (params->quality > 9) {
124
+ hparams->type = 10; /* qualities above 9 */
125
+ } else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
126
+ hparams->type = 54; /* quality 4 with a size hint of at least 1 << 20 */
127
+ } else if (params->quality < 5) {
128
+ hparams->type = params->quality; /* remaining qualities below 5 use the quality value itself as the type */
129
+ } else if (params->lgwin <= 16) {
130
+ hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42; /* qualities 5-9 with lgwin up to 16 */
131
+ } else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
132
+ hparams->type = 6; /* qualities 5-9, large size hint and lgwin of at least 19 */
133
+ hparams->block_bits = params->quality - 1;
134
+ hparams->bucket_bits = 15;
135
+ hparams->hash_len = 5;
136
+ hparams->num_last_distances_to_check =
137
+ params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
138
+ } else {
139
+ hparams->type = 5; /* remaining qualities 5-9; hash_len is not assigned in this branch */
140
+ hparams->block_bits = params->quality - 1;
141
+ hparams->bucket_bits = params->quality < 7 ? 14 : 15;
142
+ hparams->num_last_distances_to_check =
143
+ params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
144
+ }
145
+
146
+ if (params->lgwin > 24) {
147
+ /* Different hashers for large window brotli: not for qualities <= 2,
148
+ these are too fast for large window. Not for qualities >= 10: their
149
+ hasher already works well with large window. So the changes are:
150
+ H3 --> H35: for quality 3.
151
+ H54 --> H55: for quality 4 with size hint >= 1MB (1 << 20)
152
+ H6 --> H65: for qualities 5, 6, 7, 8, 9 when type 6 was selected above. */
153
+ if (hparams->type == 3) {
154
+ hparams->type = 35;
155
+ }
156
+ if (hparams->type == 54) {
157
+ hparams->type = 55;
158
+ }
159
+ if (hparams->type == 6) {
160
+ hparams->type = 65;
161
+ }
162
+ }
163
+ }
164
+
165
+ #endif /* BROTLI_ENC_QUALITY_H_ */