isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,84 @@
1
+ /* Copyright 2014 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions to convert brotli-related data structures into the
8
+ brotli bit stream. The functions here operate under
9
+ assumption that there is enough space in the storage, i.e., there are
10
+ no out-of-range checks anywhere.
11
+
12
+ These functions do bit addressing into a byte array. The byte array
13
+ is called "storage" and the index to the bit is called storage_ix
14
+ in function arguments. */
15
+
16
+ #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
17
+ #define BROTLI_ENC_BROTLI_BIT_STREAM_H_
18
+
19
+ #include "brotli_common_context.h"
20
+ #include "brotli_common_platform.h"
21
+ #include "brotli_types.h"
22
+ #include "brotli_enc_command.h"
23
+ #include "brotli_enc_entropy_encode.h"
24
+ #include "brotli_enc_memory.h"
25
+ #include "brotli_enc_metablock.h"
26
+
27
+ #if defined(__cplusplus) || defined(c_plusplus)
28
+ extern "C" {
29
+ #endif
30
+
31
+ /* All Store functions here will use a storage_ix, which is always the bit
32
+ position for the current storage. */
33
+
34
+ BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
35
+ HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
36
+
37
+ BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
38
+ MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
39
+ const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
40
+ uint8_t* storage);
41
+
42
+ /* REQUIRES: length > 0 */
43
+ /* REQUIRES: length <= (1 << 24) */
44
+ BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
45
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
46
+ uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
47
+ const BrotliEncoderParams* params, ContextType literal_context_mode,
48
+ const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
49
+ size_t* storage_ix, uint8_t* storage);
50
+
51
+ /* Stores the meta-block without doing any block splitting, just collects
52
+ one histogram per block category and uses that for entropy coding.
53
+ REQUIRES: length > 0
54
+ REQUIRES: length <= (1 << 24) */
55
+ BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
56
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
57
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
58
+ const Command* commands, size_t n_commands,
59
+ size_t* storage_ix, uint8_t* storage);
60
+
61
+ /* Same as above, but uses static prefix codes for histograms with only a few
62
+ symbols, and uses static code length prefix codes for all other histograms.
63
+ REQUIRES: length > 0
64
+ REQUIRES: length <= (1 << 24) */
65
+ BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
66
+ const uint8_t* input, size_t start_pos, size_t length, size_t mask,
67
+ BROTLI_BOOL is_last, const BrotliEncoderParams* params,
68
+ const Command* commands, size_t n_commands,
69
+ size_t* storage_ix, uint8_t* storage);
70
+
71
+ /* This is for storing uncompressed blocks (simple raw storage of
72
+ bytes-as-bytes).
73
+ REQUIRES: length > 0
74
+ REQUIRES: length <= (1 << 24) */
75
+ BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
76
+ BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
77
+ size_t position, size_t mask, size_t len,
78
+ size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
79
+
80
+ #if defined(__cplusplus) || defined(c_plusplus)
81
+ } /* extern "C" */
82
+ #endif
83
+
84
+ #endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
@@ -0,0 +1,56 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions for clustering similar histograms together. */
8
+
9
+ #include "brotli_enc_cluster.h"
10
+
11
+ #include "brotli_common_platform.h"
12
+ #include "brotli_types.h"
13
+ #include "brotli_enc_bit_cost.h" /* BrotliPopulationCost */
14
+ #include "brotli_enc_fast_log.h"
15
+ #include "brotli_enc_histogram.h"
16
+ #include "brotli_enc_memory.h"
17
+
18
+ #if defined(__cplusplus) || defined(c_plusplus)
19
+ extern "C" {
20
+ #endif
21
+ /* Queue ordering predicate: true when p1->cost_diff is greater than p2->cost_diff; on equal cost_diff, true when p1's (idx2 - idx1) gap is greater than p2's. */
22
+ static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
23
+ const HistogramPair* p1, const HistogramPair* p2) {
24
+ if (p1->cost_diff != p2->cost_diff) {
25
+ return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
26
+ }
27
+ return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));  /* tie-break on index distance */
28
+ }
29
+
30
+ /* Returns entropy reduction of the context map when we combine two clusters. Computed as size_a * FastLog2(size_a) + size_b * FastLog2(size_b) - size_c * FastLog2(size_c), where size_c = size_a + size_b. */
31
+ static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
32
+ size_t size_c = size_a + size_b;  /* size of the merged cluster */
33
+ return (double)size_a * FastLog2(size_a) +
34
+ (double)size_b * FastLog2(size_b) -
35
+ (double)size_c * FastLog2(size_c);
36
+ }
37
+
38
+ #define CODE(X) X
39
+
40
+ #define FN(X) X ## Literal
41
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
42
+ #undef FN
43
+
44
+ #define FN(X) X ## Command
45
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
46
+ #undef FN
47
+
48
+ #define FN(X) X ## Distance
49
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
50
+ #undef FN
51
+
52
+ #undef CODE
53
+
54
+ #if defined(__cplusplus) || defined(c_plusplus)
55
+ } /* extern "C" */
56
+ #endif
@@ -0,0 +1,48 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions for clustering similar histograms together. */
8
+
9
+ #ifndef BROTLI_ENC_CLUSTER_H_
10
+ #define BROTLI_ENC_CLUSTER_H_
11
+
12
+ #include "brotli_common_platform.h"
13
+ #include "brotli_types.h"
14
+ #include "brotli_enc_histogram.h"
15
+ #include "brotli_enc_memory.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+ /* A candidate merge of two histograms, as held in the pairs queue used by the clustering functions. */
21
+ typedef struct HistogramPair {
22
+ uint32_t idx1;  /* first histogram index; BrotliCompareAndPushToQueue stores the smaller index here */
23
+ uint32_t idx2;  /* second histogram index; BrotliCompareAndPushToQueue stores the larger index here */
24
+ double cost_combo;  /* bit cost recorded for the combined histogram */
25
+ double cost_diff;  /* cost_combo + 0.5 * ClusterCostDiff(sizes) - bit_cost_ of both histograms */
26
+ } HistogramPair;
27
+
28
+ #define CODE(X) /* Declaration */;
29
+
30
+ #define FN(X) X ## Literal
31
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
32
+ #undef FN
33
+
34
+ #define FN(X) X ## Command
35
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
36
+ #undef FN
37
+
38
+ #define FN(X) X ## Distance
39
+ #include "brotli_enc_cluster_inc.h" /* NOLINT(build/include) */
40
+ #undef FN
41
+
42
+ #undef CODE
43
+
44
+ #if defined(__cplusplus) || defined(c_plusplus)
45
+ } /* extern "C" */
46
+ #endif
47
+
48
+ #endif /* BROTLI_ENC_CLUSTER_H_ */
@@ -0,0 +1,320 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN, CODE */
9
+
10
+ #define HistogramType FN(Histogram)
11
+
12
+ /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
13
+ it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. The pair is stored with idx1 < idx2; *num_pairs is only incremented while it is below max_num_pairs. */
14
+ BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
15
+ const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
16
+ uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
17
+ size_t* num_pairs) CODE({
18
+ BROTLI_BOOL is_good_pair = BROTLI_FALSE;
19
+ HistogramPair p;
20
+ p.idx1 = p.idx2 = 0;
21
+ p.cost_diff = p.cost_combo = 0;
22
+ if (idx1 == idx2) {  /* a histogram is never paired with itself */
23
+ return;
24
+ }
25
+ if (idx2 < idx1) {  /* swap so that idx1 < idx2 */
26
+ uint32_t t = idx2;
27
+ idx2 = idx1;
28
+ idx1 = t;
29
+ }
30
+ p.idx1 = idx1;
31
+ p.idx2 = idx2;
32
+ p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
33
+ p.cost_diff -= out[idx1].bit_cost_;
34
+ p.cost_diff -= out[idx2].bit_cost_;
35
+
36
+ if (out[idx1].total_count_ == 0) {  /* idx1 is empty: the combination costs what idx2 costs */
37
+ p.cost_combo = out[idx2].bit_cost_;
38
+ is_good_pair = BROTLI_TRUE;
39
+ } else if (out[idx2].total_count_ == 0) {  /* idx2 is empty: the combination costs what idx1 costs */
40
+ p.cost_combo = out[idx1].bit_cost_;
41
+ is_good_pair = BROTLI_TRUE;
42
+ } else {
43
+ double threshold = *num_pairs == 0 ? 1e99 :  /* empty queue: 1e99 is used as the threshold */
44
+ BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
45
+ HistogramType combo = out[idx1];  /* work on a copy; out[] itself is not modified here */
46
+ double cost_combo;
47
+ FN(HistogramAddHistogram)(&combo, &out[idx2]);
48
+ cost_combo = FN(BrotliPopulationCost)(&combo);
49
+ if (cost_combo < threshold - p.cost_diff) {  /* same as: p.cost_diff + cost_combo < threshold */
50
+ p.cost_combo = cost_combo;
51
+ is_good_pair = BROTLI_TRUE;
52
+ }
53
+ }
54
+ if (is_good_pair) {
55
+ p.cost_diff += p.cost_combo;  /* cost_diff now includes the cost of the combined histogram */
56
+ if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
57
+ /* Replace the top of the queue if needed. */
58
+ if (*num_pairs < max_num_pairs) {  /* keep the old front at the end when there is room */
59
+ pairs[*num_pairs] = pairs[0];
60
+ ++(*num_pairs);
61
+ }
62
+ pairs[0] = p;  /* when the queue is full the old front is overwritten */
63
+ } else if (*num_pairs < max_num_pairs) {  /* otherwise append; p is dropped when the queue is full */
64
+ pairs[*num_pairs] = p;
65
+ ++(*num_pairs);
66
+ }
67
+ }
68
+ })
69
+ /* Merges the histograms listed in clusters[0..num_clusters): repeatedly adds out[pairs[0].idx2] into out[pairs[0].idx1], rewrites matching symbols[] entries and removes idx2 from clusters[]. Returns the number of clusters left. */
70
+ BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
71
+ uint32_t* cluster_size,
72
+ uint32_t* symbols,
73
+ uint32_t* clusters,
74
+ HistogramPair* pairs,
75
+ size_t num_clusters,
76
+ size_t symbols_size,
77
+ size_t max_clusters,
78
+ size_t max_num_pairs) CODE({
79
+ double cost_diff_threshold = 0.0;
80
+ size_t min_cluster_size = 1;
81
+ size_t num_pairs = 0;
82
+
83
+ {
84
+ /* We maintain a vector of histogram pairs, with the property that the pair
85
+ with the maximum bit cost reduction is the first. */
86
+ size_t idx1;
87
+ for (idx1 = 0; idx1 < num_clusters; ++idx1) {
88
+ size_t idx2;
89
+ for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
90
+ FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
91
+ clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
92
+ }
93
+ }
94
+ }
95
+
96
+ while (num_clusters > min_cluster_size) {
97
+ uint32_t best_idx1;
98
+ uint32_t best_idx2;
99
+ size_t i;
100
+ if (pairs[0].cost_diff >= cost_diff_threshold) {  /* front pair does not beat the threshold: raise it to 1e99 and keep merging only while num_clusters > max_clusters */
101
+ cost_diff_threshold = 1e99;
102
+ min_cluster_size = max_clusters;
103
+ continue;
104
+ }
105
+ /* Take the best pair from the top of heap. */
106
+ best_idx1 = pairs[0].idx1;
107
+ best_idx2 = pairs[0].idx2;
108
+ FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
109
+ out[best_idx1].bit_cost_ = pairs[0].cost_combo;  /* reuse the combined cost recorded in the pair */
110
+ cluster_size[best_idx1] += cluster_size[best_idx2];
111
+ for (i = 0; i < symbols_size; ++i) {  /* redirect every symbol that pointed at best_idx2 */
112
+ if (symbols[i] == best_idx2) {
113
+ symbols[i] = best_idx1;
114
+ }
115
+ }
116
+ for (i = 0; i < num_clusters; ++i) {
117
+ if (clusters[i] == best_idx2) {
118
+ memmove(&clusters[i], &clusters[i + 1],  /* close the gap left by best_idx2 */
119
+ (num_clusters - i - 1) * sizeof(clusters[0]));
120
+ break;
121
+ }
122
+ }
123
+ --num_clusters;
124
+ {
125
+ /* Remove pairs intersecting the just combined best pair. */
126
+ size_t copy_to_idx = 0;  /* write position while compacting pairs[] in place */
127
+ for (i = 0; i < num_pairs; ++i) {
128
+ HistogramPair* p = &pairs[i];
129
+ if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
130
+ p->idx1 == best_idx2 || p->idx2 == best_idx2) {
131
+ /* Remove invalid pair from the queue. */
132
+ continue;
133
+ }
134
+ if (HistogramPairIsLess(&pairs[0], p)) {
135
+ /* Replace the top of the queue if needed. */
136
+ HistogramPair front = pairs[0];
137
+ pairs[0] = *p;
138
+ pairs[copy_to_idx] = front;
139
+ } else {
140
+ pairs[copy_to_idx] = *p;
141
+ }
142
+ ++copy_to_idx;
143
+ }
144
+ num_pairs = copy_to_idx;
145
+ }
146
+
147
+ /* Push new pairs formed with the combined histogram to the heap. */
148
+ for (i = 0; i < num_clusters; ++i) {
149
+ FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
150
+ max_num_pairs, &pairs[0], &num_pairs);
151
+ }
152
+ }
153
+ return num_clusters;
154
+ })
155
+
156
+ /* What is the bit cost of moving histogram from cur_symbol to candidate. Returns 0.0 when histogram is empty; otherwise BrotliPopulationCost(histogram + candidate) - candidate->bit_cost_. */
157
+ BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
158
+ const HistogramType* histogram, const HistogramType* candidate) CODE({
159
+ if (histogram->total_count_ == 0) {
160
+ return 0.0;
161
+ } else {
162
+ HistogramType tmp = *histogram;  /* work on a copy so neither argument is modified */
163
+ FN(HistogramAddHistogram)(&tmp, candidate);
164
+ return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
165
+ }
166
+ })
167
+
168
+ /* Find the best 'out' histogram for each of the 'in' histograms.
169
+ When called, clusters[0..num_clusters) contains the unique values from
170
+ symbols[0..in_size), but this property is not preserved in this function.
171
+ Note: we assume that out[]->bit_cost_ is already up-to-date. */
172
+ BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
173
+ size_t in_size, const uint32_t* clusters, size_t num_clusters,
174
+ HistogramType* out, uint32_t* symbols) CODE({
175
+ size_t i;
176
+ for (i = 0; i < in_size; ++i) {
177
+ uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];  /* initial candidate: the previous input's (already updated) assignment, or symbols[0] for the first input */
178
+ double best_bits =
179
+ FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
180
+ size_t j;
181
+ for (j = 0; j < num_clusters; ++j) {
182
+ const double cur_bits =
183
+ FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
184
+ if (cur_bits < best_bits) {  /* strict comparison: ties keep the earlier candidate */
185
+ best_bits = cur_bits;
186
+ best_out = clusters[j];
187
+ }
188
+ }
189
+ symbols[i] = best_out;
190
+ }
191
+
192
+ /* Recompute each out[] from the input histograms in[] and the new symbols[]. */
193
+ for (i = 0; i < num_clusters; ++i) {
194
+ FN(HistogramClear)(&out[clusters[i]]);
195
+ }
196
+ for (i = 0; i < in_size; ++i) {
197
+ FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
198
+ }
199
+ })
200
+
201
+ /* Reorders elements of the out[0..length) array and changes values in
202
+ symbols[0..length) array in the following way:
203
+ * when called, symbols[] contains indexes into out[], and has N unique
204
+ values (possibly N < length)
205
+ * on return, symbols'[i] = f(symbols[i]) and
206
+ out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
207
+ where f is a bijection between the range of symbols[] and [0..N), and
208
+ the first occurrences of values in symbols'[i] come in consecutive
209
+ increasing order.
210
+ Returns N, the number of unique values in symbols[]. */
211
+ BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
212
+ HistogramType* out, uint32_t* symbols, size_t length) CODE({
213
+ static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;  /* marks "no new index assigned yet" */
214
+ uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
215
+ uint32_t next_index;
216
+ HistogramType* tmp;
217
+ size_t i;
218
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return 0;  /* allocation failed: returns 0 */
219
+ for (i = 0; i < length; ++i) {
220
+ new_index[i] = kInvalidIndex;
221
+ }
222
+ next_index = 0;
223
+ for (i = 0; i < length; ++i) {  /* hand out new indexes in order of first appearance */
224
+ if (new_index[symbols[i]] == kInvalidIndex) {
225
+ new_index[symbols[i]] = next_index;
226
+ ++next_index;
227
+ }
228
+ }
229
+ /* TODO: by using idea of "cycle-sort" we can avoid allocation of
230
+ tmp and reduce the number of copying by the factor of 2. */
231
+ tmp = BROTLI_ALLOC(m, HistogramType, next_index);
232
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return 0;  /* allocation failed: returns 0 */
233
+ next_index = 0;
234
+ for (i = 0; i < length; ++i) {
235
+ if (new_index[symbols[i]] == next_index) {  /* first occurrence of this value: copy its histogram into the next tmp slot */
236
+ tmp[next_index] = out[symbols[i]];
237
+ ++next_index;
238
+ }
239
+ symbols[i] = new_index[symbols[i]];
240
+ }
241
+ BROTLI_FREE(m, new_index);
242
+ for (i = 0; i < next_index; ++i) {  /* copy the reordered histograms back into out[] */
243
+ out[i] = tmp[i];
244
+ }
245
+ BROTLI_FREE(m, tmp);
246
+ return next_index;
247
+ })
248
+ /* Clusters in[0..in_size) into out[]: combines within batches of 64 inputs, then across all remaining clusters, then remaps and reindexes. Writes the histogram count to *out_size and each input's histogram index to histogram_symbols[]. */
249
+ BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
250
+ MemoryManager* m, const HistogramType* in, const size_t in_size,
251
+ size_t max_histograms, HistogramType* out, size_t* out_size,
252
+ uint32_t* histogram_symbols) CODE({
253
+ uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
254
+ uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
255
+ size_t num_clusters = 0;
256
+ const size_t max_input_histograms = 64;  /* batch size used by the first pass */
257
+ size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
258
+ /* For the first pass of clustering, we allow all pairs. */
259
+ HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
260
+ size_t i;
261
+
262
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) ||
263
+ BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)) {
264
+ return;  /* allocation failed: return without writing *out_size */
265
+ }
266
+
267
+ for (i = 0; i < in_size; ++i) {  /* every input starts as its own cluster of size 1 */
268
+ cluster_size[i] = 1;
269
+ }
270
+
271
+ for (i = 0; i < in_size; ++i) {
272
+ out[i] = in[i];
273
+ out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
274
+ histogram_symbols[i] = (uint32_t)i;
275
+ }
276
+
277
+ for (i = 0; i < in_size; i += max_input_histograms) {  /* first pass: combine within each batch */
278
+ size_t num_to_combine =
279
+ BROTLI_MIN(size_t, in_size - i, max_input_histograms);
280
+ size_t num_new_clusters;
281
+ size_t j;
282
+ for (j = 0; j < num_to_combine; ++j) {
283
+ clusters[num_clusters + j] = (uint32_t)(i + j);
284
+ }
285
+ num_new_clusters =
286
+ FN(BrotliHistogramCombine)(out, cluster_size,
287
+ &histogram_symbols[i],
288
+ &clusters[num_clusters], pairs,
289
+ num_to_combine, num_to_combine,
290
+ max_histograms, pairs_capacity);
291
+ num_clusters += num_new_clusters;  /* surviving clusters of all batches are stored contiguously in clusters[] */
292
+ }
293
+
294
+ {
295
+ /* For the second pass, we limit the total number of histogram pairs.
296
+ After this limit is reached, we only keep searching for the best pair. */
297
+ size_t max_num_pairs = BROTLI_MIN(size_t,
298
+ 64 * num_clusters, (num_clusters / 2) * num_clusters);
299
+ BROTLI_ENSURE_CAPACITY(
300
+ m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
301
+ if (BROTLI_IS_OOM(m)) return;
302
+
303
+ /* Collapse similar histograms. */
304
+ num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
305
+ histogram_symbols, clusters,
306
+ pairs, num_clusters, in_size,
307
+ max_histograms, max_num_pairs);
308
+ }
309
+ BROTLI_FREE(m, pairs);
310
+ BROTLI_FREE(m, cluster_size);
311
+ /* Find the optimal map from original histograms to the final ones. */
312
+ FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
313
+ out, histogram_symbols);
314
+ BROTLI_FREE(m, clusters);
315
+ /* Convert the context map to a canonical form. */
316
+ *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
317
+ if (BROTLI_IS_OOM(m)) return;
318
+ })
319
+
320
+ #undef HistogramType
@@ -0,0 +1,28 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ #include "brotli_enc_command.h"
8
+
9
+ #include "brotli_types.h"
10
+
11
+ #if defined(__cplusplus) || defined(c_plusplus)
12
+ extern "C" {
13
+ #endif
14
+ /* Four parallel tables with BROTLI_NUM_INS_COPY_CODES entries each. In the data below, consecutive Base entries differ by 1 << Extra[i]. NOTE(review): presumably base value and extra-bit count per insert/copy length code - confirm against the brotli format specification. */
15
+ const uint32_t kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {
16
+ 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
17
+ 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594};
18
+ const uint32_t kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {
19
+ 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24};
20
+ const uint32_t kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {
21
+ 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18,
22
+ 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118};
23
+ const uint32_t kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24};
25
+
26
+ #if defined(__cplusplus) || defined(c_plusplus)
27
+ } /* extern "C" */
28
+ #endif