isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,95 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function to find backward reference copies. */
8
+
9
+ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
10
+ #define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
11
+
12
+ #include "brotli_common_constants.h"
13
+ #include "brotli_common_context.h"
14
+ #include "brotli_common_dictionary.h"
15
+ #include "brotli_common_platform.h"
16
+ #include "brotli_types.h"
17
+ #include "brotli_enc_command.h"
18
+ #include "brotli_enc_hash.h"
19
+ #include "brotli_enc_memory.h"
20
+ #include "brotli_enc_quality.h"
21
+
22
+ #if defined(__cplusplus) || defined(c_plusplus)
23
+ extern "C" {
24
+ #endif
25
+
26
+ BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
27
+ size_t num_bytes,
28
+ size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
29
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
30
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
31
+ Command* commands, size_t* num_commands, size_t* num_literals);
32
+
33
+ BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
34
+ size_t num_bytes,
35
+ size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
36
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
37
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
38
+ Command* commands, size_t* num_commands, size_t* num_literals);
39
+
/* One entry of the node array used by the Zopfli shortest-path search. */
typedef struct ZopfliNode {
  /* Best length to get up to this byte (not including this byte itself);
     the highest 7 bits are used to reconstruct the length code. */
  uint32_t length;

  /* Distance associated with the length. */
  uint32_t distance;

  /* Number of literal inserts before this copy; the highest 5 bits contain
     distance short code + 1 (or zero if no short code). */
  uint32_t dcode_insert_length;

  /* Information used by the dynamic programming, one member at a time:
       - during the forward pass |cost| is used to store the goal function;
       - once a node is processed, its |cost| is invalidated in favor of
         |shortcut|;
       - on the path back-tracing pass |next| is assigned the offset to the
         next node on the path. */
  union {
    /* Smallest cost to get to this byte from the beginning, as found so
       far. */
    float cost;

    /* Offset to the next node on the path. Equals command_length() of the
       next node on the path. For the last node equals BROTLI_UINT32_MAX. */
    uint32_t next;

    /* Node position that provides the next distance for the distance
       cache. */
    uint32_t shortcut;
  } u;
} ZopfliNode;
64
+
65
+ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
66
+
67
+ /* Computes the shortest path of commands from position to at most
68
+ position + num_bytes.
69
+
70
+ On return, path->size() is the number of commands found and path[i] is the
71
+ length of the i-th command (copy length plus insert length).
72
+ Note that the sum of the lengths of all commands can be less than num_bytes.
73
+
74
+ On return, the nodes[0..num_bytes] array will have the following
75
+ "ZopfliNode array invariant":
76
+ For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
77
+ (1) nodes[i].copy_length() >= 2
78
+ (2) nodes[i].command_length() <= i and
79
+ (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
80
+ BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
81
+ MemoryManager* m, size_t num_bytes,
82
+ size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
83
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
84
+ const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
85
+
86
+ BROTLI_INTERNAL void BrotliZopfliCreateCommands(
87
+ const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
88
+ int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
89
+ Command* commands, size_t* num_literals);
90
+
91
+ #if defined(__cplusplus) || defined(c_plusplus)
92
+ } /* extern "C" */
93
+ #endif
94
+
95
+ #endif /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
@@ -0,0 +1,163 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: EXPORT_FN, FN */
9
+
10
+ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
11
+ size_t num_bytes, size_t position,
12
+ const uint8_t* ringbuffer, size_t ringbuffer_mask,
13
+ ContextLut literal_context_lut, const BrotliEncoderParams* params,
14
+ Hasher* hasher, int* dist_cache, size_t* last_insert_len,
15
+ Command* commands, size_t* num_commands, size_t* num_literals) {
16
+ HASHER()* privat = &hasher->privat.FN(_);
17
+ /* Set maximum distance, see section 9.1. of the spec. */
18
+ const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
19
+ const size_t position_offset = params->stream_offset;
20
+
21
+ const Command* const orig_commands = commands;
22
+ size_t insert_length = *last_insert_len;
23
+ const size_t pos_end = position + num_bytes;
24
+ const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
25
+ position + num_bytes - FN(StoreLookahead)() + 1 : position;
26
+
27
+ /* For speed up heuristics for random data. */
28
+ const size_t random_heuristics_window_size =
29
+ LiteralSpreeLengthForSparseSearch(params);
30
+ size_t apply_random_heuristics = position + random_heuristics_window_size;
31
+ const size_t gap = 0;
32
+
33
+ /* Minimum score to accept a backward reference. */
34
+ const score_t kMinScore = BROTLI_SCORE_BASE + 100;
35
+
36
+ BROTLI_UNUSED(literal_context_lut);
37
+
38
+ FN(PrepareDistanceCache)(privat, dist_cache);
39
+
40
+ while (position + FN(HashTypeLength)() < pos_end) {
41
+ size_t max_length = pos_end - position;
42
+ size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
43
+ size_t dictionary_start = BROTLI_MIN(size_t,
44
+ position + position_offset, max_backward_limit);
45
+ HasherSearchResult sr;
46
+ sr.len = 0;
47
+ sr.len_code_delta = 0;
48
+ sr.distance = 0;
49
+ sr.score = kMinScore;
50
+ FN(FindLongestMatch)(privat, &params->dictionary,
51
+ ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
52
+ max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
53
+ if (sr.score > kMinScore) {
54
+ /* Found a match. Let's look for something even better ahead. */
55
+ int delayed_backward_references_in_row = 0;
56
+ --max_length;
57
+ for (;; --max_length) {
58
+ const score_t cost_diff_lazy = 175;
59
+ HasherSearchResult sr2;
60
+ sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
61
+ BROTLI_MIN(size_t, sr.len - 1, max_length) : 0;
62
+ sr2.len_code_delta = 0;
63
+ sr2.distance = 0;
64
+ sr2.score = kMinScore;
65
+ max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
66
+ dictionary_start = BROTLI_MIN(size_t,
67
+ position + 1 + position_offset, max_backward_limit);
68
+ FN(FindLongestMatch)(privat,
69
+ &params->dictionary,
70
+ ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
71
+ max_distance, dictionary_start + gap, params->dist.max_distance,
72
+ &sr2);
73
+ if (sr2.score >= sr.score + cost_diff_lazy) {
74
+ /* Ok, let's just write one byte for now and start a match from the
75
+ next byte. */
76
+ ++position;
77
+ ++insert_length;
78
+ sr = sr2;
79
+ if (++delayed_backward_references_in_row < 4 &&
80
+ position + FN(HashTypeLength)() < pos_end) {
81
+ continue;
82
+ }
83
+ }
84
+ break;
85
+ }
86
+ apply_random_heuristics =
87
+ position + 2 * sr.len + random_heuristics_window_size;
88
+ dictionary_start = BROTLI_MIN(size_t,
89
+ position + position_offset, max_backward_limit);
90
+ {
91
+ /* The first 16 codes are special short-codes,
92
+ and the minimum offset is 1. */
93
+ size_t distance_code = ComputeDistanceCode(
94
+ sr.distance, dictionary_start + gap, dist_cache);
95
+ if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
96
+ dist_cache[3] = dist_cache[2];
97
+ dist_cache[2] = dist_cache[1];
98
+ dist_cache[1] = dist_cache[0];
99
+ dist_cache[0] = (int)sr.distance;
100
+ FN(PrepareDistanceCache)(privat, dist_cache);
101
+ }
102
+ InitCommand(commands++, &params->dist, insert_length,
103
+ sr.len, sr.len_code_delta, distance_code);
104
+ }
105
+ *num_literals += insert_length;
106
+ insert_length = 0;
107
+ /* Put the hash keys into the table, if there are enough bytes left.
108
+ Depending on the hasher implementation, it can push all positions
109
+ in the given range or only a subset of them.
110
+ Avoid hash poisoning with RLE data. */
111
+ {
112
+ size_t range_start = position + 2;
113
+ size_t range_end = BROTLI_MIN(size_t, position + sr.len, store_end);
114
+ if (sr.distance < (sr.len >> 2)) {
115
+ range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
116
+ range_start, position + sr.len - (sr.distance << 2)));
117
+ }
118
+ FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
119
+ range_end);
120
+ }
121
+ position += sr.len;
122
+ } else {
123
+ ++insert_length;
124
+ ++position;
125
+ /* If we have not seen matches for a long time, we can skip some
126
+ match lookups. Unsuccessful match lookups are very very expensive
127
+ and this kind of a heuristic speeds up compression quite
128
+ a lot. */
129
+ if (position > apply_random_heuristics) {
130
+ /* Going through uncompressible data, jump. */
131
+ if (position >
132
+ apply_random_heuristics + 4 * random_heuristics_window_size) {
133
+ /* It is quite a long time since we saw a copy, so we assume
134
+ that this data is not compressible, and store hashes less
135
+ often. Hashes of non compressible data are less likely to
136
+ turn out to be useful in the future, too, so we store less of
137
+ them to not to flood out the hash table of good compressible
138
+ data. */
139
+ const size_t kMargin =
140
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
141
+ size_t pos_jump =
142
+ BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
143
+ for (; position < pos_jump; position += 4) {
144
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
145
+ insert_length += 4;
146
+ }
147
+ } else {
148
+ const size_t kMargin =
149
+ BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
150
+ size_t pos_jump =
151
+ BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
152
+ for (; position < pos_jump; position += 2) {
153
+ FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
154
+ insert_length += 2;
155
+ }
156
+ }
157
+ }
158
+ }
159
+ }
160
+ insert_length += pos_end - position;
161
+ *last_insert_len = insert_length;
162
+ *num_commands += (size_t)(commands - orig_commands);
163
+ }
@@ -0,0 +1,35 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions to estimate the bit cost of Huffman trees. */
8
+
9
+ #include "brotli_enc_bit_cost.h"
10
+
11
+ #include "brotli_common_constants.h"
12
+ #include "brotli_common_platform.h"
13
+ #include "brotli_types.h"
14
+ #include "brotli_enc_fast_log.h"
15
+ #include "brotli_enc_histogram.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
+ #define FN(X) X ## Literal
22
+ #include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
23
+ #undef FN
24
+
25
+ #define FN(X) X ## Command
26
+ #include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
27
+ #undef FN
28
+
29
+ #define FN(X) X ## Distance
30
+ #include "brotli_enc_bit_cost_inc.h" /* NOLINT(build/include) */
31
+ #undef FN
32
+
33
+ #if defined(__cplusplus) || defined(c_plusplus)
34
+ } /* extern "C" */
35
+ #endif
@@ -0,0 +1,63 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Functions to estimate the bit cost of Huffman trees. */
8
+
9
+ #ifndef BROTLI_ENC_BIT_COST_H_
10
+ #define BROTLI_ENC_BIT_COST_H_
11
+
12
+ #include "brotli_common_platform.h"
13
+ #include "brotli_types.h"
14
+ #include "brotli_enc_fast_log.h"
15
+ #include "brotli_enc_histogram.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
+ static BROTLI_INLINE double ShannonEntropy(
22
+ const uint32_t* population, size_t size, size_t* total) {
23
+ size_t sum = 0;
24
+ double retval = 0;
25
+ const uint32_t* population_end = population + size;
26
+ size_t p;
27
+ if (size & 1) {
28
+ goto odd_number_of_elements_left;
29
+ }
30
+ while (population < population_end) {
31
+ p = *population++;
32
+ sum += p;
33
+ retval -= (double)p * FastLog2(p);
34
+ odd_number_of_elements_left:
35
+ p = *population++;
36
+ sum += p;
37
+ retval -= (double)p * FastLog2(p);
38
+ }
39
+ if (sum) retval += (double)sum * FastLog2(sum);
40
+ *total = sum;
41
+ return retval;
42
+ }
43
+
44
+ static BROTLI_INLINE double BitsEntropy(
45
+ const uint32_t* population, size_t size) {
46
+ size_t sum;
47
+ double retval = ShannonEntropy(population, size, &sum);
48
+ if (retval < sum) {
49
+ /* At least one bit per literal is needed. */
50
+ retval = (double)sum;
51
+ }
52
+ return retval;
53
+ }
54
+
55
+ BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
56
+ BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
57
+ BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
58
+
59
+ #if defined(__cplusplus) || defined(c_plusplus)
60
+ } /* extern "C" */
61
+ #endif
62
+
63
+ #endif /* BROTLI_ENC_BIT_COST_H_ */
@@ -0,0 +1,127 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ #define HistogramType FN(Histogram)
11
+
12
+ double FN(BrotliPopulationCost)(const HistogramType* histogram) {
13
+ static const double kOneSymbolHistogramCost = 12;
14
+ static const double kTwoSymbolHistogramCost = 20;
15
+ static const double kThreeSymbolHistogramCost = 28;
16
+ static const double kFourSymbolHistogramCost = 37;
17
+ const size_t data_size = FN(HistogramDataSize)();
18
+ int count = 0;
19
+ size_t s[5];
20
+ double bits = 0.0;
21
+ size_t i;
22
+ if (histogram->total_count_ == 0) {
23
+ return kOneSymbolHistogramCost;
24
+ }
25
+ for (i = 0; i < data_size; ++i) {
26
+ if (histogram->data_[i] > 0) {
27
+ s[count] = i;
28
+ ++count;
29
+ if (count > 4) break;
30
+ }
31
+ }
32
+ if (count == 1) {
33
+ return kOneSymbolHistogramCost;
34
+ }
35
+ if (count == 2) {
36
+ return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
37
+ }
38
+ if (count == 3) {
39
+ const uint32_t histo0 = histogram->data_[s[0]];
40
+ const uint32_t histo1 = histogram->data_[s[1]];
41
+ const uint32_t histo2 = histogram->data_[s[2]];
42
+ const uint32_t histomax =
43
+ BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
44
+ return (kThreeSymbolHistogramCost +
45
+ 2 * (histo0 + histo1 + histo2) - histomax);
46
+ }
47
+ if (count == 4) {
48
+ uint32_t histo[4];
49
+ uint32_t h23;
50
+ uint32_t histomax;
51
+ for (i = 0; i < 4; ++i) {
52
+ histo[i] = histogram->data_[s[i]];
53
+ }
54
+ /* Sort */
55
+ for (i = 0; i < 4; ++i) {
56
+ size_t j;
57
+ for (j = i + 1; j < 4; ++j) {
58
+ if (histo[j] > histo[i]) {
59
+ BROTLI_SWAP(uint32_t, histo, j, i);
60
+ }
61
+ }
62
+ }
63
+ h23 = histo[2] + histo[3];
64
+ histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
65
+ return (kFourSymbolHistogramCost +
66
+ 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
67
+ }
68
+
69
+ {
70
+ /* In this loop we compute the entropy of the histogram and simultaneously
71
+ build a simplified histogram of the code length codes where we use the
72
+ zero repeat code 17, but we don't use the non-zero repeat code 16. */
73
+ size_t max_depth = 1;
74
+ uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
75
+ const double log2total = FastLog2(histogram->total_count_);
76
+ for (i = 0; i < data_size;) {
77
+ if (histogram->data_[i] > 0) {
78
+ /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
79
+ = log2(total_count) - log2(count(symbol)) */
80
+ double log2p = log2total - FastLog2(histogram->data_[i]);
81
+ /* Approximate the bit depth by round(-log2(P(symbol))) */
82
+ size_t depth = (size_t)(log2p + 0.5);
83
+ bits += histogram->data_[i] * log2p;
84
+ if (depth > 15) {
85
+ depth = 15;
86
+ }
87
+ if (depth > max_depth) {
88
+ max_depth = depth;
89
+ }
90
+ ++depth_histo[depth];
91
+ ++i;
92
+ } else {
93
+ /* Compute the run length of zeros and add the appropriate number of 0
94
+ and 17 code length codes to the code length code histogram. */
95
+ uint32_t reps = 1;
96
+ size_t k;
97
+ for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
98
+ ++reps;
99
+ }
100
+ i += reps;
101
+ if (i == data_size) {
102
+ /* Don't add any cost for the last zero run, since these are encoded
103
+ only implicitly. */
104
+ break;
105
+ }
106
+ if (reps < 3) {
107
+ depth_histo[0] += reps;
108
+ } else {
109
+ reps -= 2;
110
+ while (reps > 0) {
111
+ ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
112
+ /* Add the 3 extra bits for the 17 code length code. */
113
+ bits += 3;
114
+ reps >>= 3;
115
+ }
116
+ }
117
+ }
118
+ }
119
+ /* Add the estimated encoding cost of the code length code histogram. */
120
+ bits += (double)(18 + 2 * max_depth);
121
+ /* Add the entropy of the code length code histogram. */
122
+ bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
123
+ }
124
+ return bits;
125
+ }
126
+
127
+ #undef HistogramType
@@ -0,0 +1,34 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2014 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: FN */
9
+
10
+ #define HistogramType FN(Histogram)
11
+
12
+ /* Creates entropy codes for all block types and stores them to the bit
13
+ stream. */
14
+ static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
15
+ const HistogramType* histograms, const size_t histograms_size,
16
+ const size_t alphabet_size, HuffmanTree* tree,
17
+ size_t* storage_ix, uint8_t* storage) {
18
+ const size_t table_size = histograms_size * self->histogram_length_;
19
+ self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
20
+ self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
21
+ if (BROTLI_IS_OOM(m)) return;
22
+
23
+ {
24
+ size_t i;
25
+ for (i = 0; i < histograms_size; ++i) {
26
+ size_t ix = i * self->histogram_length_;
27
+ BuildAndStoreHuffmanTree(&histograms[i].data_[0], self->histogram_length_,
28
+ alphabet_size, tree, &self->depths_[ix], &self->bits_[ix],
29
+ storage_ix, storage);
30
+ }
31
+ }
32
+ }
33
+
34
+ #undef HistogramType