isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,51 @@
1
+ /* NOLINT(build/header_guard) */
2
+ /* Copyright 2013 Google Inc. All Rights Reserved.
3
+
4
+ Distributed under MIT license.
5
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6
+ */
7
+
8
+ /* template parameters: Histogram, DATA_SIZE, DataType */
9
+
10
+ /* A simple container for histograms of data in blocks. */
11
+
12
+ typedef struct FN(Histogram) {
13
+ uint32_t data_[DATA_SIZE];  /* One counter per symbol value; bumped by the HistogramAdd* helpers. */
14
+ size_t total_count_;  /* Running number of samples folded into data_. */
15
+ double bit_cost_;  /* Set to HUGE_VAL by HistogramClear; not otherwise touched in this header. */
16
+ } FN(Histogram);
17
+
18
+ static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
19
+ memset(self->data_, 0, sizeof(self->data_));
20
+ self->total_count_ = 0;
21
+ self->bit_cost_ = HUGE_VAL;
22
+ }
23
+
24
+ static BROTLI_INLINE void FN(ClearHistograms)(
25
+ FN(Histogram)* array, size_t length) {
26
+ size_t i;
27
+ for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
28
+ }
29
+
30
+ static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
31
+ ++self->data_[val];
32
+ ++self->total_count_;
33
+ }
34
+
35
+ static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
36
+ const DataType* p, size_t n) {
37
+ self->total_count_ += n;
38
+ n += 1;
39
+ while (--n) ++self->data_[*p++];
40
+ }
41
+
42
+ static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
43
+ const FN(Histogram)* v) {
44
+ size_t i;
45
+ self->total_count_ += v->total_count_;
46
+ for (i = 0; i < DATA_SIZE; ++i) {
47
+ self->data_[i] += v->data_[i];
48
+ }
49
+ }
50
+
51
+ static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
@@ -0,0 +1,175 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Literal cost model to allow backward reference replacement to be efficient.
8
+ */
9
+
10
+ #include "brotli_enc_literal_cost.h"
11
+
12
+ #include "brotli_common_platform.h"
13
+ #include "brotli_types.h"
14
+ #include "brotli_enc_fast_log.h"
15
+ #include "brotli_enc_utf8_util.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
/* Classifies the position inside a UTF-8 sequence of the byte that FOLLOWS
   `c`, given the byte `last` that preceded `c`.
   Returns 0 when the next byte starts a new sequence, 1 when it is a second
   byte, 2 when it is a third byte; the result never exceeds `clamp`. */
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  size_t position;
  if (c < 128) {
    /* ASCII byte: whatever follows is a fresh 'Byte 1'. */
    return 0;
  }
  if (c >= 192) {
    /* Lead byte: the next one is 'Byte 2'. */
    position = 1;
  } else if (last < 0xE0) {
    /* Continuation byte that completes a two or three byte coding. */
    return 0;
  } else {
    /* Continuation byte after a 3+ byte lead: the next one is 'Byte 3'. */
    position = 2;
  }
  return (position < clamp) ? position : clamp;
}
35
+
36
/* Scans data[pos .. pos + len) in the ring buffer (data, mask) and picks how
   many UTF-8 byte positions deserve their own statistics.
   Returns 0 (single-byte modeling) when fewer than 25 bytes fall into the
   second/third-byte classes, and 1 otherwise. */
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                        const uint8_t* data) {
  size_t counts[3] = { 0 };
  size_t level = 1;  /* should be 2, but 1 compresses better. */
  size_t prev = 0;
  size_t offset;
  for (offset = 0; offset < len; ++offset) {
    const size_t cur = data[(pos + offset) & mask];
    counts[UTF8Position(prev, cur, 2)] += 1;
    prev = cur;
  }
  /* Too few third bytes: cap the level at two-byte modeling. */
  if (counts[2] < 500) {
    level = 1;
  }
  /* Hardly any multi-byte content at all: plain single-byte modeling. */
  if (counts[1] + counts[2] < 25) {
    level = 0;
  }
  return level;
}
55
+
56
+ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
57
+ const uint8_t* data, float* cost) {
58
+ /* max_utf8 is 0 (normal ASCII single byte modeling),
59
+ 1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
60
+ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
61
+ size_t histogram[3][256] = { { 0 } };
62
+ size_t window_half = 495;
63
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
64
+ size_t in_window_utf8[3] = { 0 };
65
+
66
+ size_t i;
67
+ { /* Bootstrap histograms. */
68
+ size_t last_c = 0;
69
+ size_t utf8_pos = 0;
70
+ for (i = 0; i < in_window; ++i) {
71
+ size_t c = data[(pos + i) & mask];
72
+ ++histogram[utf8_pos][c];
73
+ ++in_window_utf8[utf8_pos];
74
+ utf8_pos = UTF8Position(last_c, c, max_utf8);
75
+ last_c = c;
76
+ }
77
+ }
78
+
79
+ /* Compute bit costs with sliding window. */
80
+ for (i = 0; i < len; ++i) {
81
+ if (i >= window_half) {
82
+ /* Remove a byte in the past. */
83
+ size_t c =
84
+ i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
85
+ size_t last_c =
86
+ i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
87
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
88
+ --histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
89
+ --in_window_utf8[utf8_pos2];
90
+ }
91
+ if (i + window_half < len) {
92
+ /* Add a byte in the future. */
93
+ size_t c = data[(pos + i + window_half - 1) & mask];
94
+ size_t last_c = data[(pos + i + window_half - 2) & mask];
95
+ size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
96
+ ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
97
+ ++in_window_utf8[utf8_pos2];
98
+ }
99
+ {
100
+ size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
101
+ size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
102
+ size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
103
+ size_t masked_pos = (pos + i) & mask;
104
+ size_t histo = histogram[utf8_pos][data[masked_pos]];
105
+ double lit_cost;
106
+ if (histo == 0) {
107
+ histo = 1;
108
+ }
109
+ lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
110
+ lit_cost += 0.02905;
111
+ if (lit_cost < 1.0) {
112
+ lit_cost *= 0.5;
113
+ lit_cost += 0.5;
114
+ }
115
+ /* Make the first bytes more expensive -- seems to help, not sure why.
116
+ Perhaps because the entropy source is changing its properties
117
+ rapidly in the beginning of the file, perhaps because the beginning
118
+ of the data is a statistical "anomaly". */
119
+ if (i < 2000) {
120
+ lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
121
+ }
122
+ cost[i] = (float)lit_cost;
123
+ }
124
+ }
125
+ }
126
+
127
+ void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
128
+ const uint8_t* data, float* cost) {
129
+ if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
130
+ EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
131
+ return;
132
+ } else {
133
+ size_t histogram[256] = { 0 };
134
+ size_t window_half = 2000;
135
+ size_t in_window = BROTLI_MIN(size_t, window_half, len);
136
+
137
+ /* Bootstrap histogram. */
138
+ size_t i;
139
+ for (i = 0; i < in_window; ++i) {
140
+ ++histogram[data[(pos + i) & mask]];
141
+ }
142
+
143
+ /* Compute bit costs with sliding window. */
144
+ for (i = 0; i < len; ++i) {
145
+ size_t histo;
146
+ if (i >= window_half) {
147
+ /* Remove a byte in the past. */
148
+ --histogram[data[(pos + i - window_half) & mask]];
149
+ --in_window;
150
+ }
151
+ if (i + window_half < len) {
152
+ /* Add a byte in the future. */
153
+ ++histogram[data[(pos + i + window_half) & mask]];
154
+ ++in_window;
155
+ }
156
+ histo = histogram[data[(pos + i) & mask]];
157
+ if (histo == 0) {
158
+ histo = 1;
159
+ }
160
+ {
161
+ double lit_cost = FastLog2(in_window) - FastLog2(histo);
162
+ lit_cost += 0.029;
163
+ if (lit_cost < 1.0) {
164
+ lit_cost *= 0.5;
165
+ lit_cost += 0.5;
166
+ }
167
+ cost[i] = (float)lit_cost;
168
+ }
169
+ }
170
+ }
171
+ }
172
+
173
+ #if defined(__cplusplus) || defined(c_plusplus)
174
+ } /* extern "C" */
175
+ #endif
@@ -0,0 +1,30 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Literal cost model to allow backward reference replacement to be efficient.
8
+ */
9
+
10
+ #ifndef BROTLI_ENC_LITERAL_COST_H_
11
+ #define BROTLI_ENC_LITERAL_COST_H_
12
+
13
+ #include "brotli_common_platform.h"
14
+ #include "brotli_types.h"
15
+
16
+ #if defined(__cplusplus) || defined(c_plusplus)
17
+ extern "C" {
18
+ #endif
19
+
20
+ /* Estimates how many bits the literals in the interval [pos, pos + len) in the
21
+ ring-buffer (data, mask) will take entropy coded and writes these estimates
22
+ to the cost[0..len) array. */
23
+ BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
24
+ size_t pos, size_t len, size_t mask, const uint8_t* data, float* cost);
25
+
26
+ #if defined(__cplusplus) || defined(c_plusplus)
27
+ } /* extern "C" */
28
+ #endif
29
+
30
+ #endif /* BROTLI_ENC_LITERAL_COST_H_ */
@@ -0,0 +1,170 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Algorithms for distributing the literals and commands of a metablock between
8
+ block types and contexts. */
9
+
10
+ #include "brotli_enc_memory.h"
11
+
12
+ #include <stdlib.h> /* exit, free, malloc */
13
+ #include <string.h> /* memcpy */
14
+
15
+ #include "brotli_common_platform.h"
16
+ #include "brotli_types.h"
17
+
18
+ #if defined(__cplusplus) || defined(c_plusplus)
19
+ extern "C" {
20
+ #endif
21
+
22
+ #define MAX_PERM_ALLOCATED 128
23
+ #define MAX_NEW_ALLOCATED 64
24
+ #define MAX_NEW_FREED 64
25
+
26
+ #define PERM_ALLOCATED_OFFSET 0
27
+ #define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
28
+ #define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
29
+
30
+ void BrotliInitMemoryManager(
31
+ MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
32
+ void* opaque) {
33
+ if (!alloc_func) {
34
+ m->alloc_func = BrotliDefaultAllocFunc;
35
+ m->free_func = BrotliDefaultFreeFunc;
36
+ m->opaque = 0;
37
+ } else {
38
+ m->alloc_func = alloc_func;
39
+ m->free_func = free_func;
40
+ m->opaque = opaque;
41
+ }
42
+ #if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
43
+ m->is_oom = BROTLI_FALSE;
44
+ m->perm_allocated = 0;
45
+ m->new_allocated = 0;
46
+ m->new_freed = 0;
47
+ #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
48
+ }
49
+
50
+ #if defined(BROTLI_ENCODER_EXIT_ON_OOM)
51
+
52
+ void* BrotliAllocate(MemoryManager* m, size_t n) {
53
+ void* result = m->alloc_func(m->opaque, n);
54
+ if (!result) exit(EXIT_FAILURE);
55
+ return result;
56
+ }
57
+
58
+ void BrotliFree(MemoryManager* m, void* p) {
59
+ m->free_func(m->opaque, p);
60
+ }
61
+
62
+ void BrotliWipeOutMemoryManager(MemoryManager* m) {
63
+ BROTLI_UNUSED(m);
64
+ }
65
+
66
+ #else /* BROTLI_ENCODER_EXIT_ON_OOM */
67
+
68
/* Sorts items[0..n) into ascending address order, in place, with a Shell
   sort over a fixed gap sequence. */
static void SortPointers(void** items, const size_t n) {
  static const size_t kGaps[] = {23, 10, 4, 1};
  const size_t num_gaps = sizeof(kGaps) / sizeof(kGaps[0]);
  size_t pass;
  for (pass = 0; pass < num_gaps; ++pass) {
    const size_t gap = kGaps[pass];
    size_t start;
    for (start = gap; start < n; ++start) {
      /* Gapped insertion: shift larger entries up, then drop `moving` in. */
      void* const moving = items[start];
      size_t hole = start;
      while (hole >= gap && moving < items[hole - gap]) {
        items[hole] = items[hole - gap];
        hole -= gap;
      }
      items[hole] = moving;
    }
  }
}
85
+
86
/* Removes every pointer that occurs in both sorted arrays a[0..a_len) and
   b[0..b_len). Survivors are compacted to the front of their own array with
   order preserved. Returns the number of matched pairs removed, i.e. how
   much each array shrank. */
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
  size_t ai = 0;
  size_t bi = 0;
  size_t a_kept = 0;
  size_t b_kept = 0;
  size_t matched = 0;
  while (ai < a_len && bi < b_len) {
    void* const pa = a[ai];
    void* const pb = b[bi];
    if (pa == pb) {
      /* Present on both sides: drop the pair. */
      ++ai;
      ++bi;
      ++matched;
    } else if (pa < pb) {
      a[a_kept++] = pa;
      ++ai;
    } else {
      b[b_kept++] = pb;
      ++bi;
    }
  }
  /* Whatever remains on one side has no partner; keep it. */
  for (; ai < a_len; ++ai) a[a_kept++] = a[ai];
  for (; bi < b_len; ++bi) b[b_kept++] = b[bi];
  return matched;
}
107
+
108
+ static void CollectGarbagePointers(MemoryManager* m) {
109
+ size_t annihilated;
110
+ SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
111
+ SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
112
+ annihilated = Annihilate(
113
+ m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
114
+ m->pointers + NEW_FREED_OFFSET, m->new_freed);
115
+ m->new_allocated -= annihilated;
116
+ m->new_freed -= annihilated;
117
+
118
+ if (m->new_freed != 0) {
119
+ annihilated = Annihilate(
120
+ m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
121
+ m->pointers + NEW_FREED_OFFSET, m->new_freed);
122
+ m->perm_allocated -= annihilated;
123
+ m->new_freed -= annihilated;
124
+ BROTLI_DCHECK(m->new_freed == 0);
125
+ }
126
+
127
+ if (m->new_allocated != 0) {
128
+ BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
129
+ memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
130
+ m->pointers + NEW_ALLOCATED_OFFSET,
131
+ sizeof(void*) * m->new_allocated);
132
+ m->perm_allocated += m->new_allocated;
133
+ m->new_allocated = 0;
134
+ SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
135
+ }
136
+ }
137
+
138
+ void* BrotliAllocate(MemoryManager* m, size_t n) {
139
+ void* result = m->alloc_func(m->opaque, n);
140
+ if (!result) {
141
+ m->is_oom = BROTLI_TRUE;
142
+ return NULL;
143
+ }
144
+ if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
145
+ m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
146
+ return result;
147
+ }
148
+
149
+ void BrotliFree(MemoryManager* m, void* p) {
150
+ if (!p) return;
151
+ m->free_func(m->opaque, p);
152
+ if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
153
+ m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
154
+ }
155
+
156
+ void BrotliWipeOutMemoryManager(MemoryManager* m) {
157
+ size_t i;
158
+ CollectGarbagePointers(m);
159
+ /* Now all unfreed pointers are in perm-allocated list. */
160
+ for (i = 0; i < m->perm_allocated; ++i) {
161
+ m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
162
+ }
163
+ m->perm_allocated = 0;
164
+ }
165
+
166
+ #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
167
+
168
+ #if defined(__cplusplus) || defined(c_plusplus)
169
+ } /* extern "C" */
170
+ #endif
@@ -0,0 +1,114 @@
1
+ /* Copyright 2016 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Macros for memory management. */
8
+
9
+ #ifndef BROTLI_ENC_MEMORY_H_
10
+ #define BROTLI_ENC_MEMORY_H_
11
+
12
+ #include <string.h> /* memcpy */
13
+
14
+ #include "brotli_common_platform.h"
15
+ #include "brotli_types.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
+ #if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
22
+ !defined(BROTLI_ENCODER_EXIT_ON_OOM)
23
+ #define BROTLI_ENCODER_EXIT_ON_OOM
24
+ #endif
25
+
26
+ typedef struct MemoryManager {
27
+ brotli_alloc_func alloc_func;  /* Allocation callback; invoked as alloc_func(opaque, size). */
28
+ brotli_free_func free_func;  /* Release callback; invoked as free_func(opaque, pointer). */
29
+ void* opaque;  /* Passed unchanged as the first argument of both callbacks. */
30
+ #if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
31
+ BROTLI_BOOL is_oom;  /* Set once an allocation has failed; read by BROTLI_IS_OOM. */
32
+ size_t perm_allocated;  /* Entries used in the long-lived section of pointers[]. */
33
+ size_t new_allocated;  /* Entries used in the recently-allocated section of pointers[]. */
34
+ size_t new_freed;  /* Entries used in the recently-freed section of pointers[]. */
35
+ void* pointers[256];  /* Journal storage shared by the three sections above. */
36
+ #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
37
+ } MemoryManager;
38
+
39
+ BROTLI_INTERNAL void BrotliInitMemoryManager(
40
+ MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
41
+ void* opaque);
42
+
43
+ BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
44
+ #define BROTLI_ALLOC(M, T, N) \
45
+ ((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
46
+
47
+ BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
48
+ #define BROTLI_FREE(M, P) { \
49
+ BrotliFree((M), (P)); \
50
+ P = NULL; \
51
+ }
52
+
53
+ #if defined(BROTLI_ENCODER_EXIT_ON_OOM)
54
+ #define BROTLI_IS_OOM(M) (!!0)
55
+ #else /* BROTLI_ENCODER_EXIT_ON_OOM */
56
+ #define BROTLI_IS_OOM(M) (!!(M)->is_oom)
57
+ #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
58
+
59
+ /*
60
+ BROTLI_IS_NULL is a fake check, BROTLI_IS_OOM does the heavy lifting.
61
+ The only purpose of it is to explain static analyzers the state of things.
62
+ NB: use ONLY together with BROTLI_IS_OOM
63
+ AND ONLY for allocations in the current scope.
64
+ */
65
/* Under the Clang static analyzer (cleanup-on-OOM builds only) this is a real
   null check, so the analyzer can see that a failed allocation is handled.
   In every other build it is a constant false.
   NULL is used rather than `nullptr`: this header is compiled as C, where
   `nullptr` does not exist before C23, and NULL is equally valid in C++. */
#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_IS_NULL(A) ((A) == NULL)
#else  /* defined(__clang_analyzer__) */
#define BROTLI_IS_NULL(A) (!!0)
#endif  /* defined(__clang_analyzer__) */
70
+
71
+ BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
72
+
73
+ /*
74
+ Dynamically grows array capacity to at least the requested size
75
+ M: MemoryManager
76
+ T: data type
77
+ A: array
78
+ C: capacity
79
+ R: requested size
80
+ */
81
+ #define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
82
+ if (C < (R)) { \
83
+ size_t _new_size = (C == 0) ? (R) : C; \
84
+ T* new_array; \
85
+ while (_new_size < (R)) _new_size *= 2; \
86
+ new_array = BROTLI_ALLOC((M), T, _new_size); \
87
+ if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
88
+ memcpy(new_array, A, C * sizeof(T)); \
89
+ BROTLI_FREE((M), A); \
90
+ A = new_array; \
91
+ C = _new_size; \
92
+ } \
93
+ }
94
+
95
+ /*
96
+ Appends value and dynamically grows array capacity when needed
97
+ M: MemoryManager
98
+ T: data type
99
+ A: array
100
+ C: array capacity
101
+ S: array size
102
+ V: value to append
103
+ */
104
+ #define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
105
+ (S)++; \
106
+ BROTLI_ENSURE_CAPACITY(M, T, A, C, S); \
107
+ A[(S) - 1] = (V); \
108
+ }
109
+
110
+ #if defined(__cplusplus) || defined(c_plusplus)
111
+ } /* extern "C" */
112
+ #endif
113
+
114
+ #endif /* BROTLI_ENC_MEMORY_H_ */