isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,645 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function for fast encoding of an input fragment, independently from the input
8
+ history. This function uses two-pass processing: in the first pass we save
9
+ the found backward matches and literal bytes into a buffer, and in the
10
+ second pass we emit them into the bit stream using prefix codes built based
11
+ on the actual command and literal byte histograms. */
12
+
13
+ #include "brotli_enc_compress_fragment_two_pass.h"
14
+
15
+ #include <string.h> /* memcmp, memcpy, memset */
16
+
17
+ #include "brotli_common_constants.h"
18
+ #include "brotli_common_platform.h"
19
+ #include "brotli_types.h"
20
+ #include "brotli_enc_bit_cost.h"
21
+ #include "brotli_enc_brotli_bit_stream.h"
22
+ #include "brotli_enc_entropy_encode.h"
23
+ #include "brotli_enc_fast_log.h"
24
+ #include "brotli_enc_find_match_length.h"
25
+ #include "brotli_enc_memory.h"
26
+ #include "brotli_enc_write_bits.h"
27
+
28
+ #if defined(__cplusplus) || defined(c_plusplus)
29
+ extern "C" {
30
+ #endif
31
+
32
+ #define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
33
+
34
+ /* kHashMul32 multiplier has these properties:
35
+ * The multiplier must be odd. Otherwise we may lose the highest bit.
36
+ * No long streaks of ones or zeros.
37
+ * There is no effort to ensure that it is a prime, the oddity is enough
38
+ for this use.
39
+ * The number has been tuned heuristically against compression benchmarks. */
40
+ static const uint32_t kHashMul32 = 0x1E35A7BD;
41
+
42
+ static BROTLI_INLINE uint32_t Hash(const uint8_t* p,
43
+ size_t shift, size_t length) {
44
+ const uint64_t h =
45
+ (BROTLI_UNALIGNED_LOAD64LE(p) << ((8 - length) * 8)) * kHashMul32;
46
+ return (uint32_t)(h >> shift);
47
+ }
48
+
49
+ static BROTLI_INLINE uint32_t HashBytesAtOffset(uint64_t v, size_t offset,
50
+ size_t shift, size_t length) {
51
+ BROTLI_DCHECK(offset <= 8 - length);
52
+ {
53
+ const uint64_t h = ((v >> (8 * offset)) << ((8 - length) * 8)) * kHashMul32;
54
+ return (uint32_t)(h >> shift);
55
+ }
56
+ }
57
+
58
+ static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2,
59
+ size_t length) {
60
+ if (BrotliUnalignedRead32(p1) == BrotliUnalignedRead32(p2)) {
61
+ if (length == 4) return BROTLI_TRUE;
62
+ return TO_BROTLI_BOOL(p1[4] == p2[4] && p1[5] == p2[5]);
63
+ }
64
+ return BROTLI_FALSE;
65
+ }
66
+
67
+ /* Builds a command and distance prefix code (each 64 symbols) into "depth" and
68
+ "bits" based on "histogram" and stores it into the bit stream. */
69
+ static void BuildAndStoreCommandPrefixCode(
70
+ const uint32_t histogram[128],
71
+ uint8_t depth[128], uint16_t bits[128],
72
+ size_t* storage_ix, uint8_t* storage) {
73
+ /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
74
+ HuffmanTree tree[129];
75
+ uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
76
+ uint16_t cmd_bits[64];
77
+ BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
78
+ BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
79
+ /* We have to jump through a few hoops here in order to compute
80
+ the command bits because the symbols are in a different order than in
81
+ the full alphabet. This looks complicated, but having the symbols
82
+ in this order in the command bits saves a few branches in the Emit*
83
+ functions. */
84
+ memcpy(cmd_depth, depth + 24, 24);
85
+ memcpy(cmd_depth + 24, depth, 8);
86
+ memcpy(cmd_depth + 32, depth + 48, 8);
87
+ memcpy(cmd_depth + 40, depth + 8, 8);
88
+ memcpy(cmd_depth + 48, depth + 56, 8);
89
+ memcpy(cmd_depth + 56, depth + 16, 8);
90
+ BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
91
+ memcpy(bits, cmd_bits + 24, 16);
92
+ memcpy(bits + 8, cmd_bits + 40, 16);
93
+ memcpy(bits + 16, cmd_bits + 56, 16);
94
+ memcpy(bits + 24, cmd_bits, 48);
95
+ memcpy(bits + 48, cmd_bits + 32, 16);
96
+ memcpy(bits + 56, cmd_bits + 48, 16);
97
+ BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
98
+ {
99
+ /* Create the bit length array for the full command alphabet. */
100
+ size_t i;
101
+ memset(cmd_depth, 0, 64); /* only 64 first values were used */
102
+ memcpy(cmd_depth, depth + 24, 8);
103
+ memcpy(cmd_depth + 64, depth + 32, 8);
104
+ memcpy(cmd_depth + 128, depth + 40, 8);
105
+ memcpy(cmd_depth + 192, depth + 48, 8);
106
+ memcpy(cmd_depth + 384, depth + 56, 8);
107
+ for (i = 0; i < 8; ++i) {
108
+ cmd_depth[128 + 8 * i] = depth[i];
109
+ cmd_depth[256 + 8 * i] = depth[8 + i];
110
+ cmd_depth[448 + 8 * i] = depth[16 + i];
111
+ }
112
+ BrotliStoreHuffmanTree(
113
+ cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
114
+ }
115
+ BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
116
+ }
117
+
118
+ static BROTLI_INLINE void EmitInsertLen(
119
+ uint32_t insertlen, uint32_t** commands) {
120
+ if (insertlen < 6) {
121
+ **commands = insertlen;
122
+ } else if (insertlen < 130) {
123
+ const uint32_t tail = insertlen - 2;
124
+ const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
125
+ const uint32_t prefix = tail >> nbits;
126
+ const uint32_t inscode = (nbits << 1) + prefix + 2;
127
+ const uint32_t extra = tail - (prefix << nbits);
128
+ **commands = inscode | (extra << 8);
129
+ } else if (insertlen < 2114) {
130
+ const uint32_t tail = insertlen - 66;
131
+ const uint32_t nbits = Log2FloorNonZero(tail);
132
+ const uint32_t code = nbits + 10;
133
+ const uint32_t extra = tail - (1u << nbits);
134
+ **commands = code | (extra << 8);
135
+ } else if (insertlen < 6210) {
136
+ const uint32_t extra = insertlen - 2114;
137
+ **commands = 21 | (extra << 8);
138
+ } else if (insertlen < 22594) {
139
+ const uint32_t extra = insertlen - 6210;
140
+ **commands = 22 | (extra << 8);
141
+ } else {
142
+ const uint32_t extra = insertlen - 22594;
143
+ **commands = 23 | (extra << 8);
144
+ }
145
+ ++(*commands);
146
+ }
147
+
148
+ static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
149
+ if (copylen < 10) {
150
+ **commands = (uint32_t)(copylen + 38);
151
+ } else if (copylen < 134) {
152
+ const size_t tail = copylen - 6;
153
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
154
+ const size_t prefix = tail >> nbits;
155
+ const size_t code = (nbits << 1) + prefix + 44;
156
+ const size_t extra = tail - (prefix << nbits);
157
+ **commands = (uint32_t)(code | (extra << 8));
158
+ } else if (copylen < 2118) {
159
+ const size_t tail = copylen - 70;
160
+ const size_t nbits = Log2FloorNonZero(tail);
161
+ const size_t code = nbits + 52;
162
+ const size_t extra = tail - ((size_t)1 << nbits);
163
+ **commands = (uint32_t)(code | (extra << 8));
164
+ } else {
165
+ const size_t extra = copylen - 2118;
166
+ **commands = (uint32_t)(63 | (extra << 8));
167
+ }
168
+ ++(*commands);
169
+ }
170
+
171
+ static BROTLI_INLINE void EmitCopyLenLastDistance(
172
+ size_t copylen, uint32_t** commands) {
173
+ if (copylen < 12) {
174
+ **commands = (uint32_t)(copylen + 20);
175
+ ++(*commands);
176
+ } else if (copylen < 72) {
177
+ const size_t tail = copylen - 8;
178
+ const size_t nbits = Log2FloorNonZero(tail) - 1;
179
+ const size_t prefix = tail >> nbits;
180
+ const size_t code = (nbits << 1) + prefix + 28;
181
+ const size_t extra = tail - (prefix << nbits);
182
+ **commands = (uint32_t)(code | (extra << 8));
183
+ ++(*commands);
184
+ } else if (copylen < 136) {
185
+ const size_t tail = copylen - 8;
186
+ const size_t code = (tail >> 5) + 54;
187
+ const size_t extra = tail & 31;
188
+ **commands = (uint32_t)(code | (extra << 8));
189
+ ++(*commands);
190
+ **commands = 64;
191
+ ++(*commands);
192
+ } else if (copylen < 2120) {
193
+ const size_t tail = copylen - 72;
194
+ const size_t nbits = Log2FloorNonZero(tail);
195
+ const size_t code = nbits + 52;
196
+ const size_t extra = tail - ((size_t)1 << nbits);
197
+ **commands = (uint32_t)(code | (extra << 8));
198
+ ++(*commands);
199
+ **commands = 64;
200
+ ++(*commands);
201
+ } else {
202
+ const size_t extra = copylen - 2120;
203
+ **commands = (uint32_t)(63 | (extra << 8));
204
+ ++(*commands);
205
+ **commands = 64;
206
+ ++(*commands);
207
+ }
208
+ }
209
+
210
+ static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
211
+ uint32_t d = distance + 3;
212
+ uint32_t nbits = Log2FloorNonZero(d) - 1;
213
+ const uint32_t prefix = (d >> nbits) & 1;
214
+ const uint32_t offset = (2 + prefix) << nbits;
215
+ const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
216
+ uint32_t extra = d - offset;
217
+ **commands = distcode | (extra << 8);
218
+ ++(*commands);
219
+ }
220
+
221
+ /* REQUIRES: len <= 1 << 24. */
222
+ static void BrotliStoreMetaBlockHeader(
223
+ size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
224
+ uint8_t* storage) {
225
+ size_t nibbles = 6;
226
+ /* ISLAST */
227
+ BrotliWriteBits(1, 0, storage_ix, storage);
228
+ if (len <= (1U << 16)) {
229
+ nibbles = 4;
230
+ } else if (len <= (1U << 20)) {
231
+ nibbles = 5;
232
+ }
233
+ BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
234
+ BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
235
+ /* ISUNCOMPRESSED */
236
+ BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
237
+ }
238
+
239
+ static BROTLI_INLINE void CreateCommands(const uint8_t* input,
240
+ size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
241
+ size_t table_bits, size_t min_match,
242
+ uint8_t** literals, uint32_t** commands) {
243
+ /* "ip" is the input pointer. */
244
+ const uint8_t* ip = input;
245
+ const size_t shift = 64u - table_bits;
246
+ const uint8_t* ip_end = input + block_size;
247
+ /* "next_emit" is a pointer to the first byte that is not covered by a
248
+ previous copy. Bytes between "next_emit" and the start of the next copy or
249
+ the end of the input will be emitted as literal bytes. */
250
+ const uint8_t* next_emit = input;
251
+
252
+ int last_distance = -1;
253
+ const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
254
+
255
+ if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
256
+ /* For the last block, we need to keep a 16 bytes margin so that we can be
257
+ sure that all distances are at most window size - 16.
258
+ For all other blocks, we only need to keep a margin of 5 bytes so that
259
+ we don't go over the block size with a copy. */
260
+ const size_t len_limit = BROTLI_MIN(size_t, block_size - min_match,
261
+ input_size - kInputMarginBytes);
262
+ const uint8_t* ip_limit = input + len_limit;
263
+
264
+ uint32_t next_hash;
265
+ for (next_hash = Hash(++ip, shift, min_match); ; ) {
266
+ /* Step 1: Scan forward in the input looking for a 6-byte-long match.
267
+ If we get close to exhausting the input then goto emit_remainder.
268
+
269
+ Heuristic match skipping: If 32 bytes are scanned with no matches
270
+ found, start looking only at every other byte. If 32 more bytes are
271
+ scanned, look at every third byte, etc.. When a match is found,
272
+ immediately go back to looking at every byte. This is a small loss
273
+ (~5% performance, ~0.1% density) for compressible data due to more
274
+ bookkeeping, but for non-compressible data (such as JPEG) it's a huge
275
+ win since the compressor quickly "realizes" the data is incompressible
276
+ and doesn't bother looking for matches everywhere.
277
+
278
+ The "skip" variable keeps track of how many bytes there are since the
279
+ last match; dividing it by 32 (ie. right-shifting by five) gives the
280
+ number of bytes to move ahead for each iteration. */
281
+ uint32_t skip = 32;
282
+
283
+ const uint8_t* next_ip = ip;
284
+ const uint8_t* candidate;
285
+
286
+ BROTLI_DCHECK(next_emit < ip);
287
+ trawl:
288
+ do {
289
+ uint32_t hash = next_hash;
290
+ uint32_t bytes_between_hash_lookups = skip++ >> 5;
291
+ ip = next_ip;
292
+ BROTLI_DCHECK(hash == Hash(ip, shift, min_match));
293
+ next_ip = ip + bytes_between_hash_lookups;
294
+ if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
295
+ goto emit_remainder;
296
+ }
297
+ next_hash = Hash(next_ip, shift, min_match);
298
+ candidate = ip - last_distance;
299
+ if (IsMatch(ip, candidate, min_match)) {
300
+ if (BROTLI_PREDICT_TRUE(candidate < ip)) {
301
+ table[hash] = (int)(ip - base_ip);
302
+ break;
303
+ }
304
+ }
305
+ candidate = base_ip + table[hash];
306
+ BROTLI_DCHECK(candidate >= base_ip);
307
+ BROTLI_DCHECK(candidate < ip);
308
+
309
+ table[hash] = (int)(ip - base_ip);
310
+ } while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate, min_match)));
311
+
312
+ /* Check copy distance. If candidate is not feasible, continue search.
313
+ Checking is done outside of hot loop to reduce overhead. */
314
+ if (ip - candidate > MAX_DISTANCE) goto trawl;
315
+
316
+ /* Step 2: Emit the found match together with the literal bytes from
317
+ "next_emit", and then see if we can find a next match immediately
318
+ afterwards. Repeat until we find no match for the input
319
+ without emitting some literal bytes. */
320
+
321
+ {
322
+ /* We have a 6-byte match at ip, and we need to emit bytes in
323
+ [next_emit, ip). */
324
+ const uint8_t* base = ip;
325
+ size_t matched = min_match + FindMatchLengthWithLimit(
326
+ candidate + min_match, ip + min_match,
327
+ (size_t)(ip_end - ip) - min_match);
328
+ int distance = (int)(base - candidate); /* > 0 */
329
+ int insert = (int)(base - next_emit);
330
+ ip += matched;
331
+ BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
332
+ EmitInsertLen((uint32_t)insert, commands);
333
+ memcpy(*literals, next_emit, (size_t)insert);
334
+ *literals += insert;
335
+ if (distance == last_distance) {
336
+ **commands = 64;
337
+ ++(*commands);
338
+ } else {
339
+ EmitDistance((uint32_t)distance, commands);
340
+ last_distance = distance;
341
+ }
342
+ EmitCopyLenLastDistance(matched, commands);
343
+
344
+ next_emit = ip;
345
+ if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
346
+ goto emit_remainder;
347
+ }
348
+ {
349
+ /* We could immediately start working at ip now, but to improve
350
+ compression we first update "table" with the hashes of some
351
+ positions within the last copy. */
352
+ uint64_t input_bytes;
353
+ uint32_t cur_hash;
354
+ uint32_t prev_hash;
355
+ if (min_match == 4) {
356
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
357
+ cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
358
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
359
+ table[prev_hash] = (int)(ip - base_ip - 3);
360
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
361
+ table[prev_hash] = (int)(ip - base_ip - 2);
362
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
363
+ table[prev_hash] = (int)(ip - base_ip - 1);
364
+ } else {
365
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
366
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
367
+ table[prev_hash] = (int)(ip - base_ip - 5);
368
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
369
+ table[prev_hash] = (int)(ip - base_ip - 4);
370
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
371
+ table[prev_hash] = (int)(ip - base_ip - 3);
372
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
373
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
374
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
375
+ table[prev_hash] = (int)(ip - base_ip - 2);
376
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
377
+ table[prev_hash] = (int)(ip - base_ip - 1);
378
+ }
379
+
380
+ candidate = base_ip + table[cur_hash];
381
+ table[cur_hash] = (int)(ip - base_ip);
382
+ }
383
+ }
384
+
385
+ while (ip - candidate <= MAX_DISTANCE &&
386
+ IsMatch(ip, candidate, min_match)) {
387
+ /* We have a 6-byte match at ip, and no need to emit any
388
+ literal bytes prior to ip. */
389
+ const uint8_t* base = ip;
390
+ size_t matched = min_match + FindMatchLengthWithLimit(
391
+ candidate + min_match, ip + min_match,
392
+ (size_t)(ip_end - ip) - min_match);
393
+ ip += matched;
394
+ last_distance = (int)(base - candidate); /* > 0 */
395
+ BROTLI_DCHECK(0 == memcmp(base, candidate, matched));
396
+ EmitCopyLen(matched, commands);
397
+ EmitDistance((uint32_t)last_distance, commands);
398
+
399
+ next_emit = ip;
400
+ if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
401
+ goto emit_remainder;
402
+ }
403
+ {
404
+ /* We could immediately start working at ip now, but to improve
405
+ compression we first update "table" with the hashes of some
406
+ positions within the last copy. */
407
+ uint64_t input_bytes;
408
+ uint32_t cur_hash;
409
+ uint32_t prev_hash;
410
+ if (min_match == 4) {
411
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 3);
412
+ cur_hash = HashBytesAtOffset(input_bytes, 3, shift, min_match);
413
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
414
+ table[prev_hash] = (int)(ip - base_ip - 3);
415
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
416
+ table[prev_hash] = (int)(ip - base_ip - 2);
417
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
418
+ table[prev_hash] = (int)(ip - base_ip - 1);
419
+ } else {
420
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 5);
421
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
422
+ table[prev_hash] = (int)(ip - base_ip - 5);
423
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
424
+ table[prev_hash] = (int)(ip - base_ip - 4);
425
+ prev_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
426
+ table[prev_hash] = (int)(ip - base_ip - 3);
427
+ input_bytes = BROTLI_UNALIGNED_LOAD64LE(ip - 2);
428
+ cur_hash = HashBytesAtOffset(input_bytes, 2, shift, min_match);
429
+ prev_hash = HashBytesAtOffset(input_bytes, 0, shift, min_match);
430
+ table[prev_hash] = (int)(ip - base_ip - 2);
431
+ prev_hash = HashBytesAtOffset(input_bytes, 1, shift, min_match);
432
+ table[prev_hash] = (int)(ip - base_ip - 1);
433
+ }
434
+
435
+ candidate = base_ip + table[cur_hash];
436
+ table[cur_hash] = (int)(ip - base_ip);
437
+ }
438
+ }
439
+
440
+ next_hash = Hash(++ip, shift, min_match);
441
+ }
442
+ }
443
+
444
+ emit_remainder:
445
+ BROTLI_DCHECK(next_emit <= ip_end);
446
+ /* Emit the remaining bytes as literals. */
447
+ if (next_emit < ip_end) {
448
+ const uint32_t insert = (uint32_t)(ip_end - next_emit);
449
+ EmitInsertLen(insert, commands);
450
+ memcpy(*literals, next_emit, insert);
451
+ *literals += insert;
452
+ }
453
+ }
454
+
455
+ static void StoreCommands(MemoryManager* m,
456
+ const uint8_t* literals, const size_t num_literals,
457
+ const uint32_t* commands, const size_t num_commands,
458
+ size_t* storage_ix, uint8_t* storage) {
459
+ static const uint32_t kNumExtraBits[128] = {
460
+ 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
461
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
462
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
463
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464
+ 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
465
+ 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
466
+ 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
467
+ };
468
+ static const uint32_t kInsertOffset[24] = {
469
+ 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
470
+ 1090, 2114, 6210, 22594,
471
+ };
472
+
473
+ uint8_t lit_depths[256];
474
+ uint16_t lit_bits[256];
475
+ uint32_t lit_histo[256] = { 0 };
476
+ uint8_t cmd_depths[128] = { 0 };
477
+ uint16_t cmd_bits[128] = { 0 };
478
+ uint32_t cmd_histo[128] = { 0 };
479
+ size_t i;
480
+ for (i = 0; i < num_literals; ++i) {
481
+ ++lit_histo[literals[i]];
482
+ }
483
+ BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
484
+ /* max_bits = */ 8,
485
+ lit_depths, lit_bits,
486
+ storage_ix, storage);
487
+ if (BROTLI_IS_OOM(m)) return;
488
+
489
+ for (i = 0; i < num_commands; ++i) {
490
+ const uint32_t code = commands[i] & 0xFF;
491
+ BROTLI_DCHECK(code < 128);
492
+ ++cmd_histo[code];
493
+ }
494
+ cmd_histo[1] += 1;
495
+ cmd_histo[2] += 1;
496
+ cmd_histo[64] += 1;
497
+ cmd_histo[84] += 1;
498
+ BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
499
+ storage_ix, storage);
500
+
501
+ for (i = 0; i < num_commands; ++i) {
502
+ const uint32_t cmd = commands[i];
503
+ const uint32_t code = cmd & 0xFF;
504
+ const uint32_t extra = cmd >> 8;
505
+ BROTLI_DCHECK(code < 128);
506
+ BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
507
+ BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
508
+ if (code < 24) {
509
+ const uint32_t insert = kInsertOffset[code] + extra;
510
+ uint32_t j;
511
+ for (j = 0; j < insert; ++j) {
512
+ const uint8_t lit = *literals;
513
+ BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
514
+ ++literals;
515
+ }
516
+ }
517
+ }
518
+ }
519
+
520
+ /* Acceptable loss for uncompressible speedup is 2% */
521
+ #define MIN_RATIO 0.98
522
+ #define SAMPLE_RATE 43
523
+
524
+ static BROTLI_BOOL ShouldCompress(
525
+ const uint8_t* input, size_t input_size, size_t num_literals) {
526
+ double corpus_size = (double)input_size;
527
+ if ((double)num_literals < MIN_RATIO * corpus_size) {
528
+ return BROTLI_TRUE;
529
+ } else {
530
+ uint32_t literal_histo[256] = { 0 };
531
+ const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
532
+ size_t i;
533
+ for (i = 0; i < input_size; i += SAMPLE_RATE) {
534
+ ++literal_histo[input[i]];
535
+ }
536
+ return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
537
+ }
538
+ }
539
+
540
+ static void RewindBitPosition(const size_t new_storage_ix,
541
+ size_t* storage_ix, uint8_t* storage) {
542
+ const size_t bitpos = new_storage_ix & 7;
543
+ const size_t mask = (1u << bitpos) - 1;
544
+ storage[new_storage_ix >> 3] &= (uint8_t)mask;
545
+ *storage_ix = new_storage_ix;
546
+ }
547
+
548
+ static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
549
+ size_t* storage_ix, uint8_t* storage) {
550
+ BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
551
+ *storage_ix = (*storage_ix + 7u) & ~7u;
552
+ memcpy(&storage[*storage_ix >> 3], input, input_size);
553
+ *storage_ix += input_size << 3;
554
+ storage[*storage_ix >> 3] = 0;
555
+ }
556
+
557
+ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
558
+ MemoryManager* m, const uint8_t* input, size_t input_size,
559
+ BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
560
+ int* table, size_t table_bits, size_t min_match,
561
+ size_t* storage_ix, uint8_t* storage) {
562
+ /* Save the start of the first block for position and distance computations.
563
+ */
564
+ const uint8_t* base_ip = input;
565
+ BROTLI_UNUSED(is_last);
566
+
567
+ while (input_size > 0) {
568
+ size_t block_size =
569
+ BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
570
+ uint32_t* commands = command_buf;
571
+ uint8_t* literals = literal_buf;
572
+ size_t num_literals;
573
+ CreateCommands(input, block_size, input_size, base_ip, table,
574
+ table_bits, min_match, &literals, &commands);
575
+ num_literals = (size_t)(literals - literal_buf);
576
+ if (ShouldCompress(input, block_size, num_literals)) {
577
+ const size_t num_commands = (size_t)(commands - command_buf);
578
+ BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
579
+ /* No block splits, no contexts. */
580
+ BrotliWriteBits(13, 0, storage_ix, storage);
581
+ StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
582
+ storage_ix, storage);
583
+ if (BROTLI_IS_OOM(m)) return;
584
+ } else {
585
+ /* Since we did not find many backward references and the entropy of
586
+ the data is close to 8 bits, we can simply emit an uncompressed block.
587
+ This makes compression speed of uncompressible data about 3x faster. */
588
+ EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
589
+ }
590
+ input += block_size;
591
+ input_size -= block_size;
592
+ }
593
+ }
594
+
595
+ #define FOR_TABLE_BITS_(X) \
596
+ X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
597
+
598
+ #define BAKE_METHOD_PARAM_(B) \
599
+ static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \
600
+ MemoryManager* m, const uint8_t* input, size_t input_size, \
601
+ BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \
602
+ int* table, size_t* storage_ix, uint8_t* storage) { \
603
+ size_t min_match = (B <= 15) ? 4 : 6; \
604
+ BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
605
+ literal_buf, table, B, min_match, storage_ix, storage); \
606
+ }
607
+ FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
608
+ #undef BAKE_METHOD_PARAM_
609
+
610
+ void BrotliCompressFragmentTwoPass(
611
+ MemoryManager* m, const uint8_t* input, size_t input_size,
612
+ BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
613
+ int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
614
+ const size_t initial_storage_ix = *storage_ix;
615
+ const size_t table_bits = Log2FloorNonZero(table_size);
616
+ switch (table_bits) {
617
+ #define CASE_(B) \
618
+ case B: \
619
+ BrotliCompressFragmentTwoPassImpl ## B( \
620
+ m, input, input_size, is_last, command_buf, \
621
+ literal_buf, table, storage_ix, storage); \
622
+ break;
623
+ FOR_TABLE_BITS_(CASE_)
624
+ #undef CASE_
625
+ default: BROTLI_DCHECK(0); break;
626
+ }
627
+
628
+ /* If output is larger than single uncompressed block, rewrite it. */
629
+ if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
630
+ RewindBitPosition(initial_storage_ix, storage_ix, storage);
631
+ EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
632
+ }
633
+
634
+ if (is_last) {
635
+ BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
636
+ BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
637
+ *storage_ix = (*storage_ix + 7u) & ~7u;
638
+ }
639
+ }
640
+
641
+ #undef FOR_TABLE_BITS_
642
+
643
+ #if defined(__cplusplus) || defined(c_plusplus)
644
+ } /* extern "C" */
645
+ #endif
@@ -0,0 +1,54 @@
1
+ /* Copyright 2015 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Function for fast encoding of an input fragment, independently from the input
8
+ history. This function uses two-pass processing: in the first pass we save
9
+ the found backward matches and literal bytes into a buffer, and in the
10
+ second pass we emit them into the bit stream using prefix codes built based
11
+ on the actual command and literal byte histograms. */
12
+
13
+ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
14
+ #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
15
+
16
+ #include "brotli_common_platform.h"
17
+ #include "brotli_types.h"
18
+ #include "brotli_enc_memory.h"
19
+
20
+ #if defined(__cplusplus) || defined(c_plusplus)
21
+ extern "C" {
22
+ #endif
23
+
24
+ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
25
+
26
+ /* Compresses "input" string to the "*storage" buffer as one or more complete
27
+ meta-blocks, and updates the "*storage_ix" bit position.
28
+
29
+ If "is_last" is 1, emits an additional empty last meta-block.
30
+
31
+ REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
32
+ REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
33
+ REQUIRES: "command_buf" and "literal_buf" point to at least
34
+ kCompressFragmentTwoPassBlockSize long arrays.
35
+ REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
36
+ REQUIRES: "table_size" is a power of two
37
+ OUTPUT: maximal copy distance <= |input_size|
38
+ OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
39
+ BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
40
+ const uint8_t* input,
41
+ size_t input_size,
42
+ BROTLI_BOOL is_last,
43
+ uint32_t* command_buf,
44
+ uint8_t* literal_buf,
45
+ int* table,
46
+ size_t table_size,
47
+ size_t* storage_ix,
48
+ uint8_t* storage);
49
+
50
+ #if defined(__cplusplus) || defined(c_plusplus)
51
+ } /* extern "C" */
52
+ #endif
53
+
54
+ #endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */