isomorfeus-ferret 0.12.4 → 0.12.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +612 -612
  3. data/README.md +77 -48
  4. data/ext/isomorfeus_ferret_ext/bm_hash.c +9 -6
  5. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +4 -2
  6. data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
  7. data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
  8. data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
  9. data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
  10. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
  11. data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
  12. data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
  13. data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
  14. data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
  15. data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
  16. data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
  17. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
  18. data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
  19. data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
  20. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
  21. data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
  22. data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
  23. data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
  24. data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
  25. data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
  26. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
  27. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
  28. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
  29. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
  30. data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
  31. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
  32. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
  33. data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
  34. data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
  35. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
  36. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
  37. data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
  38. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
  39. data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
  40. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
  41. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
  42. data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
  43. data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
  44. data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
  45. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
  46. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
  47. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
  48. data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
  50. data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
  51. data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
  52. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
  53. data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
  54. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
  55. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
  56. data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
  57. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
  58. data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
  59. data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
  60. data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
  61. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
  62. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
  63. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
  64. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
  65. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
  66. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
  67. data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
  68. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
  69. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
  70. data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
  71. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
  72. data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
  73. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
  74. data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
  75. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
  76. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
  77. data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
  78. data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
  79. data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
  80. data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
  81. data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
  82. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
  83. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
  84. data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
  85. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
  86. data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
  87. data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
  88. data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
  89. data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
  90. data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
  91. data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
  92. data/ext/isomorfeus_ferret_ext/frb_store.c +34 -5
  93. data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
  94. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
  95. data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
  96. data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
  97. data/ext/isomorfeus_ferret_ext/frt_posh.h +11 -19
  98. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +1844 -1911
  99. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +12 -15
  100. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
  101. data/ext/isomorfeus_ferret_ext/frt_scanner.c +1 -0
  102. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +1 -0
  103. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +1 -0
  104. data/ext/isomorfeus_ferret_ext/frt_search.h +1 -1
  105. data/ext/isomorfeus_ferret_ext/libstemmer.c +14 -11
  106. data/ext/isomorfeus_ferret_ext/libstemmer.h +4 -9
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +1167 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +6 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +1433 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +6 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +120 -143
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +1 -2
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +217 -237
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +1 -1
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +377 -432
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +1 -1
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +298 -342
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +1 -2
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +530 -524
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +1 -1
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +201 -214
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +1 -1
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1 -1
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +394 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +457 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +396 -439
  129. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +1 -1
  130. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +104 -128
  131. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +1 -1
  132. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +242 -273
  133. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +1 -1
  134. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +406 -461
  135. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +1 -2
  136. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +405 -456
  137. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +1 -1
  138. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +108 -126
  139. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +1 -1
  140. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +849 -0
  141. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +373 -405
  143. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +1 -1
  144. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +288 -305
  145. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +1 -1
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.c +1651 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_arabic.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.c +546 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_armenian.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.c +1171 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_basque.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.c +1436 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_catalan.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +121 -141
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +1 -1
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +221 -241
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +1 -1
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +381 -431
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +1 -1
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +300 -345
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +1 -1
  162. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +518 -511
  163. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +1 -1
  164. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +201 -209
  165. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +1 -1
  166. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.c +3660 -0
  167. data/ext/isomorfeus_ferret_ext/stem_UTF_8_greek.h +6 -0
  168. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.c +309 -0
  169. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hindi.h +6 -0
  170. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +306 -671
  171. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +1 -1
  172. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.c +394 -0
  173. data/ext/isomorfeus_ferret_ext/stem_UTF_8_indonesian.h +6 -0
  174. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.c +457 -0
  175. data/ext/isomorfeus_ferret_ext/stem_UTF_8_irish.h +6 -0
  176. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +400 -442
  177. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +1 -1
  178. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.c +824 -0
  179. data/ext/isomorfeus_ferret_ext/stem_UTF_8_lithuanian.h +6 -0
  180. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.c +408 -0
  181. data/ext/isomorfeus_ferret_ext/stem_UTF_8_nepali.h +6 -0
  182. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +105 -127
  183. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +1 -1
  184. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +245 -276
  185. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +1 -1
  186. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +409 -464
  187. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +1 -1
  188. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +376 -408
  189. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +1 -1
  190. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +272 -287
  191. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +1 -1
  192. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.c +6530 -0
  193. data/ext/isomorfeus_ferret_ext/stem_UTF_8_serbian.h +6 -0
  194. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +407 -458
  195. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +1 -1
  196. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +110 -125
  197. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +1 -1
  198. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.c +1865 -0
  199. data/ext/isomorfeus_ferret_ext/stem_UTF_8_tamil.h +6 -0
  200. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +698 -806
  201. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +1 -1
  202. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.c +1220 -0
  203. data/ext/isomorfeus_ferret_ext/stem_UTF_8_yiddish.h +6 -0
  204. data/ext/isomorfeus_ferret_ext/stem_api.c +1 -9
  205. data/ext/isomorfeus_ferret_ext/stem_api.h +1 -3
  206. data/ext/isomorfeus_ferret_ext/stem_header.h +30 -26
  207. data/ext/isomorfeus_ferret_ext/stem_modules.h +113 -26
  208. data/ext/isomorfeus_ferret_ext/stem_modules.txt +18 -5
  209. data/ext/isomorfeus_ferret_ext/stem_utilities.c +167 -132
  210. data/ext/isomorfeus_ferret_ext/test.c +7 -1
  211. data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
  212. data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
  213. data/ext/isomorfeus_ferret_ext/test_search.c +0 -1
  214. data/lib/isomorfeus/ferret/version.rb +1 -1
  215. metadata +125 -5
  216. data/ext/isomorfeus_ferret_ext/q_parser.y +0 -1366
@@ -0,0 +1,167 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Sliding window over the input data. */
8
+
9
+ #ifndef BROTLI_ENC_RINGBUFFER_H_
10
+ #define BROTLI_ENC_RINGBUFFER_H_
11
+
12
+ #include <string.h> /* memcpy */
13
+
14
+ #include "brotli_common_platform.h"
15
+ #include "brotli_types.h"
16
+ #include "brotli_enc_memory.h"
17
+ #include "brotli_enc_quality.h"
18
+
19
+ #if defined(__cplusplus) || defined(c_plusplus)
20
+ extern "C" {
21
+ #endif
22
+
23
+ /* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
24
+ data in a circular manner: writing a byte writes it to:
25
+ `position() % (1 << window_bits)'.
26
+ For convenience, the RingBuffer array contains another copy of the
27
+ first `1 << tail_bits' bytes:
28
+ buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
29
+ and another copy of the last two bytes:
30
+ buffer_[-1] == buffer_[(1 << window_bits) - 1] and
31
+ buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
32
typedef struct RingBuffer {
  /* Geometry, written once by RingBufferSetup():
       size_       == 1u << window_bits
       mask_       == size_ - 1
       tail_size_  == 1u << tail_bits
       total_size_ == size_ + tail_size_ */
  const uint32_t size_;
  const uint32_t mask_;
  const uint32_t tail_size_;
  const uint32_t total_size_;

  /* Number of payload bytes the current allocation was sized for
     (set by RingBufferInitBuffer()). */
  uint32_t cur_size_;
  /* Write position. RingBufferWrite() masks it with mask_ to get the offset
     into buffer_ and treats bit 31 as a "not the first lap" marker. */
  uint32_t pos_;
  /* Start of the allocation: two leading bytes (copies of the last two ring
     bytes), then the ring data, then the tail copy of the beginning. */
  uint8_t* data_;
  /* Start of the ring data proper; equals data_ + 2 once allocated. */
  uint8_t* buffer_;
} RingBuffer;
48
+
49
+ static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
50
+ rb->cur_size_ = 0;
51
+ rb->pos_ = 0;
52
+ rb->data_ = 0;
53
+ rb->buffer_ = 0;
54
+ }
55
+
56
+ static BROTLI_INLINE void RingBufferSetup(
57
+ const BrotliEncoderParams* params, RingBuffer* rb) {
58
+ int window_bits = ComputeRbBits(params);
59
+ int tail_bits = params->lgblock;
60
+ *(uint32_t*)&rb->size_ = 1u << window_bits;
61
+ *(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
62
+ *(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
63
+ *(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
64
+ }
65
+
66
+ static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
67
+ BROTLI_FREE(m, rb->data_);
68
+ }
69
+
70
+ /* Allocates or re-allocates data_ to the given length + plus some slack
71
+ region before and after. Fills the slack regions with zeros. */
72
+ static BROTLI_INLINE void RingBufferInitBuffer(
73
+ MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
74
+ static const size_t kSlackForEightByteHashingEverywhere = 7;
75
+ uint8_t* new_data = BROTLI_ALLOC(
76
+ m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
77
+ size_t i;
78
+ if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_data)) return;
79
+ if (rb->data_) {
80
+ memcpy(new_data, rb->data_,
81
+ 2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
82
+ BROTLI_FREE(m, rb->data_);
83
+ }
84
+ rb->data_ = new_data;
85
+ rb->cur_size_ = buflen;
86
+ rb->buffer_ = rb->data_ + 2;
87
+ rb->buffer_[-2] = rb->buffer_[-1] = 0;
88
+ for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
89
+ rb->buffer_[rb->cur_size_ + i] = 0;
90
+ }
91
+ }
92
+
93
+ static BROTLI_INLINE void RingBufferWriteTail(
94
+ const uint8_t* bytes, size_t n, RingBuffer* rb) {
95
+ const size_t masked_pos = rb->pos_ & rb->mask_;
96
+ if (BROTLI_PREDICT_FALSE(masked_pos < rb->tail_size_)) {
97
+ /* Just fill the tail buffer with the beginning data. */
98
+ const size_t p = rb->size_ + masked_pos;
99
+ memcpy(&rb->buffer_[p], bytes,
100
+ BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
101
+ }
102
+ }
103
+
104
+ /* Push bytes into the ring buffer. */
105
+ static BROTLI_INLINE void RingBufferWrite(
106
+ MemoryManager* m, const uint8_t* bytes, size_t n, RingBuffer* rb) {
107
+ if (rb->pos_ == 0 && n < rb->tail_size_) {
108
+ /* Special case for the first write: to process the first block, we don't
109
+ need to allocate the whole ring-buffer and we don't need the tail
110
+ either. However, we do this memory usage optimization only if the
111
+ first write is less than the tail size, which is also the input block
112
+ size, otherwise it is likely that other blocks will follow and we
113
+ will need to reallocate to the full size anyway. */
114
+ rb->pos_ = (uint32_t)n;
115
+ RingBufferInitBuffer(m, rb->pos_, rb);
116
+ if (BROTLI_IS_OOM(m)) return;
117
+ memcpy(rb->buffer_, bytes, n);
118
+ return;
119
+ }
120
+ if (rb->cur_size_ < rb->total_size_) {
121
+ /* Lazily allocate the full buffer. */
122
+ RingBufferInitBuffer(m, rb->total_size_, rb);
123
+ if (BROTLI_IS_OOM(m)) return;
124
+ /* Initialize the last two bytes to zero, so that we don't have to worry
125
+ later when we copy the last two bytes to the first two positions. */
126
+ rb->buffer_[rb->size_ - 2] = 0;
127
+ rb->buffer_[rb->size_ - 1] = 0;
128
+ /* Initialize tail; might be touched by "best_len++" optimization when
129
+ ring buffer is "full". */
130
+ rb->buffer_[rb->size_] = 241;
131
+ }
132
+ {
133
+ const size_t masked_pos = rb->pos_ & rb->mask_;
134
+ /* The length of the writes is limited so that we do not need to worry
135
+ about a write */
136
+ RingBufferWriteTail(bytes, n, rb);
137
+ if (BROTLI_PREDICT_TRUE(masked_pos + n <= rb->size_)) {
138
+ /* A single write fits. */
139
+ memcpy(&rb->buffer_[masked_pos], bytes, n);
140
+ } else {
141
+ /* Split into two writes.
142
+ Copy into the end of the buffer, including the tail buffer. */
143
+ memcpy(&rb->buffer_[masked_pos], bytes,
144
+ BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
145
+ /* Copy into the beginning of the buffer */
146
+ memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
147
+ n - (rb->size_ - masked_pos));
148
+ }
149
+ }
150
+ {
151
+ BROTLI_BOOL not_first_lap = (rb->pos_ & (1u << 31)) != 0;
152
+ uint32_t rb_pos_mask = (1u << 31) - 1;
153
+ rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
154
+ rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
155
+ rb->pos_ = (rb->pos_ & rb_pos_mask) + (uint32_t)(n & rb_pos_mask);
156
+ if (not_first_lap) {
157
+ /* Wrap, but preserve not-a-first-lap feature. */
158
+ rb->pos_ |= 1u << 31;
159
+ }
160
+ }
161
+ }
162
+
163
+ #if defined(__cplusplus) || defined(c_plusplus)
164
+ } /* extern "C" */
165
+ #endif
166
+
167
+ #endif /* BROTLI_ENC_RINGBUFFER_H_ */
@@ -0,0 +1,486 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ #include "brotli_enc_static_dict.h"
8
+
9
+ #include "brotli_common_dictionary.h"
10
+ #include "brotli_common_platform.h"
11
+ #include "brotli_common_transform.h"
12
+ #include "brotli_enc_encoder_dict.h"
13
+ #include "brotli_enc_find_match_length.h"
14
+
15
+ #if defined(__cplusplus) || defined(c_plusplus)
16
+ extern "C" {
17
+ #endif
18
+
19
+ static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
20
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
21
+ /* The higher bits contain more mixture from the multiplication,
22
+ so we take our results from there. */
23
+ return h >> (32 - kDictNumBits);
24
+ }
25
+
26
+ static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
27
+ uint32_t* matches) {
28
+ uint32_t match = (uint32_t)((distance << 5) + len_code);
29
+ matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
30
+ }
31
+
32
+ static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
33
+ const uint8_t* data,
34
+ size_t id,
35
+ size_t len,
36
+ size_t maxlen) {
37
+ const size_t offset = dictionary->offsets_by_length[len] + len * id;
38
+ return FindMatchLengthWithLimit(&dictionary->data[offset], data,
39
+ BROTLI_MIN(size_t, len, maxlen));
40
+ }
41
+
42
+ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
43
+ DictWord w, const uint8_t* data, size_t max_length) {
44
+ if (w.len > max_length) {
45
+ return BROTLI_FALSE;
46
+ } else {
47
+ const size_t offset = dictionary->offsets_by_length[w.len] +
48
+ (size_t)w.len * (size_t)w.idx;
49
+ const uint8_t* dict = &dictionary->data[offset];
50
+ if (w.transform == 0) {
51
+ /* Match against base dictionary word. */
52
+ return
53
+ TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
54
+ } else if (w.transform == 10) {
55
+ /* Match against uppercase first transform.
56
+ Note that there are only ASCII uppercase words in the lookup table. */
57
+ return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
58
+ (dict[0] ^ 32) == data[0] &&
59
+ FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
60
+ w.len - 1u);
61
+ } else {
62
+ /* Match against uppercase all transform.
63
+ Note that there are only ASCII uppercase words in the lookup table. */
64
+ size_t i;
65
+ for (i = 0; i < w.len; ++i) {
66
+ if (dict[i] >= 'a' && dict[i] <= 'z') {
67
+ if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
68
+ } else {
69
+ if (dict[i] != data[i]) return BROTLI_FALSE;
70
+ }
71
+ }
72
+ return BROTLI_TRUE;
73
+ }
74
+ }
75
+ }
76
+
77
+ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
78
+ const BrotliEncoderDictionary* dictionary, const uint8_t* data,
79
+ size_t min_length, size_t max_length, uint32_t* matches) {
80
+ BROTLI_BOOL has_found_match = BROTLI_FALSE;
81
+ {
82
+ size_t offset = dictionary->buckets[Hash(data)];
83
+ BROTLI_BOOL end = !offset;
84
+ while (!end) {
85
+ DictWord w = dictionary->dict_words[offset++];
86
+ const size_t l = w.len & 0x1F;
87
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
88
+ const size_t id = w.idx;
89
+ end = !!(w.len & 0x80);
90
+ w.len = (uint8_t)l;
91
+ if (w.transform == 0) {
92
+ const size_t matchlen =
93
+ DictMatchLength(dictionary->words, data, id, l, max_length);
94
+ const uint8_t* s;
95
+ size_t minlen;
96
+ size_t maxlen;
97
+ size_t len;
98
+ /* Transform "" + BROTLI_TRANSFORM_IDENTITY + "" */
99
+ if (matchlen == l) {
100
+ AddMatch(id, l, l, matches);
101
+ has_found_match = BROTLI_TRUE;
102
+ }
103
+ /* Transforms "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "" and
104
+ "" + BROTLI_TRANSFORM_OMIT_LAST_1 + "ing " */
105
+ if (matchlen >= l - 1) {
106
+ AddMatch(id + 12 * n, l - 1, l, matches);
107
+ if (l + 2 < max_length &&
108
+ data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
109
+ data[l + 2] == ' ') {
110
+ AddMatch(id + 49 * n, l + 3, l, matches);
111
+ }
112
+ has_found_match = BROTLI_TRUE;
113
+ }
114
+ /* Transform "" + BROTLI_TRANSFORM_OMIT_LAST_# + "" (# = 2 .. 9) */
115
+ minlen = min_length;
116
+ if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
117
+ maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
118
+ for (len = minlen; len <= maxlen; ++len) {
119
+ size_t cut = l - len;
120
+ size_t transform_id = (cut << 2) +
121
+ (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
122
+ AddMatch(id + transform_id * n, len, l, matches);
123
+ has_found_match = BROTLI_TRUE;
124
+ }
125
+ if (matchlen < l || l + 6 >= max_length) {
126
+ continue;
127
+ }
128
+ s = &data[l];
129
+ /* Transforms "" + BROTLI_TRANSFORM_IDENTITY + <suffix> */
130
+ if (s[0] == ' ') {
131
+ AddMatch(id + n, l + 1, l, matches);
132
+ if (s[1] == 'a') {
133
+ if (s[2] == ' ') {
134
+ AddMatch(id + 28 * n, l + 3, l, matches);
135
+ } else if (s[2] == 's') {
136
+ if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
137
+ } else if (s[2] == 't') {
138
+ if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
139
+ } else if (s[2] == 'n') {
140
+ if (s[3] == 'd' && s[4] == ' ') {
141
+ AddMatch(id + 10 * n, l + 5, l, matches);
142
+ }
143
+ }
144
+ } else if (s[1] == 'b') {
145
+ if (s[2] == 'y' && s[3] == ' ') {
146
+ AddMatch(id + 38 * n, l + 4, l, matches);
147
+ }
148
+ } else if (s[1] == 'i') {
149
+ if (s[2] == 'n') {
150
+ if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
151
+ } else if (s[2] == 's') {
152
+ if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
153
+ }
154
+ } else if (s[1] == 'f') {
155
+ if (s[2] == 'o') {
156
+ if (s[3] == 'r' && s[4] == ' ') {
157
+ AddMatch(id + 25 * n, l + 5, l, matches);
158
+ }
159
+ } else if (s[2] == 'r') {
160
+ if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
161
+ AddMatch(id + 37 * n, l + 6, l, matches);
162
+ }
163
+ }
164
+ } else if (s[1] == 'o') {
165
+ if (s[2] == 'f') {
166
+ if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
167
+ } else if (s[2] == 'n') {
168
+ if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
169
+ }
170
+ } else if (s[1] == 'n') {
171
+ if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
172
+ AddMatch(id + 80 * n, l + 5, l, matches);
173
+ }
174
+ } else if (s[1] == 't') {
175
+ if (s[2] == 'h') {
176
+ if (s[3] == 'e') {
177
+ if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
178
+ } else if (s[3] == 'a') {
179
+ if (s[4] == 't' && s[5] == ' ') {
180
+ AddMatch(id + 29 * n, l + 6, l, matches);
181
+ }
182
+ }
183
+ } else if (s[2] == 'o') {
184
+ if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
185
+ }
186
+ } else if (s[1] == 'w') {
187
+ if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
188
+ AddMatch(id + 35 * n, l + 6, l, matches);
189
+ }
190
+ }
191
+ } else if (s[0] == '"') {
192
+ AddMatch(id + 19 * n, l + 1, l, matches);
193
+ if (s[1] == '>') {
194
+ AddMatch(id + 21 * n, l + 2, l, matches);
195
+ }
196
+ } else if (s[0] == '.') {
197
+ AddMatch(id + 20 * n, l + 1, l, matches);
198
+ if (s[1] == ' ') {
199
+ AddMatch(id + 31 * n, l + 2, l, matches);
200
+ if (s[2] == 'T' && s[3] == 'h') {
201
+ if (s[4] == 'e') {
202
+ if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
203
+ } else if (s[4] == 'i') {
204
+ if (s[5] == 's' && s[6] == ' ') {
205
+ AddMatch(id + 75 * n, l + 7, l, matches);
206
+ }
207
+ }
208
+ }
209
+ }
210
+ } else if (s[0] == ',') {
211
+ AddMatch(id + 76 * n, l + 1, l, matches);
212
+ if (s[1] == ' ') {
213
+ AddMatch(id + 14 * n, l + 2, l, matches);
214
+ }
215
+ } else if (s[0] == '\n') {
216
+ AddMatch(id + 22 * n, l + 1, l, matches);
217
+ if (s[1] == '\t') {
218
+ AddMatch(id + 50 * n, l + 2, l, matches);
219
+ }
220
+ } else if (s[0] == ']') {
221
+ AddMatch(id + 24 * n, l + 1, l, matches);
222
+ } else if (s[0] == '\'') {
223
+ AddMatch(id + 36 * n, l + 1, l, matches);
224
+ } else if (s[0] == ':') {
225
+ AddMatch(id + 51 * n, l + 1, l, matches);
226
+ } else if (s[0] == '(') {
227
+ AddMatch(id + 57 * n, l + 1, l, matches);
228
+ } else if (s[0] == '=') {
229
+ if (s[1] == '"') {
230
+ AddMatch(id + 70 * n, l + 2, l, matches);
231
+ } else if (s[1] == '\'') {
232
+ AddMatch(id + 86 * n, l + 2, l, matches);
233
+ }
234
+ } else if (s[0] == 'a') {
235
+ if (s[1] == 'l' && s[2] == ' ') {
236
+ AddMatch(id + 84 * n, l + 3, l, matches);
237
+ }
238
+ } else if (s[0] == 'e') {
239
+ if (s[1] == 'd') {
240
+ if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
241
+ } else if (s[1] == 'r') {
242
+ if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
243
+ } else if (s[1] == 's') {
244
+ if (s[2] == 't' && s[3] == ' ') {
245
+ AddMatch(id + 95 * n, l + 4, l, matches);
246
+ }
247
+ }
248
+ } else if (s[0] == 'f') {
249
+ if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
250
+ AddMatch(id + 90 * n, l + 4, l, matches);
251
+ }
252
+ } else if (s[0] == 'i') {
253
+ if (s[1] == 'v') {
254
+ if (s[2] == 'e' && s[3] == ' ') {
255
+ AddMatch(id + 92 * n, l + 4, l, matches);
256
+ }
257
+ } else if (s[1] == 'z') {
258
+ if (s[2] == 'e' && s[3] == ' ') {
259
+ AddMatch(id + 100 * n, l + 4, l, matches);
260
+ }
261
+ }
262
+ } else if (s[0] == 'l') {
263
+ if (s[1] == 'e') {
264
+ if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
265
+ AddMatch(id + 93 * n, l + 5, l, matches);
266
+ }
267
+ } else if (s[1] == 'y') {
268
+ if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
269
+ }
270
+ } else if (s[0] == 'o') {
271
+ if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
272
+ AddMatch(id + 106 * n, l + 4, l, matches);
273
+ }
274
+ }
275
+ } else {
276
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
277
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
278
+ transform. */
279
+ const BROTLI_BOOL is_all_caps =
280
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
281
+ const uint8_t* s;
282
+ if (!IsMatch(dictionary->words, w, data, max_length)) {
283
+ continue;
284
+ }
285
+ /* Transform "" + kUppercase{First,All} + "" */
286
+ AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
287
+ has_found_match = BROTLI_TRUE;
288
+ if (l + 1 >= max_length) {
289
+ continue;
290
+ }
291
+ /* Transforms "" + kUppercase{First,All} + <suffix> */
292
+ s = &data[l];
293
+ if (s[0] == ' ') {
294
+ AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
295
+ } else if (s[0] == '"') {
296
+ AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
297
+ if (s[1] == '>') {
298
+ AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
299
+ }
300
+ } else if (s[0] == '.') {
301
+ AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
302
+ if (s[1] == ' ') {
303
+ AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
304
+ }
305
+ } else if (s[0] == ',') {
306
+ AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
307
+ if (s[1] == ' ') {
308
+ AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
309
+ }
310
+ } else if (s[0] == '\'') {
311
+ AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
312
+ } else if (s[0] == '(') {
313
+ AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
314
+ } else if (s[0] == '=') {
315
+ if (s[1] == '"') {
316
+ AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
317
+ } else if (s[1] == '\'') {
318
+ AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
319
+ }
320
+ }
321
+ }
322
+ }
323
+ }
324
+ /* Transforms with prefixes " " and "." */
325
+ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
326
+ BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
327
+ size_t offset = dictionary->buckets[Hash(&data[1])];
328
+ BROTLI_BOOL end = !offset;
329
+ while (!end) {
330
+ DictWord w = dictionary->dict_words[offset++];
331
+ const size_t l = w.len & 0x1F;
332
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
333
+ const size_t id = w.idx;
334
+ end = !!(w.len & 0x80);
335
+ w.len = (uint8_t)l;
336
+ if (w.transform == 0) {
337
+ const uint8_t* s;
338
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
339
+ continue;
340
+ }
341
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + "" and
342
+ "." + BROTLI_TRANSFORM_IDENTITY + "" */
343
+ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
344
+ has_found_match = BROTLI_TRUE;
345
+ if (l + 2 >= max_length) {
346
+ continue;
347
+ }
348
+ /* Transforms " " + BROTLI_TRANSFORM_IDENTITY + <suffix> and
349
+ "." + BROTLI_TRANSFORM_IDENTITY + <suffix>
350
+ */
351
+ s = &data[l + 1];
352
+ if (s[0] == ' ') {
353
+ AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
354
+ } else if (s[0] == '(') {
355
+ AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
356
+ } else if (is_space) {
357
+ if (s[0] == ',') {
358
+ AddMatch(id + 103 * n, l + 2, l, matches);
359
+ if (s[1] == ' ') {
360
+ AddMatch(id + 33 * n, l + 3, l, matches);
361
+ }
362
+ } else if (s[0] == '.') {
363
+ AddMatch(id + 71 * n, l + 2, l, matches);
364
+ if (s[1] == ' ') {
365
+ AddMatch(id + 52 * n, l + 3, l, matches);
366
+ }
367
+ } else if (s[0] == '=') {
368
+ if (s[1] == '"') {
369
+ AddMatch(id + 81 * n, l + 3, l, matches);
370
+ } else if (s[1] == '\'') {
371
+ AddMatch(id + 98 * n, l + 3, l, matches);
372
+ }
373
+ }
374
+ }
375
+ } else if (is_space) {
376
+ /* Set is_all_caps=0 for BROTLI_TRANSFORM_UPPERCASE_FIRST and
377
+ is_all_caps=1 otherwise (BROTLI_TRANSFORM_UPPERCASE_ALL)
378
+ transform. */
379
+ const BROTLI_BOOL is_all_caps =
380
+ TO_BROTLI_BOOL(w.transform != BROTLI_TRANSFORM_UPPERCASE_FIRST);
381
+ const uint8_t* s;
382
+ if (!IsMatch(dictionary->words, w, &data[1], max_length - 1)) {
383
+ continue;
384
+ }
385
+ /* Transforms " " + kUppercase{First,All} + "" */
386
+ AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
387
+ has_found_match = BROTLI_TRUE;
388
+ if (l + 2 >= max_length) {
389
+ continue;
390
+ }
391
+ /* Transforms " " + kUppercase{First,All} + <suffix> */
392
+ s = &data[l + 1];
393
+ if (s[0] == ' ') {
394
+ AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
395
+ } else if (s[0] == ',') {
396
+ if (!is_all_caps) {
397
+ AddMatch(id + 109 * n, l + 2, l, matches);
398
+ }
399
+ if (s[1] == ' ') {
400
+ AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
401
+ }
402
+ } else if (s[0] == '.') {
403
+ AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
404
+ if (s[1] == ' ') {
405
+ AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
406
+ }
407
+ } else if (s[0] == '=') {
408
+ if (s[1] == '"') {
409
+ AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
410
+ } else if (s[1] == '\'') {
411
+ AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
412
+ }
413
+ }
414
+ }
415
+ }
416
+ }
417
+ if (max_length >= 6) {
418
+ /* Transforms with prefixes "e ", "s ", ", " and "\xC2\xA0" */
419
+ if ((data[1] == ' ' &&
420
+ (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
421
+ (data[0] == 0xC2 && data[1] == 0xA0)) {
422
+ size_t offset = dictionary->buckets[Hash(&data[2])];
423
+ BROTLI_BOOL end = !offset;
424
+ while (!end) {
425
+ DictWord w = dictionary->dict_words[offset++];
426
+ const size_t l = w.len & 0x1F;
427
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
428
+ const size_t id = w.idx;
429
+ end = !!(w.len & 0x80);
430
+ w.len = (uint8_t)l;
431
+ if (w.transform == 0 &&
432
+ IsMatch(dictionary->words, w, &data[2], max_length - 2)) {
433
+ if (data[0] == 0xC2) {
434
+ AddMatch(id + 102 * n, l + 2, l, matches);
435
+ has_found_match = BROTLI_TRUE;
436
+ } else if (l + 2 < max_length && data[l + 2] == ' ') {
437
+ size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
438
+ AddMatch(id + t * n, l + 3, l, matches);
439
+ has_found_match = BROTLI_TRUE;
440
+ }
441
+ }
442
+ }
443
+ }
444
+ }
445
+ if (max_length >= 9) {
446
+ /* Transforms with prefixes " the " and ".com/" */
447
+ if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
448
+ data[3] == 'e' && data[4] == ' ') ||
449
+ (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
450
+ data[3] == 'm' && data[4] == '/')) {
451
+ size_t offset = dictionary->buckets[Hash(&data[5])];
452
+ BROTLI_BOOL end = !offset;
453
+ while (!end) {
454
+ DictWord w = dictionary->dict_words[offset++];
455
+ const size_t l = w.len & 0x1F;
456
+ const size_t n = (size_t)1 << dictionary->words->size_bits_by_length[l];
457
+ const size_t id = w.idx;
458
+ end = !!(w.len & 0x80);
459
+ w.len = (uint8_t)l;
460
+ if (w.transform == 0 &&
461
+ IsMatch(dictionary->words, w, &data[5], max_length - 5)) {
462
+ AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
463
+ has_found_match = BROTLI_TRUE;
464
+ if (l + 5 < max_length) {
465
+ const uint8_t* s = &data[l + 5];
466
+ if (data[0] == ' ') {
467
+ if (l + 8 < max_length &&
468
+ s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
469
+ AddMatch(id + 62 * n, l + 9, l, matches);
470
+ if (l + 12 < max_length &&
471
+ s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
472
+ AddMatch(id + 73 * n, l + 13, l, matches);
473
+ }
474
+ }
475
+ }
476
+ }
477
+ }
478
+ }
479
+ }
480
+ }
481
+ return has_found_match;
482
+ }
483
+
484
+ #if defined(__cplusplus) || defined(c_plusplus)
485
+ } /* extern "C" */
486
+ #endif
@@ -0,0 +1,40 @@
1
+ /* Copyright 2013 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Class to model the static dictionary. */
8
+
9
+ #ifndef BROTLI_ENC_STATIC_DICT_H_
10
+ #define BROTLI_ENC_STATIC_DICT_H_
11
+
12
+ #include "brotli_common_dictionary.h"
13
+ #include "brotli_common_platform.h"
14
+ #include "brotli_types.h"
15
+ #include "brotli_enc_encoder_dict.h"
16
+
17
+ #if defined(__cplusplus) || defined(c_plusplus)
18
+ extern "C" {
19
+ #endif
20
+
21
+ #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37 /* Largest match length indexed in matches[]; callers size that array to this value + 1. */
22
+ static const uint32_t kInvalidMatch = 0xFFFFFFF; /* Initial value required in every matches[] slot; presumably means "no match yet" - TODO confirm. Note: seven hex digits (28 bits). */
23
+
24
+ /* Matches data against static dictionary words, and for each length l,
25
+ for which a match is found, updates matches[l] to be the minimum possible
26
+ (distance << 5) + len_code. Lookups into data are guarded by max_length.
27
+ Returns 1 if matches have been found, otherwise 0. NOTE(review): min_length is presumably the shortest match length of interest - TODO confirm.
28
+ Prerequisites:
29
+ matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
30
+ all elements are initialized to kInvalidMatch */
31
+ BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
32
+ const BrotliEncoderDictionary* dictionary,
33
+ const uint8_t* data, size_t min_length, size_t max_length,
34
+ uint32_t* matches);
35
+
36
+ #if defined(__cplusplus) || defined(c_plusplus)
37
+ } /* extern "C" */
38
+ #endif
39
+
40
+ #endif /* BROTLI_ENC_STATIC_DICT_H_ */