multi_compress 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +79 -3
  9. data/ext/multi_compress/multi_compress.c +199 -120
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. data/lib/multi_compress.rb +80 -41
  212. metadata +29 -2
@@ -11,18 +11,12 @@
11
11
  #include "../common/transform.h"
12
12
  #include "encoder_dict.h"
13
13
  #include "find_match_length.h"
14
+ #include "hash_base.h"
14
15
 
15
16
  #if defined(__cplusplus) || defined(c_plusplus)
16
17
  extern "C" {
17
18
  #endif
18
19
 
19
- static BROTLI_INLINE uint32_t Hash(const uint8_t* data) {
20
- uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kDictHashMul32;
21
- /* The higher bits contain more mixture from the multiplication,
22
- so we take our results from there. */
23
- return h >> (32 - kDictNumBits);
24
- }
25
-
26
20
  static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
27
21
  uint32_t* matches) {
28
22
  uint32_t match = (uint32_t)((distance << 5) + len_code);
@@ -96,7 +90,7 @@ static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
96
90
  }
97
91
  #endif /* BROTLI_EXPERIMENTAL */
98
92
  {
99
- size_t offset = dictionary->buckets[Hash(data)];
93
+ size_t offset = dictionary->buckets[Hash15(data)];
100
94
  BROTLI_BOOL end = !offset;
101
95
  while (!end) {
102
96
  DictWord w = dictionary->dict_words[offset++];
@@ -341,7 +335,7 @@ static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
341
335
  /* Transforms with prefixes " " and "." */
342
336
  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
343
337
  BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
344
- size_t offset = dictionary->buckets[Hash(&data[1])];
338
+ size_t offset = dictionary->buckets[Hash15(&data[1])];
345
339
  BROTLI_BOOL end = !offset;
346
340
  while (!end) {
347
341
  DictWord w = dictionary->dict_words[offset++];
@@ -436,7 +430,7 @@ static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
436
430
  if ((data[1] == ' ' &&
437
431
  (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
438
432
  (data[0] == 0xC2 && data[1] == 0xA0)) {
439
- size_t offset = dictionary->buckets[Hash(&data[2])];
433
+ size_t offset = dictionary->buckets[Hash15(&data[2])];
440
434
  BROTLI_BOOL end = !offset;
441
435
  while (!end) {
442
436
  DictWord w = dictionary->dict_words[offset++];
@@ -465,7 +459,7 @@ static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
465
459
  data[3] == 'e' && data[4] == ' ') ||
466
460
  (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
467
461
  data[3] == 'm' && data[4] == '/')) {
468
- size_t offset = dictionary->buckets[Hash(&data[5])];
462
+ size_t offset = dictionary->buckets[Hash15(&data[5])];
469
463
  BROTLI_BOOL end = !offset;
470
464
  while (!end) {
471
465
  DictWord w = dictionary->dict_words[offset++];
@@ -9,10 +9,8 @@
9
9
  #ifndef BROTLI_ENC_STATIC_DICT_H_
10
10
  #define BROTLI_ENC_STATIC_DICT_H_
11
11
 
12
- #include <brotli/types.h>
13
-
14
- #include "../common/dictionary.h"
15
12
  #include "../common/platform.h"
13
+
16
14
  #include "encoder_dict.h"
17
15
 
18
16
  #if defined(__cplusplus) || defined(c_plusplus)
@@ -0,0 +1,224 @@
1
+ /* Copyright 2025 Google Inc. All Rights Reserved.
2
+
3
+ Distributed under MIT license.
4
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5
+ */
6
+
7
+ /* Lookup table for static dictionary and transforms. */
8
+
9
+ #include "static_dict_lut.h"
10
+
11
+ #include "../common/platform.h" /* IWYU pragma: keep */
12
+ #include "../common/static_init.h"
13
+
14
+ #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
15
+ #include "../common/dictionary.h"
16
+ #include "../common/transform.h"
17
+ #include "hash_base.h"
18
+ #endif
19
+
20
+ #if defined(__cplusplus) || defined(c_plusplus)
21
+ extern "C" {
22
+ #endif
23
+
24
+ #if (BROTLI_STATIC_INIT != BROTLI_STATIC_INIT_NONE)
25
+
26
+ /* TODO(eustas): deal with largest bucket(s). Now it contains 163 items. */
27
+ static BROTLI_BOOL BROTLI_COLD DoBrotliEncoderInitStaticDictionaryLut(
28
+ const BrotliDictionary* dict, uint16_t* buckets, DictWord* words,
29
+ void* arena) {
30
+ DictWord* slots = (DictWord*)arena;
31
+ uint16_t* heads = (uint16_t*)(slots + BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS);
32
+ uint16_t* counts = heads + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
33
+ uint16_t* prev = counts + BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS;
34
+ size_t next_slot = 0;
35
+ uint8_t transformed_word[24];
36
+ uint8_t transformed_other_word[24];
37
+ size_t l;
38
+ size_t pos;
39
+ size_t i;
40
+
41
+ memset(counts, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
42
+ memset(heads, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * sizeof(uint16_t));
43
+ memset(prev, 0, BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS * sizeof(uint16_t));
44
+
45
+ for (l = 4; l <= 24; ++l) {
46
+ size_t n = 1u << dict->size_bits_by_length[l];
47
+ const uint8_t* dict_words = dict->data + dict->offsets_by_length[l];
48
+ for (i = 0; i < n; ++i) {
49
+ const uint8_t* dict_word = dict_words + l * i;
50
+ uint32_t key = Hash15(dict_word);
51
+ slots[next_slot].len = (uint8_t)l;
52
+ slots[next_slot].transform = BROTLI_TRANSFORM_IDENTITY;
53
+ slots[next_slot].idx = (uint16_t)i;
54
+ prev[next_slot] = heads[key];
55
+ heads[key] = (uint16_t)next_slot;
56
+ counts[key]++;
57
+ ++next_slot;
58
+ }
59
+ for (i = 0; i < n; ++i) {
60
+ uint32_t key;
61
+ uint32_t prefix;
62
+ BROTLI_BOOL found;
63
+ size_t curr;
64
+ const uint8_t* dict_word = dict_words + l * i;
65
+ if (dict_word[0] < 'a' || dict_word[0] > 'z') continue;
66
+ memcpy(transformed_word, dict_word, l);
67
+ transformed_word[0] = transformed_word[0] - 32;
68
+ key = Hash15(transformed_word);
69
+ prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
70
+ found = BROTLI_FALSE;
71
+ curr = heads[key];
72
+ while (curr != 0) {
73
+ const uint8_t* other_word;
74
+ uint32_t other_prefix;
75
+ if (slots[curr].len != l) break;
76
+ other_word = dict_words + l * slots[curr].idx;
77
+ other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
78
+ if (prefix == other_prefix) {
79
+ if (memcmp(transformed_word, other_word, l) == 0) {
80
+ found = BROTLI_TRUE;
81
+ break;
82
+ }
83
+ }
84
+ curr = prev[curr];
85
+ }
86
+ if (found) continue;
87
+ slots[next_slot].len = (uint8_t)l;
88
+ slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_FIRST;
89
+ slots[next_slot].idx = (uint16_t)i;
90
+ prev[next_slot] = heads[key];
91
+ heads[key] = (uint16_t)next_slot;
92
+ counts[key]++;
93
+ ++next_slot;
94
+ }
95
+ for (i = 0; i < n; ++i) {
96
+ const uint8_t* dict_word = dict_words + l * i;
97
+ BROTLI_BOOL is_ascii = BROTLI_TRUE;
98
+ BROTLI_BOOL has_lower = BROTLI_FALSE;
99
+ size_t k;
100
+ uint32_t prefix;
101
+ uint32_t key;
102
+ size_t curr;
103
+ BROTLI_BOOL found;
104
+ for (k = 0; k < l; ++k) {
105
+ if (dict_word[k] >= 128) is_ascii = BROTLI_FALSE;
106
+ if (k > 0 && dict_word[k] >= 'a' && dict_word[k] <= 'z')
107
+ has_lower = BROTLI_TRUE;
108
+ }
109
+ if (!is_ascii || !has_lower) continue;
110
+ memcpy(transformed_word, dict_word, l);
111
+ prefix = BROTLI_UNALIGNED_LOAD32LE(transformed_word) & ~0x20202020u;
112
+ for (k = 0; k < l; ++k) {
113
+ if (transformed_word[k] >= 'a' && transformed_word[k] <= 'z') {
114
+ transformed_word[k] = transformed_word[k] - 32;
115
+ }
116
+ }
117
+ key = Hash15(transformed_word);
118
+ found = BROTLI_FALSE;
119
+ curr = heads[key];
120
+ while (curr != 0) {
121
+ const uint8_t* other_word;
122
+ uint32_t other_prefix;
123
+ if (slots[curr].len != l) break;
124
+ other_word = dict_words + l * slots[curr].idx;
125
+ other_prefix = BROTLI_UNALIGNED_LOAD32LE(other_word) & ~0x20202020u;
126
+ if (prefix == other_prefix) {
127
+ if (slots[curr].transform == BROTLI_TRANSFORM_IDENTITY) {
128
+ if (memcmp(transformed_word, other_word, l) == 0) {
129
+ found = BROTLI_TRUE;
130
+ break;
131
+ }
132
+ } else if (slots[curr].transform ==
133
+ BROTLI_TRANSFORM_UPPERCASE_FIRST) {
134
+ if ((transformed_word[0] == (other_word[0] - 32)) &&
135
+ memcmp(transformed_word + 1, other_word + 1, l - 1) == 0) {
136
+ found = BROTLI_TRUE;
137
+ break;
138
+ }
139
+ } else {
140
+ for (k = 0; k < l; ++k) {
141
+ if (other_word[k] >= 'a' && other_word[k] <= 'z') {
142
+ transformed_other_word[k] = other_word[k] - 32;
143
+ } else {
144
+ transformed_other_word[k] = other_word[k];
145
+ }
146
+ }
147
+ if (memcmp(transformed_word, transformed_other_word, l) == 0) {
148
+ found = BROTLI_TRUE;
149
+ break;
150
+ }
151
+ }
152
+ }
153
+ curr = prev[curr];
154
+ }
155
+ if (found) {
156
+ continue;
157
+ }
158
+ slots[next_slot].len = (uint8_t)l;
159
+ slots[next_slot].transform = BROTLI_TRANSFORM_UPPERCASE_ALL;
160
+ slots[next_slot].idx = (uint16_t)i;
161
+ prev[next_slot] = heads[key];
162
+ heads[key] = (uint16_t)next_slot;
163
+ counts[key]++;
164
+ ++next_slot;
165
+ }
166
+ }
167
+
168
+ if (next_slot != 31704) return BROTLI_FALSE;
169
+ pos = 0;
170
+ /* Unused; makes offsets start from 1. */
171
+ words[pos].len = 0;
172
+ words[pos].transform = 0;
173
+ words[pos].idx = 0;
174
+ pos++;
175
+ for (i = 0; i < BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS; ++i) {
176
+ size_t num_words = counts[i];
177
+ size_t curr;
178
+ if (num_words == 0) {
179
+ buckets[i] = 0;
180
+ continue;
181
+ }
182
+ buckets[i] = (uint16_t)pos;
183
+ curr = heads[i];
184
+ pos += num_words;
185
+ for (size_t k = 0; k < num_words; ++k) {
186
+ words[pos - 1 - k] = slots[curr];
187
+ curr = prev[curr];
188
+ }
189
+ words[pos - 1].len |= 0x80;
190
+ }
191
+ return BROTLI_TRUE;
192
+ }
193
+
194
+ BROTLI_BOOL BrotliEncoderInitStaticDictionaryLut(
195
+ const BrotliDictionary* dict, uint16_t* buckets, DictWord* words) {
196
+ size_t arena_size =
197
+ BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS *
198
+ (sizeof(uint16_t) + sizeof(DictWord)) +
199
+ BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS * 2 * sizeof(uint16_t);
200
+ void* arena = malloc(arena_size);
201
+ BROTLI_BOOL ok;
202
+ if (arena == NULL) {
203
+ return BROTLI_FALSE;
204
+ }
205
+ ok = DoBrotliEncoderInitStaticDictionaryLut(dict, buckets, words, arena);
206
+ free(arena);
207
+ return ok;
208
+ }
209
+
210
+ BROTLI_MODEL("small")
211
+ uint16_t kStaticDictionaryBuckets[BROTLI_ENC_STATIC_DICT_LUT_NUM_BUCKETS];
212
+ BROTLI_MODEL("small")
213
+ DictWord kStaticDictionaryWords[BROTLI_ENC_STATIC_DICT_LUT_NUM_ITEMS];
214
+
215
+ #else /* BROTLI_STATIC_INIT */
216
+
217
+ /* Embed kStaticDictionaryBuckets and kStaticDictionaryWords. */
218
+ #include "static_dict_lut_inc.h"
219
+
220
+ #endif /* BROTLI_STATIC_INIT */
221
+
222
+ #if defined(__cplusplus) || defined(c_plusplus)
223
+ } /* extern "C" */
224
+ #endif