multi_compress 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +13 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +116 -0
  5. data/ext/multi_compress/extconf.rb +171 -0
  6. data/ext/multi_compress/multi_compress.c +1534 -0
  7. data/ext/multi_compress/vendor/brotli/c/common/constants.c +15 -0
  8. data/ext/multi_compress/vendor/brotli/c/common/constants.h +201 -0
  9. data/ext/multi_compress/vendor/brotli/c/common/context.c +156 -0
  10. data/ext/multi_compress/vendor/brotli/c/common/context.h +113 -0
  11. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +5916 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +64 -0
  13. data/ext/multi_compress/vendor/brotli/c/common/platform.c +23 -0
  14. data/ext/multi_compress/vendor/brotli/c/common/platform.h +541 -0
  15. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +521 -0
  16. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  17. data/ext/multi_compress/vendor/brotli/c/common/transform.c +291 -0
  18. data/ext/multi_compress/vendor/brotli/c/common/transform.h +85 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/version.h +51 -0
  20. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +78 -0
  21. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +423 -0
  22. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +2875 -0
  23. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +342 -0
  24. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +122 -0
  25. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +733 -0
  26. data/ext/multi_compress/vendor/brotli/c/dec/state.c +183 -0
  27. data/ext/multi_compress/vendor/brotli/c/dec/state.h +400 -0
  28. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +207 -0
  29. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +40 -0
  30. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +939 -0
  31. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +96 -0
  32. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_inc.h +189 -0
  33. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +36 -0
  34. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +64 -0
  35. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +127 -0
  36. data/ext/multi_compress/vendor/brotli/c/enc/block_encoder_inc.h +34 -0
  37. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +217 -0
  38. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +52 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +481 -0
  40. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +1336 -0
  41. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +89 -0
  42. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +57 -0
  43. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +49 -0
  44. data/ext/multi_compress/vendor/brotli/c/enc/cluster_inc.h +325 -0
  45. data/ext/multi_compress/vendor/brotli/c/enc/command.c +28 -0
  46. data/ext/multi_compress/vendor/brotli/c/enc/command.h +191 -0
  47. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  48. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  49. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +800 -0
  50. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +86 -0
  51. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +657 -0
  52. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +72 -0
  53. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +1848 -0
  54. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +25 -0
  55. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +1996 -0
  56. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +640 -0
  57. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +157 -0
  58. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +504 -0
  59. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +123 -0
  60. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +542 -0
  61. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +105 -0
  62. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +67 -0
  63. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +72 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +728 -0
  65. data/ext/multi_compress/vendor/brotli/c/enc/hash_composite_inc.h +140 -0
  66. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +295 -0
  67. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +262 -0
  68. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
  69. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +266 -0
  70. data/ext/multi_compress/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
  71. data/ext/multi_compress/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +330 -0
  72. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +100 -0
  73. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +64 -0
  74. data/ext/multi_compress/vendor/brotli/c/enc/histogram_inc.h +51 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +180 -0
  76. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +32 -0
  77. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +194 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +131 -0
  79. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +677 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +106 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +185 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/params.h +47 -0
  83. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +54 -0
  84. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +202 -0
  85. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +168 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/state.h +104 -0
  87. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +542 -0
  88. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +41 -0
  89. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +5866 -0
  90. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +85 -0
  91. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +33 -0
  92. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +88 -0
  93. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +409 -0
  94. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +501 -0
  95. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +305 -0
  96. data/ext/multi_compress/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  97. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +83 -0
  98. data/ext/multi_compress/vendor/lz4/lib/LICENSE +24 -0
  99. data/ext/multi_compress/vendor/lz4/lib/Makefile +244 -0
  100. data/ext/multi_compress/vendor/lz4/lib/README.md +193 -0
  101. data/ext/multi_compress/vendor/lz4/lib/dll/example/Makefile +63 -0
  102. data/ext/multi_compress/vendor/lz4/lib/dll/example/README.md +69 -0
  103. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.sln +25 -0
  104. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.vcxproj +182 -0
  105. data/ext/multi_compress/vendor/lz4/lib/liblz4-dll.rc.in +35 -0
  106. data/ext/multi_compress/vendor/lz4/lib/liblz4.pc.in +14 -0
  107. data/ext/multi_compress/vendor/lz4/lib/lz4.c +2829 -0
  108. data/ext/multi_compress/vendor/lz4/lib/lz4.h +884 -0
  109. data/ext/multi_compress/vendor/lz4/lib/lz4file.c +341 -0
  110. data/ext/multi_compress/vendor/lz4/lib/lz4file.h +93 -0
  111. data/ext/multi_compress/vendor/lz4/lib/lz4frame.c +2136 -0
  112. data/ext/multi_compress/vendor/lz4/lib/lz4frame.h +751 -0
  113. data/ext/multi_compress/vendor/lz4/lib/lz4frame_static.h +47 -0
  114. data/ext/multi_compress/vendor/lz4/lib/lz4hc.c +2192 -0
  115. data/ext/multi_compress/vendor/lz4/lib/lz4hc.h +414 -0
  116. data/ext/multi_compress/vendor/lz4/lib/xxhash.c +1030 -0
  117. data/ext/multi_compress/vendor/lz4/lib/xxhash.h +328 -0
  118. data/ext/multi_compress/vendor/zstd/lib/BUCK +232 -0
  119. data/ext/multi_compress/vendor/zstd/lib/Makefile +369 -0
  120. data/ext/multi_compress/vendor/zstd/lib/README.md +237 -0
  121. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  122. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +200 -0
  123. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +457 -0
  124. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +450 -0
  125. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +249 -0
  126. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +30 -0
  127. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +116 -0
  128. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +340 -0
  129. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +63 -0
  130. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +168 -0
  131. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +640 -0
  132. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +313 -0
  133. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +286 -0
  134. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +426 -0
  135. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +371 -0
  136. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +90 -0
  137. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +158 -0
  138. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +182 -0
  139. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +150 -0
  140. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +18 -0
  141. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +7020 -0
  142. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +48 -0
  143. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +111 -0
  144. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +392 -0
  145. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +163 -0
  146. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +134 -0
  147. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +625 -0
  148. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +181 -0
  149. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +75 -0
  150. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +1464 -0
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +7153 -0
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +1534 -0
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +235 -0
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +39 -0
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +442 -0
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +688 -0
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +748 -0
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +770 -0
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +50 -0
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +968 -0
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +38 -0
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +2199 -0
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +202 -0
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +730 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +117 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +1576 -0
  170. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +80 -0
  171. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +1882 -0
  172. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +113 -0
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +1944 -0
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +244 -0
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +44 -0
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +2407 -0
  178. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +2215 -0
  179. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +73 -0
  180. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +240 -0
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +214 -0
  182. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +26 -0
  183. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +167 -0
  184. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +77 -0
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +1261 -0
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +152 -0
  187. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.c +1913 -0
  188. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +67 -0
  189. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +766 -0
  190. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +1133 -0
  191. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +48 -0
  192. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +63 -0
  193. data/ext/multi_compress/vendor/zstd/lib/dll/example/build_package.bat +20 -0
  194. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.sln +25 -0
  195. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.vcxproj +181 -0
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +452 -0
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2127 -0
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +94 -0
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +3465 -0
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +93 -0
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +3105 -0
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +93 -0
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +3598 -0
  204. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +142 -0
  205. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +4005 -0
  206. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +162 -0
  207. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +4106 -0
  208. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +172 -0
  209. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +4490 -0
  210. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +187 -0
  211. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +237 -0
  212. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +16 -0
  213. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +35 -0
  214. data/ext/multi_compress/vendor/zstd/lib/zdict.h +474 -0
  215. data/ext/multi_compress/vendor/zstd/lib/zstd.h +3089 -0
  216. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +114 -0
  217. data/lib/multi_compress/version.rb +5 -0
  218. data/lib/multi_compress.rb +329 -0
  219. metadata +322 -0
@@ -0,0 +1,1534 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+ #include <zstd.h>
4
+ #include <zdict.h>
5
+ #include <lz4.h>
6
+ #include <lz4hc.h>
7
+ #include <brotli/encode.h>
8
+ #include <brotli/decode.h>
9
+ #include <string.h>
10
+ #include <stdlib.h>
11
+ #include <stdio.h>
12
+
13
+ #define MAX_DECOMPRESS_SIZE (256ULL * 1024 * 1024)
14
+
15
+ static VALUE mMultiCompress;
16
+ static VALUE eError;
17
+ static VALUE eDataError;
18
+ static VALUE eMemError;
19
+ static VALUE eStreamError;
20
+ static VALUE eUnsupportedError;
21
+ static VALUE eLevelError;
22
+ static VALUE cDeflater;
23
+ static VALUE cInflater;
24
+ static VALUE cWriter;
25
+ static VALUE cReader;
26
+ static VALUE cDictionary;
27
+ static VALUE mZstd;
28
+ static VALUE mLZ4;
29
+ static VALUE mBrotli;
30
+
31
/* Compression backends handled by this file; the numeric values are explicit. */
typedef enum {
    ALGO_ZSTD = 0,
    ALGO_LZ4 = 1,
    ALGO_BROTLI = 2
} compress_algo_t;
32
+
33
+ static compress_algo_t sym_to_algo(VALUE sym) {
34
+ ID id = SYM2ID(sym);
35
+ if (id == rb_intern("zstd"))
36
+ return ALGO_ZSTD;
37
+ if (id == rb_intern("lz4"))
38
+ return ALGO_LZ4;
39
+ if (id == rb_intern("brotli"))
40
+ return ALGO_BROTLI;
41
+ rb_raise(rb_eArgError, "Unknown algorithm: %s", rb_id2name(id));
42
+ return ALGO_ZSTD;
43
+ }
44
+
45
+ static int resolve_level(compress_algo_t algo, VALUE level_val) {
46
+ if (NIL_P(level_val)) {
47
+ switch (algo) {
48
+ case ALGO_ZSTD:
49
+ return 3;
50
+ case ALGO_LZ4:
51
+ return 1;
52
+ case ALGO_BROTLI:
53
+ return 6;
54
+ }
55
+ }
56
+
57
+ if (SYMBOL_P(level_val)) {
58
+ ID id = SYM2ID(level_val);
59
+ if (id == rb_intern("fastest")) {
60
+ switch (algo) {
61
+ case ALGO_ZSTD:
62
+ return 1;
63
+ case ALGO_LZ4:
64
+ return 1;
65
+ case ALGO_BROTLI:
66
+ return 0;
67
+ }
68
+ } else if (id == rb_intern("default")) {
69
+ switch (algo) {
70
+ case ALGO_ZSTD:
71
+ return 3;
72
+ case ALGO_LZ4:
73
+ return 1;
74
+ case ALGO_BROTLI:
75
+ return 6;
76
+ }
77
+ } else if (id == rb_intern("best")) {
78
+ switch (algo) {
79
+ case ALGO_ZSTD:
80
+ return 19;
81
+ case ALGO_LZ4:
82
+ return 16;
83
+ case ALGO_BROTLI:
84
+ return 11;
85
+ }
86
+ }
87
+ rb_raise(eLevelError, "Unknown named level: %s", rb_id2name(id));
88
+ }
89
+
90
+ int level = NUM2INT(level_val);
91
+
92
+ switch (algo) {
93
+ case ALGO_ZSTD:
94
+ if (level < 1 || level > 22)
95
+ rb_raise(eLevelError, "zstd level must be 1..22, got %d", level);
96
+ break;
97
+ case ALGO_LZ4:
98
+ if (level < 1 || level > 16)
99
+ rb_raise(eLevelError, "lz4 level must be 1..16, got %d", level);
100
+ break;
101
+ case ALGO_BROTLI:
102
+ if (level < 0 || level > 11)
103
+ rb_raise(eLevelError, "brotli level must be 0..11, got %d", level);
104
+ break;
105
+ }
106
+
107
+ return level;
108
+ }
109
+
110
+ static compress_algo_t detect_algo(const uint8_t *data, size_t len) {
111
+ if (len >= 4) {
112
+ if (data[0] == 0x28 && data[1] == 0xB5 && data[2] == 0x2F && data[3] == 0xFD) {
113
+ return ALGO_ZSTD;
114
+ }
115
+ }
116
+
117
+ if (len >= 8) {
118
+ uint32_t orig = (uint32_t)data[0] | ((uint32_t)data[1] << 8) | ((uint32_t)data[2] << 16) |
119
+ ((uint32_t)data[3] << 24);
120
+ uint32_t comp = (uint32_t)data[4] | ((uint32_t)data[5] << 8) | ((uint32_t)data[6] << 16) |
121
+ ((uint32_t)data[7] << 24);
122
+ if (orig > 0 && orig <= 256 * 1024 * 1024 && comp > 0 && comp <= 256 * 1024 * 1024 &&
123
+ 8 + comp <= len) {
124
+ return ALGO_LZ4;
125
+ }
126
+ }
127
+
128
+ rb_raise(eDataError, "cannot detect compression format (no magic bytes found). "
129
+ "Use algo: :zstd, :lz4, or :brotli explicitly.");
130
+ return ALGO_ZSTD;
131
+ }
132
+
133
+ typedef struct {
134
+ compress_algo_t algo;
135
+ uint8_t *data;
136
+ size_t size;
137
+ } dictionary_t;
138
+
139
+ static void dict_free(void *ptr) {
140
+ dictionary_t *dict = (dictionary_t *)ptr;
141
+ if (dict->data)
142
+ xfree(dict->data);
143
+ xfree(dict);
144
+ }
145
+
146
+ static size_t dict_memsize(const void *ptr) {
147
+ const dictionary_t *d = (const dictionary_t *)ptr;
148
+ return sizeof(dictionary_t) + d->size;
149
+ }
150
+
151
+ static const rb_data_type_t dictionary_type = {
152
+ "Compress::Dictionary", {NULL, dict_free, dict_memsize}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY};
153
+
154
+ static VALUE dict_alloc(VALUE klass) {
155
+ dictionary_t *d = ALLOC(dictionary_t);
156
+ memset(d, 0, sizeof(dictionary_t));
157
+ return TypedData_Wrap_Struct(klass, &dictionary_type, d);
158
+ }
159
+
160
+ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
161
+ VALUE data, opts;
162
+ rb_scan_args(argc, argv, "1:", &data, &opts);
163
+ StringValue(data);
164
+
165
+ VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
166
+ if (!NIL_P(opts)) {
167
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
168
+ level_val = rb_hash_aref(opts, ID2SYM(rb_intern("level")));
169
+ dict_val = rb_hash_aref(opts, ID2SYM(rb_intern("dictionary")));
170
+ }
171
+
172
+ compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
173
+ int level = resolve_level(algo, level_val);
174
+
175
+ dictionary_t *dict = NULL;
176
+ if (!NIL_P(dict_val)) {
177
+ if (algo == ALGO_LZ4) {
178
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
179
+ }
180
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
181
+ }
182
+
183
+ const char *src = RSTRING_PTR(data);
184
+ size_t slen = RSTRING_LEN(data);
185
+
186
+ switch (algo) {
187
+ case ALGO_ZSTD: {
188
+ size_t bound = ZSTD_compressBound(slen);
189
+ VALUE dst = rb_str_buf_new(bound);
190
+
191
+ size_t csize;
192
+ if (dict) {
193
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
194
+ if (!cctx)
195
+ rb_raise(eMemError, "zstd: failed to create context");
196
+ ZSTD_CDict *cdict = ZSTD_createCDict(dict->data, dict->size, level);
197
+ if (!cdict) {
198
+ ZSTD_freeCCtx(cctx);
199
+ rb_raise(eMemError, "zstd: failed to create cdict");
200
+ }
201
+ csize = ZSTD_compress_usingCDict(cctx, RSTRING_PTR(dst), bound, src, slen, cdict);
202
+ ZSTD_freeCDict(cdict);
203
+ ZSTD_freeCCtx(cctx);
204
+ } else {
205
+ csize = ZSTD_compress(RSTRING_PTR(dst), bound, src, slen, level);
206
+ }
207
+
208
+ if (ZSTD_isError(csize)) {
209
+ rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(csize));
210
+ }
211
+ rb_str_set_len(dst, csize);
212
+ return dst;
213
+ }
214
+ case ALGO_LZ4: {
215
+ if (slen > (size_t)INT_MAX)
216
+ rb_raise(eError, "lz4: input too large (max 2GB)");
217
+ int bound = LZ4_compressBound((int)slen);
218
+ VALUE dst = rb_str_buf_new(8 + bound + 4);
219
+ char *out = RSTRING_PTR(dst);
220
+
221
+ out[0] = (slen >> 0) & 0xFF;
222
+ out[1] = (slen >> 8) & 0xFF;
223
+ out[2] = (slen >> 16) & 0xFF;
224
+ out[3] = (slen >> 24) & 0xFF;
225
+
226
+ int csize;
227
+ if (level > 1) {
228
+ csize = LZ4_compress_HC(src, out + 8, (int)slen, bound, level);
229
+ } else {
230
+ csize = LZ4_compress_default(src, out + 8, (int)slen, bound);
231
+ }
232
+ if (csize <= 0) {
233
+ rb_raise(eError, "lz4 compress failed");
234
+ }
235
+
236
+ out[4] = (csize >> 0) & 0xFF;
237
+ out[5] = (csize >> 8) & 0xFF;
238
+ out[6] = (csize >> 16) & 0xFF;
239
+ out[7] = (csize >> 24) & 0xFF;
240
+
241
+ size_t total = 8 + csize;
242
+ out[total] = 0;
243
+ out[total + 1] = 0;
244
+ out[total + 2] = 0;
245
+ out[total + 3] = 0;
246
+
247
+ rb_str_set_len(dst, total + 4);
248
+ return dst;
249
+ }
250
+ case ALGO_BROTLI: {
251
+ size_t out_len = BrotliEncoderMaxCompressedSize(slen);
252
+ if (out_len == 0)
253
+ out_len = slen + 1024;
254
+ VALUE dst = rb_str_buf_new(out_len);
255
+
256
+ if (dict) {
257
+ BrotliEncoderState *enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
258
+ if (!enc)
259
+ rb_raise(eMemError, "brotli: failed to create encoder");
260
+ BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, level);
261
+ BrotliEncoderPreparedDictionary *pd =
262
+ BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW, dict->size, dict->data,
263
+ BROTLI_MAX_QUALITY, NULL, NULL, NULL);
264
+ BrotliEncoderAttachPreparedDictionary(enc, pd);
265
+
266
+ size_t available_in = slen;
267
+ const uint8_t *next_in = (const uint8_t *)src;
268
+ size_t available_out = out_len;
269
+ uint8_t *next_out = (uint8_t *)RSTRING_PTR(dst);
270
+ size_t initial_out = available_out;
271
+
272
+ BROTLI_BOOL ok =
273
+ BrotliEncoderCompressStream(enc, BROTLI_OPERATION_FINISH, &available_in, &next_in,
274
+ &available_out, &next_out, NULL);
275
+
276
+ BrotliEncoderDestroyPreparedDictionary(pd);
277
+ BrotliEncoderDestroyInstance(enc);
278
+ if (!ok)
279
+ rb_raise(eError, "brotli compress with dict failed");
280
+
281
+ rb_str_set_len(dst, initial_out - available_out);
282
+ } else {
283
+ BROTLI_BOOL ok =
284
+ BrotliEncoderCompress(level, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE, slen,
285
+ (const uint8_t *)src, &out_len, (uint8_t *)RSTRING_PTR(dst));
286
+
287
+ if (!ok)
288
+ rb_raise(eError, "brotli compress failed");
289
+ rb_str_set_len(dst, out_len);
290
+ }
291
+ return dst;
292
+ }
293
+ }
294
+
295
+ return Qnil;
296
+ }
297
+
298
+ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
299
+ VALUE data, opts;
300
+ rb_scan_args(argc, argv, "1:", &data, &opts);
301
+ StringValue(data);
302
+
303
+ VALUE algo_sym = Qnil, dict_val = Qnil;
304
+ if (!NIL_P(opts)) {
305
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
306
+ dict_val = rb_hash_aref(opts, ID2SYM(rb_intern("dictionary")));
307
+ }
308
+
309
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
310
+ size_t slen = RSTRING_LEN(data);
311
+
312
+ compress_algo_t algo;
313
+ if (NIL_P(algo_sym)) {
314
+ algo = detect_algo(src, slen);
315
+ } else {
316
+ algo = sym_to_algo(algo_sym);
317
+ }
318
+
319
+ dictionary_t *dict = NULL;
320
+ if (!NIL_P(dict_val)) {
321
+ if (algo == ALGO_LZ4) {
322
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
323
+ }
324
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
325
+ }
326
+
327
+ switch (algo) {
328
+ case ALGO_ZSTD: {
329
+ unsigned long long frame_size = ZSTD_getFrameContentSize(src, slen);
330
+ if (frame_size == ZSTD_CONTENTSIZE_ERROR) {
331
+ rb_raise(eDataError, "zstd: not valid compressed data");
332
+ }
333
+
334
+ ZSTD_DCtx *dctx = ZSTD_createDCtx();
335
+ if (!dctx)
336
+ rb_raise(eMemError, "zstd: failed to create dctx");
337
+
338
+ if (dict) {
339
+ size_t r = ZSTD_DCtx_loadDictionary(dctx, dict->data, dict->size);
340
+ if (ZSTD_isError(r)) {
341
+ ZSTD_freeDCtx(dctx);
342
+ rb_raise(eError, "zstd dict load: %s", ZSTD_getErrorName(r));
343
+ }
344
+ }
345
+
346
+ size_t alloc_size;
347
+ if (frame_size != ZSTD_CONTENTSIZE_UNKNOWN && frame_size <= 256ULL * 1024 * 1024) {
348
+ alloc_size = (size_t)frame_size;
349
+ } else {
350
+ alloc_size = (slen > MAX_DECOMPRESS_SIZE / 8) ? MAX_DECOMPRESS_SIZE : slen * 8;
351
+ if (alloc_size < 4096)
352
+ alloc_size = 4096;
353
+ }
354
+
355
+ VALUE dst = rb_str_buf_new(alloc_size);
356
+ size_t total_out = 0;
357
+
358
+ ZSTD_inBuffer input = {src, slen, 0};
359
+ while (input.pos < input.size) {
360
+ if (total_out >= alloc_size) {
361
+ if (alloc_size >= MAX_DECOMPRESS_SIZE) {
362
+ ZSTD_freeDCtx(dctx);
363
+ rb_raise(eDataError, "zstd: decompressed size exceeds limit (%lluMB)",
364
+ (unsigned long long)(MAX_DECOMPRESS_SIZE / (1024 * 1024)));
365
+ }
366
+ alloc_size *= 2;
367
+ if (alloc_size > MAX_DECOMPRESS_SIZE)
368
+ alloc_size = MAX_DECOMPRESS_SIZE;
369
+ rb_str_resize(dst, alloc_size);
370
+ }
371
+
372
+ ZSTD_outBuffer output = {RSTRING_PTR(dst) + total_out, alloc_size - total_out, 0};
373
+ size_t ret = ZSTD_decompressStream(dctx, &output, &input);
374
+ if (ZSTD_isError(ret)) {
375
+ ZSTD_freeDCtx(dctx);
376
+ rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(ret));
377
+ }
378
+ total_out += output.pos;
379
+ if (ret == 0)
380
+ break;
381
+ }
382
+
383
+ ZSTD_freeDCtx(dctx);
384
+ rb_str_set_len(dst, total_out);
385
+ return dst;
386
+ }
387
+ case ALGO_LZ4: {
388
+ if (slen < 4)
389
+ rb_raise(eDataError, "lz4: data too short");
390
+
391
+ VALUE result = rb_str_buf_new(0);
392
+ size_t pos = 0;
393
+
394
+ while (pos + 4 <= slen) {
395
+ uint32_t orig_size = (uint32_t)src[pos] | ((uint32_t)src[pos + 1] << 8) |
396
+ ((uint32_t)src[pos + 2] << 16) | ((uint32_t)src[pos + 3] << 24);
397
+ if (orig_size == 0)
398
+ break;
399
+
400
+ if (pos + 8 > slen)
401
+ rb_raise(eDataError, "lz4: truncated block header");
402
+
403
+ uint32_t comp_size = (uint32_t)src[pos + 4] | ((uint32_t)src[pos + 5] << 8) |
404
+ ((uint32_t)src[pos + 6] << 16) | ((uint32_t)src[pos + 7] << 24);
405
+
406
+ if (pos + 8 + comp_size > slen)
407
+ rb_raise(eDataError, "lz4: truncated block data");
408
+ if (orig_size > 256 * 1024 * 1024)
409
+ rb_raise(eDataError, "lz4: block too large (%u)", orig_size);
410
+
411
+ VALUE block = rb_str_buf_new(orig_size);
412
+ int dsize = LZ4_decompress_safe((const char *)(src + pos + 8), RSTRING_PTR(block),
413
+ (int)comp_size, (int)orig_size);
414
+ if (dsize < 0)
415
+ rb_raise(eDataError, "lz4 decompress failed");
416
+
417
+ rb_str_set_len(block, dsize);
418
+ rb_str_cat(result, RSTRING_PTR(block), dsize);
419
+ pos += 8 + comp_size;
420
+ }
421
+
422
+ return result;
423
+ }
424
+ case ALGO_BROTLI: {
425
+ size_t alloc_size = (slen > MAX_DECOMPRESS_SIZE / 4) ? MAX_DECOMPRESS_SIZE : slen * 4;
426
+ if (alloc_size < 1024)
427
+ alloc_size = 1024;
428
+
429
+ BrotliDecoderState *dec = BrotliDecoderCreateInstance(NULL, NULL, NULL);
430
+ if (!dec)
431
+ rb_raise(eMemError, "brotli: failed to create decoder");
432
+
433
+ if (dict) {
434
+ BrotliDecoderAttachDictionary(dec, BROTLI_SHARED_DICTIONARY_RAW, dict->size,
435
+ dict->data);
436
+ }
437
+
438
+ VALUE dst = rb_str_buf_new(alloc_size);
439
+ size_t total_out = 0;
440
+
441
+ size_t available_in = slen;
442
+ const uint8_t *next_in = src;
443
+
444
+ BrotliDecoderResult res = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
445
+ while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
446
+ size_t available_out = alloc_size - total_out;
447
+ uint8_t *next_out = (uint8_t *)RSTRING_PTR(dst) + total_out;
448
+
449
+ res = BrotliDecoderDecompressStream(dec, &available_in, &next_in, &available_out,
450
+ &next_out, NULL);
451
+
452
+ total_out = next_out - (uint8_t *)RSTRING_PTR(dst);
453
+
454
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
455
+ if (alloc_size >= MAX_DECOMPRESS_SIZE) {
456
+ BrotliDecoderDestroyInstance(dec);
457
+ rb_raise(eDataError, "brotli: decompressed size exceeds limit (%lluMB)",
458
+ (unsigned long long)(MAX_DECOMPRESS_SIZE / (1024 * 1024)));
459
+ }
460
+ alloc_size *= 2;
461
+ if (alloc_size > MAX_DECOMPRESS_SIZE)
462
+ alloc_size = MAX_DECOMPRESS_SIZE;
463
+ rb_str_resize(dst, alloc_size);
464
+ }
465
+ }
466
+
467
+ BrotliDecoderDestroyInstance(dec);
468
+
469
+ if (res != BROTLI_DECODER_RESULT_SUCCESS) {
470
+ rb_raise(eDataError, "brotli decompress failed");
471
+ }
472
+ rb_str_set_len(dst, total_out);
473
+ return dst;
474
+ }
475
+ }
476
+
477
+ return Qnil;
478
+ }
479
+
/*
 * Byte-indexed lookup table for the reflected CRC-32 polynomial 0xEDB88320:
 * entry i is the result of running the value i through eight shift/xor
 * steps. compress_crc32 folds one input byte per table lookup.
 */
static const uint32_t crc32_table[256] = {
    0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
    0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
    0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
    0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
    0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
    0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
    0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
    0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
    0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
    0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
    0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
    0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
    0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
    0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
    0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
    0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
    0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
    0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
    0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
    0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
    0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
    0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
    0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
    0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
    0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
    0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
    0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
    0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
    0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
    0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
    0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
    0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
    0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
    0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
    0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
    0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
    0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
    0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
    0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
    0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
    0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
    0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
    0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
    0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
    0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
    0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
    0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
    0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
    0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
    0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
    0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
    0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
    0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
    0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
    0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
    0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
    0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
    0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
    0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
    0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
    0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
    0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
    0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
    0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D,
};
513
+
514
+ static VALUE compress_crc32(int argc, VALUE *argv, VALUE self) {
515
+ VALUE data, prev;
516
+ rb_scan_args(argc, argv, "11", &data, &prev);
517
+ StringValue(data);
518
+
519
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
520
+ size_t len = RSTRING_LEN(data);
521
+ uint32_t crc = NIL_P(prev) ? 0 : NUM2UINT(prev);
522
+
523
+ crc = ~crc;
524
+ for (size_t i = 0; i < len; i++) {
525
+ crc = crc32_table[(crc ^ src[i]) & 0xFF] ^ (crc >> 8);
526
+ }
527
+ crc = ~crc;
528
+
529
+ return UINT2NUM(crc);
530
+ }
531
+
532
+ static VALUE compress_adler32(int argc, VALUE *argv, VALUE self) {
533
+ VALUE data, prev;
534
+ rb_scan_args(argc, argv, "11", &data, &prev);
535
+ StringValue(data);
536
+
537
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
538
+ size_t len = RSTRING_LEN(data);
539
+ uint32_t adler = NIL_P(prev) ? 1 : NUM2UINT(prev);
540
+
541
+ uint32_t s1 = adler & 0xFFFF;
542
+ uint32_t s2 = (adler >> 16) & 0xFFFF;
543
+ const uint32_t BASE = 65521;
544
+
545
+ while (len > 0) {
546
+ size_t chunk = len > 5552 ? 5552 : len;
547
+ len -= chunk;
548
+ for (size_t i = 0; i < chunk; i++) {
549
+ s1 += src[i];
550
+ s2 += s1;
551
+ }
552
+ s1 %= BASE;
553
+ s2 %= BASE;
554
+ src += chunk;
555
+ }
556
+
557
+ return UINT2NUM((s2 << 16) | s1);
558
+ }
559
+
560
+ static VALUE compress_algorithms(VALUE self) {
561
+ VALUE ary = rb_ary_new_capa(3);
562
+ rb_ary_push(ary, ID2SYM(rb_intern("zstd")));
563
+ rb_ary_push(ary, ID2SYM(rb_intern("lz4")));
564
+ rb_ary_push(ary, ID2SYM(rb_intern("brotli")));
565
+ return ary;
566
+ }
567
+
568
+ static VALUE compress_available_p(VALUE self, VALUE algo_sym) {
569
+ sym_to_algo(algo_sym);
570
+ return Qtrue;
571
+ }
572
+
573
+ static VALUE compress_version(VALUE self, VALUE algo_sym) {
574
+ compress_algo_t algo = sym_to_algo(algo_sym);
575
+ switch (algo) {
576
+ case ALGO_ZSTD:
577
+ return rb_str_new_cstr(ZSTD_versionString());
578
+ case ALGO_LZ4:
579
+ return rb_sprintf("%d.%d.%d", LZ4_VERSION_MAJOR, LZ4_VERSION_MINOR, LZ4_VERSION_RELEASE);
580
+ case ALGO_BROTLI:
581
+ return rb_sprintf("%d.%d.%d", BrotliEncoderVersion() >> 24,
582
+ (BrotliEncoderVersion() >> 12) & 0xFFF, BrotliEncoderVersion() & 0xFFF);
583
+ }
584
+ return Qnil;
585
+ }
586
+
587
+ typedef struct {
588
+ compress_algo_t algo;
589
+ int level;
590
+ int closed;
591
+ int finished;
592
+
593
+ union {
594
+ ZSTD_CStream *zstd;
595
+ BrotliEncoderState *brotli;
596
+ LZ4_stream_t *lz4;
597
+ } ctx;
598
+
599
+ struct {
600
+ char *buf;
601
+ size_t len;
602
+ size_t cap;
603
+ } lz4_buf;
604
+ } deflater_t;
605
+
606
+ static void deflater_free(void *ptr) {
607
+ deflater_t *d = (deflater_t *)ptr;
608
+ if (!d->closed) {
609
+ switch (d->algo) {
610
+ case ALGO_ZSTD:
611
+ if (d->ctx.zstd)
612
+ ZSTD_freeCStream(d->ctx.zstd);
613
+ break;
614
+ case ALGO_BROTLI:
615
+ if (d->ctx.brotli)
616
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
617
+ break;
618
+ case ALGO_LZ4:
619
+ if (d->ctx.lz4)
620
+ LZ4_freeStream(d->ctx.lz4);
621
+ break;
622
+ }
623
+ }
624
+ if (d->lz4_buf.buf)
625
+ xfree(d->lz4_buf.buf);
626
+ xfree(d);
627
+ }
628
+
629
+ static size_t deflater_memsize(const void *ptr) {
630
+ const deflater_t *d = (const deflater_t *)ptr;
631
+ return sizeof(deflater_t) + d->lz4_buf.cap;
632
+ }
633
+
634
+ static const rb_data_type_t deflater_type = {"Compress::Deflater",
635
+ {NULL, deflater_free, deflater_memsize},
636
+ 0,
637
+ 0,
638
+ RUBY_TYPED_FREE_IMMEDIATELY};
639
+
640
+ static VALUE deflater_alloc(VALUE klass) {
641
+ deflater_t *d = ALLOC(deflater_t);
642
+ memset(d, 0, sizeof(deflater_t));
643
+ return TypedData_Wrap_Struct(klass, &deflater_type, d);
644
+ }
645
+
646
+ static VALUE deflater_initialize(int argc, VALUE *argv, VALUE self) {
647
+ VALUE opts;
648
+ rb_scan_args(argc, argv, "0:", &opts);
649
+
650
+ deflater_t *d;
651
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
652
+
653
+ VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
654
+ if (!NIL_P(opts)) {
655
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
656
+ level_val = rb_hash_aref(opts, ID2SYM(rb_intern("level")));
657
+ dict_val = rb_hash_aref(opts, ID2SYM(rb_intern("dictionary")));
658
+ }
659
+
660
+ d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
661
+ d->level = resolve_level(d->algo, level_val);
662
+ d->closed = 0;
663
+ d->finished = 0;
664
+
665
+ dictionary_t *dict = NULL;
666
+ if (!NIL_P(dict_val)) {
667
+ if (d->algo == ALGO_LZ4) {
668
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
669
+ }
670
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
671
+ }
672
+
673
+ switch (d->algo) {
674
+ case ALGO_ZSTD: {
675
+ d->ctx.zstd = ZSTD_createCStream();
676
+ if (!d->ctx.zstd)
677
+ rb_raise(eMemError, "zstd: failed to create stream");
678
+
679
+ if (dict) {
680
+ ZSTD_CCtx_reset(d->ctx.zstd, ZSTD_reset_session_only);
681
+ ZSTD_CCtx_setParameter(d->ctx.zstd, ZSTD_c_compressionLevel, d->level);
682
+ size_t r = ZSTD_CCtx_loadDictionary(d->ctx.zstd, dict->data, dict->size);
683
+ if (ZSTD_isError(r))
684
+ rb_raise(eError, "zstd dict load: %s", ZSTD_getErrorName(r));
685
+ } else {
686
+ size_t r = ZSTD_initCStream(d->ctx.zstd, d->level);
687
+ if (ZSTD_isError(r))
688
+ rb_raise(eError, "zstd init: %s", ZSTD_getErrorName(r));
689
+ }
690
+ break;
691
+ }
692
+ case ALGO_BROTLI: {
693
+ d->ctx.brotli = BrotliEncoderCreateInstance(NULL, NULL, NULL);
694
+ if (!d->ctx.brotli)
695
+ rb_raise(eMemError, "brotli: failed to create encoder");
696
+ BrotliEncoderSetParameter(d->ctx.brotli, BROTLI_PARAM_QUALITY, d->level);
697
+ if (dict) {
698
+ BrotliEncoderPreparedDictionary *pd =
699
+ BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW, dict->size, dict->data,
700
+ BROTLI_MAX_QUALITY, NULL, NULL, NULL);
701
+ BrotliEncoderAttachPreparedDictionary(d->ctx.brotli, pd);
702
+ BrotliEncoderDestroyPreparedDictionary(pd);
703
+ }
704
+ break;
705
+ }
706
+ case ALGO_LZ4: {
707
+ d->ctx.lz4 = LZ4_createStream();
708
+ if (!d->ctx.lz4)
709
+ rb_raise(eMemError, "lz4: failed to create stream");
710
+ LZ4_resetStream(d->ctx.lz4);
711
+ d->lz4_buf.cap = 64 * 1024;
712
+ d->lz4_buf.buf = ALLOC_N(char, d->lz4_buf.cap);
713
+ d->lz4_buf.len = 0;
714
+ break;
715
+ }
716
+ }
717
+
718
+ return self;
719
+ }
720
+
721
+ static VALUE lz4_compress_block(deflater_t *d) {
722
+ if (d->lz4_buf.len == 0)
723
+ return rb_str_new("", 0);
724
+
725
+ if (d->lz4_buf.len > (size_t)INT_MAX)
726
+ rb_raise(eError, "lz4: block too large (max 2GB)");
727
+ int src_size = (int)d->lz4_buf.len;
728
+ int bound = LZ4_compressBound(src_size);
729
+
730
+ VALUE output = rb_str_buf_new(8 + bound);
731
+ char *out = RSTRING_PTR(output);
732
+
733
+ out[0] = (src_size >> 0) & 0xFF;
734
+ out[1] = (src_size >> 8) & 0xFF;
735
+ out[2] = (src_size >> 16) & 0xFF;
736
+ out[3] = (src_size >> 24) & 0xFF;
737
+
738
+ int csize = LZ4_compress_fast_continue(d->ctx.lz4, d->lz4_buf.buf, out + 8, src_size, bound, 1);
739
+
740
+ if (csize <= 0)
741
+ rb_raise(eError, "lz4 stream compress block failed");
742
+
743
+ out[4] = (csize >> 0) & 0xFF;
744
+ out[5] = (csize >> 8) & 0xFF;
745
+ out[6] = (csize >> 16) & 0xFF;
746
+ out[7] = (csize >> 24) & 0xFF;
747
+
748
+ rb_str_set_len(output, 8 + csize);
749
+ d->lz4_buf.len = 0;
750
+ return output;
751
+ }
752
+
753
+ static VALUE deflater_write(VALUE self, VALUE chunk) {
754
+ deflater_t *d;
755
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
756
+ if (d->closed)
757
+ rb_raise(eStreamError, "stream is closed");
758
+ if (d->finished)
759
+ rb_raise(eStreamError, "stream is already finished");
760
+ StringValue(chunk);
761
+
762
+ const char *src = RSTRING_PTR(chunk);
763
+ size_t slen = RSTRING_LEN(chunk);
764
+ if (slen == 0)
765
+ return rb_str_new("", 0);
766
+
767
+ switch (d->algo) {
768
+ case ALGO_ZSTD: {
769
+ ZSTD_inBuffer input = {src, slen, 0};
770
+ size_t out_cap = ZSTD_CStreamOutSize();
771
+ VALUE result = rb_str_buf_new(0);
772
+
773
+ while (input.pos < input.size) {
774
+ VALUE buf = rb_str_buf_new(out_cap);
775
+ ZSTD_outBuffer output = {RSTRING_PTR(buf), out_cap, 0};
776
+ size_t ret = ZSTD_compressStream(d->ctx.zstd, &output, &input);
777
+ if (ZSTD_isError(ret))
778
+ rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(ret));
779
+ if (output.pos > 0)
780
+ rb_str_cat(result, RSTRING_PTR(buf), output.pos);
781
+ }
782
+ return result;
783
+ }
784
+ case ALGO_BROTLI: {
785
+ size_t available_in = slen;
786
+ const uint8_t *next_in = (const uint8_t *)src;
787
+ VALUE result = rb_str_buf_new(0);
788
+
789
+ while (available_in > 0 || BrotliEncoderHasMoreOutput(d->ctx.brotli)) {
790
+ size_t available_out = 0;
791
+ uint8_t *next_out = NULL;
792
+ BROTLI_BOOL ok =
793
+ BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_PROCESS, &available_in,
794
+ &next_in, &available_out, &next_out, NULL);
795
+ if (!ok)
796
+ rb_raise(eError, "brotli compress stream failed");
797
+
798
+ const uint8_t *out_data;
799
+ size_t out_size = 0;
800
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
801
+ if (out_size > 0)
802
+ rb_str_cat(result, (const char *)out_data, out_size);
803
+ }
804
+ return result;
805
+ }
806
+ case ALGO_LZ4: {
807
+ VALUE result = rb_str_buf_new(0);
808
+ while (slen > 0) {
809
+ size_t space = d->lz4_buf.cap - d->lz4_buf.len;
810
+ size_t copy = slen < space ? slen : space;
811
+ memcpy(d->lz4_buf.buf + d->lz4_buf.len, src, copy);
812
+ d->lz4_buf.len += copy;
813
+ src += copy;
814
+ slen -= copy;
815
+ if (d->lz4_buf.len >= d->lz4_buf.cap) {
816
+ VALUE block = lz4_compress_block(d);
817
+ rb_str_cat(result, RSTRING_PTR(block), RSTRING_LEN(block));
818
+ }
819
+ }
820
+ return result;
821
+ }
822
+ }
823
+ return rb_str_new("", 0);
824
+ }
825
+
826
+ static VALUE deflater_flush(VALUE self) {
827
+ deflater_t *d;
828
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
829
+ if (d->closed)
830
+ rb_raise(eStreamError, "stream is closed");
831
+ if (d->finished)
832
+ rb_raise(eStreamError, "stream is already finished");
833
+
834
+ switch (d->algo) {
835
+ case ALGO_ZSTD: {
836
+ size_t out_cap = ZSTD_CStreamOutSize();
837
+ VALUE result = rb_str_buf_new(0);
838
+ size_t ret;
839
+ do {
840
+ VALUE buf = rb_str_buf_new(out_cap);
841
+ ZSTD_outBuffer output = {RSTRING_PTR(buf), out_cap, 0};
842
+ ret = ZSTD_flushStream(d->ctx.zstd, &output);
843
+ if (ZSTD_isError(ret))
844
+ rb_raise(eError, "zstd flush: %s", ZSTD_getErrorName(ret));
845
+ if (output.pos > 0)
846
+ rb_str_cat(result, RSTRING_PTR(buf), output.pos);
847
+ } while (ret > 0);
848
+ return result;
849
+ }
850
+ case ALGO_BROTLI: {
851
+ size_t available_in = 0;
852
+ const uint8_t *next_in = NULL;
853
+ VALUE result = rb_str_buf_new(0);
854
+ do {
855
+ size_t available_out = 0;
856
+ uint8_t *next_out = NULL;
857
+ BROTLI_BOOL ok =
858
+ BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_FLUSH, &available_in,
859
+ &next_in, &available_out, &next_out, NULL);
860
+ if (!ok)
861
+ rb_raise(eError, "brotli flush failed");
862
+ const uint8_t *out_data;
863
+ size_t out_size = 0;
864
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
865
+ if (out_size > 0)
866
+ rb_str_cat(result, (const char *)out_data, out_size);
867
+ } while (BrotliEncoderHasMoreOutput(d->ctx.brotli));
868
+ return result;
869
+ }
870
+ case ALGO_LZ4:
871
+ return lz4_compress_block(d);
872
+ }
873
+ return rb_str_new("", 0);
874
+ }
875
+
876
+ static VALUE deflater_finish(VALUE self) {
877
+ deflater_t *d;
878
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
879
+ if (d->closed)
880
+ rb_raise(eStreamError, "stream is closed");
881
+ if (d->finished)
882
+ return rb_str_new("", 0);
883
+ d->finished = 1;
884
+
885
+ switch (d->algo) {
886
+ case ALGO_ZSTD: {
887
+ size_t out_cap = ZSTD_CStreamOutSize();
888
+ VALUE result = rb_str_buf_new(0);
889
+ size_t ret;
890
+ do {
891
+ VALUE buf = rb_str_buf_new(out_cap);
892
+ ZSTD_outBuffer output = {RSTRING_PTR(buf), out_cap, 0};
893
+ ret = ZSTD_endStream(d->ctx.zstd, &output);
894
+ if (ZSTD_isError(ret))
895
+ rb_raise(eError, "zstd end stream: %s", ZSTD_getErrorName(ret));
896
+ if (output.pos > 0)
897
+ rb_str_cat(result, RSTRING_PTR(buf), output.pos);
898
+ } while (ret > 0);
899
+ return result;
900
+ }
901
+ case ALGO_BROTLI: {
902
+ size_t available_in = 0;
903
+ const uint8_t *next_in = NULL;
904
+ VALUE result = rb_str_buf_new(0);
905
+ do {
906
+ size_t available_out = 0;
907
+ uint8_t *next_out = NULL;
908
+ BROTLI_BOOL ok =
909
+ BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_FINISH, &available_in,
910
+ &next_in, &available_out, &next_out, NULL);
911
+ if (!ok)
912
+ rb_raise(eError, "brotli finish failed");
913
+ const uint8_t *out_data;
914
+ size_t out_size = 0;
915
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
916
+ if (out_size > 0)
917
+ rb_str_cat(result, (const char *)out_data, out_size);
918
+ } while (BrotliEncoderHasMoreOutput(d->ctx.brotli) ||
919
+ !BrotliEncoderIsFinished(d->ctx.brotli));
920
+ return result;
921
+ }
922
+ case ALGO_LZ4: {
923
+ VALUE result = rb_str_buf_new(0);
924
+ if (d->lz4_buf.len > 0) {
925
+ VALUE block = lz4_compress_block(d);
926
+ rb_str_cat(result, RSTRING_PTR(block), RSTRING_LEN(block));
927
+ }
928
+
929
+ char eos[4] = {0, 0, 0, 0};
930
+ rb_str_cat(result, eos, 4);
931
+ return result;
932
+ }
933
+ }
934
+ return rb_str_new("", 0);
935
+ }
936
+
937
+ static VALUE deflater_reset(VALUE self) {
938
+ deflater_t *d;
939
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
940
+
941
+ switch (d->algo) {
942
+ case ALGO_ZSTD:
943
+ if (d->ctx.zstd) {
944
+ ZSTD_CCtx_reset(d->ctx.zstd, ZSTD_reset_session_only);
945
+ ZSTD_CCtx_setParameter(d->ctx.zstd, ZSTD_c_compressionLevel, d->level);
946
+ }
947
+ break;
948
+ case ALGO_BROTLI:
949
+ if (d->ctx.brotli) {
950
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
951
+ d->ctx.brotli = BrotliEncoderCreateInstance(NULL, NULL, NULL);
952
+ if (!d->ctx.brotli)
953
+ rb_raise(eMemError, "brotli: failed to recreate encoder");
954
+ BrotliEncoderSetParameter(d->ctx.brotli, BROTLI_PARAM_QUALITY, d->level);
955
+ }
956
+ break;
957
+ case ALGO_LZ4:
958
+ if (d->ctx.lz4)
959
+ LZ4_resetStream(d->ctx.lz4);
960
+ d->lz4_buf.len = 0;
961
+ break;
962
+ }
963
+ d->closed = 0;
964
+ d->finished = 0;
965
+ return self;
966
+ }
967
+
968
+ static VALUE deflater_close(VALUE self) {
969
+ deflater_t *d;
970
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
971
+ if (d->closed)
972
+ return Qnil;
973
+
974
+ switch (d->algo) {
975
+ case ALGO_ZSTD:
976
+ if (d->ctx.zstd) {
977
+ ZSTD_freeCStream(d->ctx.zstd);
978
+ d->ctx.zstd = NULL;
979
+ }
980
+ break;
981
+ case ALGO_BROTLI:
982
+ if (d->ctx.brotli) {
983
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
984
+ d->ctx.brotli = NULL;
985
+ }
986
+ break;
987
+ case ALGO_LZ4:
988
+ if (d->ctx.lz4) {
989
+ LZ4_freeStream(d->ctx.lz4);
990
+ d->ctx.lz4 = NULL;
991
+ }
992
+ break;
993
+ }
994
+ d->closed = 1;
995
+ return Qnil;
996
+ }
997
+
998
+ static VALUE deflater_closed_p(VALUE self) {
999
+ deflater_t *d;
1000
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1001
+ return d->closed ? Qtrue : Qfalse;
1002
+ }
1003
+
1004
+ typedef struct {
1005
+ compress_algo_t algo;
1006
+ int closed;
1007
+ int finished;
1008
+
1009
+ union {
1010
+ ZSTD_DStream *zstd;
1011
+ BrotliDecoderState *brotli;
1012
+ } ctx;
1013
+
1014
+ struct {
1015
+ char *buf;
1016
+ size_t len;
1017
+ size_t cap;
1018
+ } lz4_buf;
1019
+ } inflater_t;
1020
+
1021
+ static void inflater_free(void *ptr) {
1022
+ inflater_t *inf = (inflater_t *)ptr;
1023
+ if (!inf->closed) {
1024
+ switch (inf->algo) {
1025
+ case ALGO_ZSTD:
1026
+ if (inf->ctx.zstd)
1027
+ ZSTD_freeDStream(inf->ctx.zstd);
1028
+ break;
1029
+ case ALGO_BROTLI:
1030
+ if (inf->ctx.brotli)
1031
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
1032
+ break;
1033
+ case ALGO_LZ4:
1034
+ break;
1035
+ }
1036
+ }
1037
+ if (inf->lz4_buf.buf)
1038
+ xfree(inf->lz4_buf.buf);
1039
+ xfree(inf);
1040
+ }
1041
+
1042
+ static size_t inflater_memsize(const void *ptr) {
1043
+ const inflater_t *inf = (const inflater_t *)ptr;
1044
+ return sizeof(inflater_t) + inf->lz4_buf.cap;
1045
+ }
1046
+
1047
+ static const rb_data_type_t inflater_type = {"Compress::Inflater",
1048
+ {NULL, inflater_free, inflater_memsize},
1049
+ 0,
1050
+ 0,
1051
+ RUBY_TYPED_FREE_IMMEDIATELY};
1052
+
1053
+ static VALUE inflater_alloc(VALUE klass) {
1054
+ inflater_t *inf = ALLOC(inflater_t);
1055
+ memset(inf, 0, sizeof(inflater_t));
1056
+ return TypedData_Wrap_Struct(klass, &inflater_type, inf);
1057
+ }
1058
+
1059
+ static VALUE inflater_initialize(int argc, VALUE *argv, VALUE self) {
1060
+ VALUE opts;
1061
+ rb_scan_args(argc, argv, "0:", &opts);
1062
+
1063
+ inflater_t *inf;
1064
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1065
+
1066
+ VALUE algo_sym = Qnil, dict_val = Qnil;
1067
+ if (!NIL_P(opts)) {
1068
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
1069
+ dict_val = rb_hash_aref(opts, ID2SYM(rb_intern("dictionary")));
1070
+ }
1071
+
1072
+ inf->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1073
+ inf->closed = 0;
1074
+ inf->finished = 0;
1075
+
1076
+ dictionary_t *dict = NULL;
1077
+ if (!NIL_P(dict_val)) {
1078
+ if (inf->algo == ALGO_LZ4) {
1079
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
1080
+ }
1081
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
1082
+ }
1083
+
1084
+ switch (inf->algo) {
1085
+ case ALGO_ZSTD:
1086
+ inf->ctx.zstd = ZSTD_createDStream();
1087
+ if (!inf->ctx.zstd)
1088
+ rb_raise(eMemError, "zstd: failed to create dstream");
1089
+ if (dict) {
1090
+ ZSTD_DCtx_reset(inf->ctx.zstd, ZSTD_reset_session_only);
1091
+ size_t r = ZSTD_DCtx_loadDictionary(inf->ctx.zstd, dict->data, dict->size);
1092
+ if (ZSTD_isError(r))
1093
+ rb_raise(eError, "zstd dict load: %s", ZSTD_getErrorName(r));
1094
+ } else {
1095
+ ZSTD_initDStream(inf->ctx.zstd);
1096
+ }
1097
+ break;
1098
+ case ALGO_BROTLI:
1099
+ inf->ctx.brotli = BrotliDecoderCreateInstance(NULL, NULL, NULL);
1100
+ if (!inf->ctx.brotli)
1101
+ rb_raise(eMemError, "brotli: failed to create decoder");
1102
+ if (dict) {
1103
+ BrotliDecoderAttachDictionary(inf->ctx.brotli, BROTLI_SHARED_DICTIONARY_RAW, dict->size,
1104
+ dict->data);
1105
+ }
1106
+ break;
1107
+ case ALGO_LZ4:
1108
+ inf->lz4_buf.cap = 16 * 1024;
1109
+ inf->lz4_buf.buf = ALLOC_N(char, inf->lz4_buf.cap);
1110
+ inf->lz4_buf.len = 0;
1111
+ break;
1112
+ }
1113
+
1114
+ return self;
1115
+ }
1116
+
1117
+ static VALUE inflater_write(VALUE self, VALUE chunk) {
1118
+ inflater_t *inf;
1119
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1120
+ if (inf->closed)
1121
+ rb_raise(eStreamError, "stream is closed");
1122
+ StringValue(chunk);
1123
+
1124
+ const char *src = RSTRING_PTR(chunk);
1125
+ size_t slen = RSTRING_LEN(chunk);
1126
+ if (slen == 0)
1127
+ return rb_str_new("", 0);
1128
+
1129
+ switch (inf->algo) {
1130
+ case ALGO_ZSTD: {
1131
+ ZSTD_inBuffer input = {src, slen, 0};
1132
+ size_t out_cap = ZSTD_DStreamOutSize();
1133
+ VALUE result = rb_str_buf_new(0);
1134
+ while (input.pos < input.size) {
1135
+ VALUE buf = rb_str_buf_new(out_cap);
1136
+ ZSTD_outBuffer output = {RSTRING_PTR(buf), out_cap, 0};
1137
+ size_t ret = ZSTD_decompressStream(inf->ctx.zstd, &output, &input);
1138
+ if (ZSTD_isError(ret))
1139
+ rb_raise(eDataError, "zstd decompress stream: %s", ZSTD_getErrorName(ret));
1140
+ if (output.pos > 0)
1141
+ rb_str_cat(result, RSTRING_PTR(buf), output.pos);
1142
+ if (ret == 0)
1143
+ break;
1144
+ }
1145
+ return result;
1146
+ }
1147
+ case ALGO_BROTLI: {
1148
+ size_t available_in = slen;
1149
+ const uint8_t *next_in = (const uint8_t *)src;
1150
+ VALUE result = rb_str_buf_new(0);
1151
+ while (available_in > 0 || BrotliDecoderHasMoreOutput(inf->ctx.brotli)) {
1152
+ size_t available_out = 0;
1153
+ uint8_t *next_out = NULL;
1154
+ BrotliDecoderResult res = BrotliDecoderDecompressStream(
1155
+ inf->ctx.brotli, &available_in, &next_in, &available_out, &next_out, NULL);
1156
+ if (res == BROTLI_DECODER_RESULT_ERROR)
1157
+ rb_raise(eDataError, "brotli decompress stream: %s",
1158
+ BrotliDecoderErrorString(BrotliDecoderGetErrorCode(inf->ctx.brotli)));
1159
+ const uint8_t *out_data;
1160
+ size_t out_size = 0;
1161
+ out_data = BrotliDecoderTakeOutput(inf->ctx.brotli, &out_size);
1162
+ if (out_size > 0)
1163
+ rb_str_cat(result, (const char *)out_data, out_size);
1164
+ if (res == BROTLI_DECODER_RESULT_SUCCESS)
1165
+ break;
1166
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && available_in == 0)
1167
+ break;
1168
+ }
1169
+ return result;
1170
+ }
1171
+ case ALGO_LZ4: {
1172
+ VALUE result = rb_str_buf_new(0);
1173
+
1174
+ size_t needed = inf->lz4_buf.len + slen;
1175
+ if (needed > inf->lz4_buf.cap) {
1176
+ inf->lz4_buf.cap = needed * 2;
1177
+ REALLOC_N(inf->lz4_buf.buf, char, inf->lz4_buf.cap);
1178
+ }
1179
+ memcpy(inf->lz4_buf.buf + inf->lz4_buf.len, src, slen);
1180
+ inf->lz4_buf.len += slen;
1181
+
1182
+ size_t pos = 0;
1183
+ while (pos + 4 <= inf->lz4_buf.len) {
1184
+ const uint8_t *p = (const uint8_t *)(inf->lz4_buf.buf + pos);
1185
+ uint32_t orig_size = (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
1186
+ ((uint32_t)p[3] << 24);
1187
+ if (orig_size == 0) {
1188
+ inf->finished = 1;
1189
+ pos += 4;
1190
+ break;
1191
+ }
1192
+ if (pos + 8 > inf->lz4_buf.len)
1193
+ break;
1194
+ uint32_t comp_size = (uint32_t)p[4] | ((uint32_t)p[5] << 8) | ((uint32_t)p[6] << 16) |
1195
+ ((uint32_t)p[7] << 24);
1196
+ if (pos + 8 + comp_size > inf->lz4_buf.len)
1197
+ break;
1198
+ if (orig_size > 64 * 1024 * 1024)
1199
+ rb_raise(eDataError, "lz4 stream: block too large (%u)", orig_size);
1200
+
1201
+ VALUE block = rb_str_buf_new(orig_size);
1202
+ int dsize = LZ4_decompress_safe(inf->lz4_buf.buf + pos + 8, RSTRING_PTR(block),
1203
+ (int)comp_size, (int)orig_size);
1204
+ if (dsize < 0)
1205
+ rb_raise(eDataError, "lz4 stream decompress block failed");
1206
+ rb_str_set_len(block, dsize);
1207
+ rb_str_cat(result, RSTRING_PTR(block), dsize);
1208
+ pos += 8 + comp_size;
1209
+ }
1210
+
1211
+ if (pos > 0) {
1212
+ inf->lz4_buf.len -= pos;
1213
+ if (inf->lz4_buf.len > 0)
1214
+ memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + pos, inf->lz4_buf.len);
1215
+ }
1216
+ return result;
1217
+ }
1218
+ }
1219
+ return rb_str_new("", 0);
1220
+ }
1221
+
1222
+ static VALUE inflater_finish(VALUE self) {
1223
+ inflater_t *inf;
1224
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1225
+ if (inf->closed)
1226
+ rb_raise(eStreamError, "stream is closed");
1227
+ inf->finished = 1;
1228
+ return rb_str_new("", 0);
1229
+ }
1230
+
1231
+ static VALUE inflater_reset(VALUE self) {
1232
+ inflater_t *inf;
1233
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1234
+
1235
+ switch (inf->algo) {
1236
+ case ALGO_ZSTD:
1237
+ if (inf->ctx.zstd)
1238
+ ZSTD_DCtx_reset(inf->ctx.zstd, ZSTD_reset_session_only);
1239
+ break;
1240
+ case ALGO_BROTLI:
1241
+ if (inf->ctx.brotli) {
1242
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
1243
+ inf->ctx.brotli = BrotliDecoderCreateInstance(NULL, NULL, NULL);
1244
+ if (!inf->ctx.brotli)
1245
+ rb_raise(eMemError, "brotli: failed to recreate decoder");
1246
+ }
1247
+ break;
1248
+ case ALGO_LZ4:
1249
+ inf->lz4_buf.len = 0;
1250
+ break;
1251
+ }
1252
+ inf->closed = 0;
1253
+ inf->finished = 0;
1254
+ return self;
1255
+ }
1256
+
1257
+ static VALUE inflater_close(VALUE self) {
1258
+ inflater_t *inf;
1259
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1260
+ if (inf->closed)
1261
+ return Qnil;
1262
+
1263
+ switch (inf->algo) {
1264
+ case ALGO_ZSTD:
1265
+ if (inf->ctx.zstd) {
1266
+ ZSTD_freeDStream(inf->ctx.zstd);
1267
+ inf->ctx.zstd = NULL;
1268
+ }
1269
+ break;
1270
+ case ALGO_BROTLI:
1271
+ if (inf->ctx.brotli) {
1272
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
1273
+ inf->ctx.brotli = NULL;
1274
+ }
1275
+ break;
1276
+ case ALGO_LZ4:
1277
+ break;
1278
+ }
1279
+ inf->closed = 1;
1280
+ return Qnil;
1281
+ }
1282
+
1283
+ static VALUE inflater_closed_p(VALUE self) {
1284
+ inflater_t *inf;
1285
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
1286
+ return inf->closed ? Qtrue : Qfalse;
1287
+ }
1288
+
1289
+ static VALUE dict_initialize(int argc, VALUE *argv, VALUE self) {
1290
+ VALUE raw, opts;
1291
+ rb_scan_args(argc, argv, "1:", &raw, &opts);
1292
+ StringValue(raw);
1293
+
1294
+ dictionary_t *d;
1295
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
1296
+
1297
+ VALUE algo_sym = Qnil;
1298
+ if (!NIL_P(opts)) {
1299
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
1300
+ }
1301
+ d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1302
+
1303
+ if (d->algo == ALGO_LZ4)
1304
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
1305
+
1306
+ d->size = RSTRING_LEN(raw);
1307
+ d->data = ALLOC_N(uint8_t, d->size);
1308
+ memcpy(d->data, RSTRING_PTR(raw), d->size);
1309
+
1310
+ return self;
1311
+ }
1312
+
1313
+ static VALUE dict_train(int argc, VALUE *argv, VALUE self) {
1314
+ VALUE samples, opts;
1315
+ rb_scan_args(argc, argv, "1:", &samples, &opts);
1316
+ Check_Type(samples, T_ARRAY);
1317
+
1318
+ VALUE algo_sym = Qnil, size_val = Qnil;
1319
+ if (!NIL_P(opts)) {
1320
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
1321
+ size_val = rb_hash_aref(opts, ID2SYM(rb_intern("size")));
1322
+ }
1323
+
1324
+ compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1325
+ size_t dict_capacity = NIL_P(size_val) ? 32768 : NUM2SIZET(size_val);
1326
+
1327
+ if (algo == ALGO_LZ4)
1328
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionary training");
1329
+
1330
+ long num_samples = RARRAY_LEN(samples);
1331
+ if (num_samples < 1)
1332
+ rb_raise(rb_eArgError, "need at least 1 sample for training");
1333
+
1334
+ size_t total_size = 0;
1335
+ for (long i = 0; i < num_samples; i++) {
1336
+ VALUE s = rb_ary_entry(samples, i);
1337
+ StringValue(s);
1338
+ total_size += RSTRING_LEN(s);
1339
+ }
1340
+
1341
+ char *concat = ALLOC_N(char, total_size);
1342
+ size_t *sizes = ALLOC_N(size_t, num_samples);
1343
+ size_t offset = 0;
1344
+
1345
+ for (long i = 0; i < num_samples; i++) {
1346
+ VALUE s = rb_ary_entry(samples, i);
1347
+ size_t slen = RSTRING_LEN(s);
1348
+ memcpy(concat + offset, RSTRING_PTR(s), slen);
1349
+ sizes[i] = slen;
1350
+ offset += slen;
1351
+ }
1352
+
1353
+ uint8_t *dict_buf = ALLOC_N(uint8_t, dict_capacity);
1354
+
1355
+ if (algo == ALGO_ZSTD) {
1356
+ size_t result =
1357
+ ZDICT_trainFromBuffer(dict_buf, dict_capacity, concat, sizes, (unsigned)num_samples);
1358
+ xfree(concat);
1359
+ xfree(sizes);
1360
+
1361
+ if (ZDICT_isError(result)) {
1362
+ xfree(dict_buf);
1363
+ rb_raise(eError, "zstd dict training: %s", ZDICT_getErrorName(result));
1364
+ }
1365
+
1366
+ VALUE dict_obj = rb_obj_alloc(cDictionary);
1367
+ dictionary_t *d;
1368
+ TypedData_Get_Struct(dict_obj, dictionary_t, &dictionary_type, d);
1369
+ d->algo = ALGO_ZSTD;
1370
+ d->data = dict_buf;
1371
+ d->size = result;
1372
+ return dict_obj;
1373
+ } else {
1374
+ xfree(sizes);
1375
+ size_t actual_size = total_size < dict_capacity ? total_size : dict_capacity;
1376
+ memcpy(dict_buf, concat, actual_size);
1377
+ xfree(concat);
1378
+
1379
+ VALUE dict_obj = rb_obj_alloc(cDictionary);
1380
+ dictionary_t *d;
1381
+ TypedData_Get_Struct(dict_obj, dictionary_t, &dictionary_type, d);
1382
+ d->algo = ALGO_BROTLI;
1383
+ d->data = dict_buf;
1384
+ d->size = actual_size;
1385
+ return dict_obj;
1386
+ }
1387
+ }
1388
+
1389
+ static VALUE dict_load(int argc, VALUE *argv, VALUE self) {
1390
+ VALUE path, opts;
1391
+ rb_scan_args(argc, argv, "1:", &path, &opts);
1392
+ StringValue(path);
1393
+
1394
+ VALUE algo_sym = Qnil;
1395
+ if (!NIL_P(opts)) {
1396
+ algo_sym = rb_hash_aref(opts, ID2SYM(rb_intern("algo")));
1397
+ }
1398
+ compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1399
+
1400
+ if (algo == ALGO_LZ4)
1401
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
1402
+
1403
+ const char *cpath = RSTRING_PTR(path);
1404
+ FILE *f = fopen(cpath, "rb");
1405
+ if (!f)
1406
+ rb_sys_fail(cpath);
1407
+
1408
+ fseek(f, 0, SEEK_END);
1409
+ long file_size = ftell(f);
1410
+ fseek(f, 0, SEEK_SET);
1411
+
1412
+ if (file_size <= 0) {
1413
+ fclose(f);
1414
+ rb_raise(eDataError, "dictionary file is empty: %s", cpath);
1415
+ }
1416
+
1417
+ uint8_t *buf = ALLOC_N(uint8_t, file_size);
1418
+ size_t read_bytes = fread(buf, 1, file_size, f);
1419
+ fclose(f);
1420
+
1421
+ if ((long)read_bytes != file_size) {
1422
+ xfree(buf);
1423
+ rb_raise(eDataError, "failed to read dictionary: %s", cpath);
1424
+ }
1425
+
1426
+ VALUE dict_obj = rb_obj_alloc(cDictionary);
1427
+ dictionary_t *d;
1428
+ TypedData_Get_Struct(dict_obj, dictionary_t, &dictionary_type, d);
1429
+ d->algo = algo;
1430
+ d->data = buf;
1431
+ d->size = (size_t)file_size;
1432
+ return dict_obj;
1433
+ }
1434
+
1435
+ static VALUE dict_save(VALUE self, VALUE path) {
1436
+ dictionary_t *d;
1437
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
1438
+
1439
+ const char *cpath = StringValueCStr(path);
1440
+ FILE *f = fopen(cpath, "wb");
1441
+ if (!f)
1442
+ rb_sys_fail(cpath);
1443
+
1444
+ size_t written = fwrite(d->data, 1, d->size, f);
1445
+ fclose(f);
1446
+
1447
+ if (written != d->size)
1448
+ rb_raise(eError, "failed to write dictionary to %s", cpath);
1449
+ return path;
1450
+ }
1451
+
1452
+ static VALUE dict_algo(VALUE self) {
1453
+ dictionary_t *d;
1454
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
1455
+ switch (d->algo) {
1456
+ case ALGO_ZSTD:
1457
+ return ID2SYM(rb_intern("zstd"));
1458
+ case ALGO_LZ4:
1459
+ return ID2SYM(rb_intern("lz4"));
1460
+ case ALGO_BROTLI:
1461
+ return ID2SYM(rb_intern("brotli"));
1462
+ }
1463
+ return Qnil;
1464
+ }
1465
+
1466
+ static VALUE dict_size(VALUE self) {
1467
+ dictionary_t *d;
1468
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
1469
+ return SIZET2NUM(d->size);
1470
+ }
1471
+
1472
+ void Init_multi_compress(void) {
1473
+ mMultiCompress = rb_define_module("MultiCompress");
1474
+
1475
+ eError = rb_define_class_under(mMultiCompress, "Error", rb_eStandardError);
1476
+ eDataError = rb_define_class_under(mMultiCompress, "DataError", eError);
1477
+ eMemError = rb_define_class_under(mMultiCompress, "MemError", eError);
1478
+ eStreamError = rb_define_class_under(mMultiCompress, "StreamError", eError);
1479
+ eUnsupportedError = rb_define_class_under(mMultiCompress, "UnsupportedError", eError);
1480
+ eLevelError = rb_define_class_under(mMultiCompress, "LevelError", eError);
1481
+
1482
+ mZstd = rb_define_module_under(mMultiCompress, "Zstd");
1483
+ mLZ4 = rb_define_module_under(mMultiCompress, "LZ4");
1484
+ mBrotli = rb_define_module_under(mMultiCompress, "Brotli");
1485
+
1486
+ rb_define_const(mZstd, "MIN_LEVEL", INT2FIX(1));
1487
+ rb_define_const(mZstd, "MAX_LEVEL", INT2FIX(22));
1488
+ rb_define_const(mZstd, "DEFAULT_LEVEL", INT2FIX(3));
1489
+ rb_define_const(mLZ4, "MIN_LEVEL", INT2FIX(1));
1490
+ rb_define_const(mLZ4, "MAX_LEVEL", INT2FIX(16));
1491
+ rb_define_const(mLZ4, "DEFAULT_LEVEL", INT2FIX(1));
1492
+ rb_define_const(mBrotli, "MIN_LEVEL", INT2FIX(0));
1493
+ rb_define_const(mBrotli, "MAX_LEVEL", INT2FIX(11));
1494
+ rb_define_const(mBrotli, "DEFAULT_LEVEL", INT2FIX(6));
1495
+
1496
+ rb_define_module_function(mMultiCompress, "compress", compress_compress, -1);
1497
+ rb_define_module_function(mMultiCompress, "decompress", compress_decompress, -1);
1498
+ rb_define_module_function(mMultiCompress, "crc32", compress_crc32, -1);
1499
+ rb_define_module_function(mMultiCompress, "adler32", compress_adler32, -1);
1500
+ rb_define_module_function(mMultiCompress, "algorithms", compress_algorithms, 0);
1501
+ rb_define_module_function(mMultiCompress, "available?", compress_available_p, 1);
1502
+ rb_define_module_function(mMultiCompress, "version", compress_version, 1);
1503
+
1504
+ cDeflater = rb_define_class_under(mMultiCompress, "Deflater", rb_cObject);
1505
+ rb_define_alloc_func(cDeflater, deflater_alloc);
1506
+ rb_define_method(cDeflater, "initialize", deflater_initialize, -1);
1507
+ rb_define_method(cDeflater, "write", deflater_write, 1);
1508
+ rb_define_method(cDeflater, "flush", deflater_flush, 0);
1509
+ rb_define_method(cDeflater, "finish", deflater_finish, 0);
1510
+ rb_define_method(cDeflater, "reset", deflater_reset, 0);
1511
+ rb_define_method(cDeflater, "close", deflater_close, 0);
1512
+ rb_define_method(cDeflater, "closed?", deflater_closed_p, 0);
1513
+
1514
+ cInflater = rb_define_class_under(mMultiCompress, "Inflater", rb_cObject);
1515
+ rb_define_alloc_func(cInflater, inflater_alloc);
1516
+ rb_define_method(cInflater, "initialize", inflater_initialize, -1);
1517
+ rb_define_method(cInflater, "write", inflater_write, 1);
1518
+ rb_define_method(cInflater, "finish", inflater_finish, 0);
1519
+ rb_define_method(cInflater, "reset", inflater_reset, 0);
1520
+ rb_define_method(cInflater, "close", inflater_close, 0);
1521
+ rb_define_method(cInflater, "closed?", inflater_closed_p, 0);
1522
+
1523
+ cWriter = rb_define_class_under(mMultiCompress, "Writer", rb_cObject);
1524
+ cReader = rb_define_class_under(mMultiCompress, "Reader", rb_cObject);
1525
+
1526
+ cDictionary = rb_define_class_under(mMultiCompress, "Dictionary", rb_cObject);
1527
+ rb_define_alloc_func(cDictionary, dict_alloc);
1528
+ rb_define_method(cDictionary, "initialize", dict_initialize, -1);
1529
+ rb_define_singleton_method(cDictionary, "train", dict_train, -1);
1530
+ rb_define_singleton_method(cDictionary, "load", dict_load, -1);
1531
+ rb_define_method(cDictionary, "save", dict_save, 1);
1532
+ rb_define_method(cDictionary, "algo", dict_algo, 0);
1533
+ rb_define_method(cDictionary, "size", dict_size, 0);
1534
+ }