multi_compress 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +152 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +177 -0
  5. data/ext/multi_compress/extconf.rb +190 -0
  6. data/ext/multi_compress/multi_compress.c +2912 -0
  7. data/ext/multi_compress/vendor/.vendored +3 -0
  8. data/ext/multi_compress/vendor/brotli/c/common/constants.c +15 -0
  9. data/ext/multi_compress/vendor/brotli/c/common/constants.h +201 -0
  10. data/ext/multi_compress/vendor/brotli/c/common/context.c +156 -0
  11. data/ext/multi_compress/vendor/brotli/c/common/context.h +113 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +5916 -0
  13. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +64 -0
  14. data/ext/multi_compress/vendor/brotli/c/common/platform.c +23 -0
  15. data/ext/multi_compress/vendor/brotli/c/common/platform.h +541 -0
  16. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +521 -0
  17. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  18. data/ext/multi_compress/vendor/brotli/c/common/transform.c +291 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/transform.h +85 -0
  20. data/ext/multi_compress/vendor/brotli/c/common/version.h +51 -0
  21. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +78 -0
  22. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +423 -0
  23. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +2875 -0
  24. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +342 -0
  25. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +122 -0
  26. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +733 -0
  27. data/ext/multi_compress/vendor/brotli/c/dec/state.c +183 -0
  28. data/ext/multi_compress/vendor/brotli/c/dec/state.h +400 -0
  29. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +207 -0
  30. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +40 -0
  31. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +939 -0
  32. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +96 -0
  33. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_inc.h +189 -0
  34. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +36 -0
  35. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +64 -0
  36. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +127 -0
  37. data/ext/multi_compress/vendor/brotli/c/enc/block_encoder_inc.h +34 -0
  38. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +217 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +52 -0
  40. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +481 -0
  41. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +1336 -0
  42. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +89 -0
  43. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +57 -0
  44. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +49 -0
  45. data/ext/multi_compress/vendor/brotli/c/enc/cluster_inc.h +325 -0
  46. data/ext/multi_compress/vendor/brotli/c/enc/command.c +28 -0
  47. data/ext/multi_compress/vendor/brotli/c/enc/command.h +191 -0
  48. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  49. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  50. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +800 -0
  51. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +86 -0
  52. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +657 -0
  53. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +72 -0
  54. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +1848 -0
  55. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +25 -0
  56. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +1996 -0
  57. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +640 -0
  58. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +157 -0
  59. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +504 -0
  60. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +123 -0
  61. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +542 -0
  62. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +105 -0
  63. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +67 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +72 -0
  65. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +728 -0
  66. data/ext/multi_compress/vendor/brotli/c/enc/hash_composite_inc.h +140 -0
  67. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +295 -0
  68. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +262 -0
  69. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
  70. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +266 -0
  71. data/ext/multi_compress/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
  72. data/ext/multi_compress/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +330 -0
  73. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +100 -0
  74. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +64 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/histogram_inc.h +51 -0
  76. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +180 -0
  77. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +32 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +194 -0
  79. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +131 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +677 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +106 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +185 -0
  83. data/ext/multi_compress/vendor/brotli/c/enc/params.h +47 -0
  84. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +54 -0
  85. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +202 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +168 -0
  87. data/ext/multi_compress/vendor/brotli/c/enc/state.h +104 -0
  88. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +542 -0
  89. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +41 -0
  90. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +5866 -0
  91. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +85 -0
  92. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +33 -0
  93. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +88 -0
  94. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +409 -0
  95. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +501 -0
  96. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +305 -0
  97. data/ext/multi_compress/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  98. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +83 -0
  99. data/ext/multi_compress/vendor/lz4/lib/LICENSE +24 -0
  100. data/ext/multi_compress/vendor/lz4/lib/Makefile +244 -0
  101. data/ext/multi_compress/vendor/lz4/lib/README.md +193 -0
  102. data/ext/multi_compress/vendor/lz4/lib/dll/example/Makefile +63 -0
  103. data/ext/multi_compress/vendor/lz4/lib/dll/example/README.md +69 -0
  104. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.sln +25 -0
  105. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.vcxproj +182 -0
  106. data/ext/multi_compress/vendor/lz4/lib/liblz4-dll.rc.in +35 -0
  107. data/ext/multi_compress/vendor/lz4/lib/liblz4.pc.in +14 -0
  108. data/ext/multi_compress/vendor/lz4/lib/lz4.c +2829 -0
  109. data/ext/multi_compress/vendor/lz4/lib/lz4.h +884 -0
  110. data/ext/multi_compress/vendor/lz4/lib/lz4file.c +341 -0
  111. data/ext/multi_compress/vendor/lz4/lib/lz4file.h +93 -0
  112. data/ext/multi_compress/vendor/lz4/lib/lz4frame.c +2136 -0
  113. data/ext/multi_compress/vendor/lz4/lib/lz4frame.h +751 -0
  114. data/ext/multi_compress/vendor/lz4/lib/lz4frame_static.h +47 -0
  115. data/ext/multi_compress/vendor/lz4/lib/lz4hc.c +2192 -0
  116. data/ext/multi_compress/vendor/lz4/lib/lz4hc.h +414 -0
  117. data/ext/multi_compress/vendor/lz4/lib/xxhash.c +1030 -0
  118. data/ext/multi_compress/vendor/lz4/lib/xxhash.h +328 -0
  119. data/ext/multi_compress/vendor/zstd/lib/BUCK +232 -0
  120. data/ext/multi_compress/vendor/zstd/lib/Makefile +357 -0
  121. data/ext/multi_compress/vendor/zstd/lib/README.md +217 -0
  122. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +478 -0
  123. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +335 -0
  124. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +213 -0
  125. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +24 -0
  126. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +107 -0
  127. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +368 -0
  128. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +56 -0
  129. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +159 -0
  130. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +717 -0
  131. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +403 -0
  132. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +364 -0
  133. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +442 -0
  134. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +355 -0
  135. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +84 -0
  136. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +137 -0
  137. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +122 -0
  138. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +155 -0
  139. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +24 -0
  140. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +5686 -0
  141. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +83 -0
  142. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +111 -0
  143. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +493 -0
  144. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +163 -0
  145. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +134 -0
  146. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +741 -0
  147. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +181 -0
  148. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +75 -0
  149. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +1370 -0
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +6327 -0
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +1458 -0
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +159 -0
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +31 -0
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +442 -0
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +573 -0
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +676 -0
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +696 -0
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +38 -0
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +675 -0
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +37 -0
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +2104 -0
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +125 -0
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +724 -0
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +117 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +1446 -0
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +56 -0
  170. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +1859 -0
  171. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +113 -0
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +1889 -0
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +585 -0
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +244 -0
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +44 -0
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +2230 -0
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +2072 -0
  178. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +68 -0
  179. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +236 -0
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +214 -0
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +26 -0
  182. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +167 -0
  183. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +1253 -0
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +158 -0
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.c +1913 -0
  187. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +67 -0
  188. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +766 -0
  189. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +1205 -0
  190. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +48 -0
  191. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +63 -0
  192. data/ext/multi_compress/vendor/zstd/lib/dll/example/build_package.bat +20 -0
  193. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.sln +25 -0
  194. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.vcxproj +181 -0
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +415 -0
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2158 -0
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +94 -0
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +3518 -0
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +93 -0
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +3160 -0
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +93 -0
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +3647 -0
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +142 -0
  204. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +4050 -0
  205. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +162 -0
  206. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +4154 -0
  207. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +172 -0
  208. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +4541 -0
  209. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +187 -0
  210. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +203 -0
  211. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +16 -0
  212. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +25 -0
  213. data/ext/multi_compress/vendor/zstd/lib/zdict.h +452 -0
  214. data/ext/multi_compress/vendor/zstd/lib/zstd.h +2575 -0
  215. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +95 -0
  216. data/lib/multi_compress/version.rb +5 -0
  217. data/lib/multi_compress.rb +329 -0
  218. metadata +322 -0
@@ -0,0 +1,2912 @@
1
+ #include <ruby.h>
2
+ #include <ruby/encoding.h>
3
+ #include <ruby/thread.h>
4
+ #include <ruby/fiber/scheduler.h>
5
+ #include <brotli/decode.h>
6
+ #include <brotli/encode.h>
7
+ #include <lz4.h>
8
+ #include <lz4hc.h>
9
+ #include <pthread.h>
10
+ #include <stdio.h>
11
+ #include <stdint.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <unistd.h>
15
+ #include <zstd.h>
16
+ #include <zdict.h>
17
+
18
/* Default ceiling on decompressed output (512 MiB); see limits_config_init(). */
#define MAX_DECOMPRESS_SIZE (512ULL << 20)

/* Default maximum output/input expansion ratio. */
#define DEFAULT_MAX_RATIO 1000ULL

/* Inputs shorter than this many bytes are exempt from the ratio check. */
#define RATIO_MIN_INPUT_BYTES 1024ULL

/* 32 MiB. NOTE(review): presumably the largest dictionary file accepted;
 * the use site is outside this part of the file. */
#define DICT_FILE_MAX_SIZE (32ULL << 20)
22
+
23
/*
 * Per-algorithm size thresholds, all in bytes.
 * NOTE(review): the consumers of these fields are outside this part of the
 * file; the per-field notes below are inferred from the names.
 */
typedef struct algo_policy_s {
    /* presumably: inputs at least this large are processed without the GVL */
    size_t gvl_unlock_threshold;
    /* presumably: bytes processed between cooperative fiber yields */
    size_t fiber_yield_chunk;
    /* presumably: minimum size before streaming work yields to the scheduler */
    size_t fiber_stream_threshold;
} algo_policy_t;
28
+
29
+ static const algo_policy_t ZSTD_POLICY = {
30
+ .gvl_unlock_threshold = 64 * 1024,
31
+ .fiber_yield_chunk = 64 * 1024,
32
+ .fiber_stream_threshold = 32 * 1024,
33
+ };
34
+
35
+ static const algo_policy_t LZ4_POLICY = {
36
+ .gvl_unlock_threshold = 128 * 1024,
37
+ .fiber_yield_chunk = 128 * 1024,
38
+ .fiber_stream_threshold = 64 * 1024,
39
+ };
40
+
41
+ static const algo_policy_t BROTLI_POLICY = {
42
+ .gvl_unlock_threshold = 16 * 1024,
43
+ .fiber_yield_chunk = 16 * 1024,
44
+ .fiber_stream_threshold = 8 * 1024,
45
+ };
46
+
47
+ static VALUE mMultiCompress;
48
+ static VALUE eError;
49
+ static VALUE eDataError;
50
+ static VALUE eMemError;
51
+ static VALUE eStreamError;
52
+ static VALUE eUnsupportedError;
53
+ static VALUE eLevelError;
54
+ static VALUE cDeflater;
55
+ static VALUE cInflater;
56
+ static VALUE cWriter;
57
+ static VALUE cReader;
58
+ static VALUE cDictionary;
59
+ static VALUE mZstd;
60
+ static VALUE mLZ4;
61
+ static VALUE mBrotli;
62
+ static rb_encoding *binary_encoding;
63
+ static struct {
64
+ ID zstd, lz4, brotli;
65
+ ID algo, algorithm, level, dictionary, size;
66
+ ID max_output_size, max_ratio;
67
+ ID fastest, default_, best;
68
+ ID yield_, join;
69
+ ID ivar_dictionary;
70
+ } id_cache;
71
+
72
+ static struct {
73
+ VALUE zstd, lz4, brotli;
74
+ VALUE algo, algorithm, level, dictionary, size;
75
+ VALUE max_output_size, max_ratio;
76
+ } sym_cache;
77
+
78
+ typedef enum { ALGO_ZSTD = 0, ALGO_LZ4 = 1, ALGO_BROTLI = 2 } compress_algo_t;
79
+
80
+ typedef struct dictionary_s dictionary_t;
81
+ static const rb_data_type_t dictionary_type;
82
+
83
+ static void init_id_cache(void) {
84
+ id_cache.zstd = rb_intern("zstd");
85
+ id_cache.lz4 = rb_intern("lz4");
86
+ id_cache.brotli = rb_intern("brotli");
87
+ id_cache.algo = rb_intern("algo");
88
+ id_cache.algorithm = rb_intern("algorithm");
89
+ id_cache.level = rb_intern("level");
90
+ id_cache.dictionary = rb_intern("dictionary");
91
+ id_cache.size = rb_intern("size");
92
+ id_cache.max_output_size = rb_intern("max_output_size");
93
+ id_cache.max_ratio = rb_intern("max_ratio");
94
+ id_cache.fastest = rb_intern("fastest");
95
+ id_cache.default_ = rb_intern("default");
96
+ id_cache.best = rb_intern("best");
97
+ id_cache.yield_ = rb_intern("yield");
98
+ id_cache.join = rb_intern("join");
99
+ id_cache.ivar_dictionary = rb_intern("@dictionary");
100
+
101
+ sym_cache.zstd = ID2SYM(id_cache.zstd);
102
+ sym_cache.lz4 = ID2SYM(id_cache.lz4);
103
+ sym_cache.brotli = ID2SYM(id_cache.brotli);
104
+ sym_cache.algo = ID2SYM(id_cache.algo);
105
+ sym_cache.algorithm = ID2SYM(id_cache.algorithm);
106
+ sym_cache.level = ID2SYM(id_cache.level);
107
+ sym_cache.dictionary = ID2SYM(id_cache.dictionary);
108
+ sym_cache.size = ID2SYM(id_cache.size);
109
+ sym_cache.max_output_size = ID2SYM(id_cache.max_output_size);
110
+ sym_cache.max_ratio = ID2SYM(id_cache.max_ratio);
111
+ }
112
+
113
+ static inline VALUE opt_get(VALUE opts, VALUE sym) {
114
+ return NIL_P(opts) ? Qnil : rb_hash_aref(opts, sym);
115
+ }
116
+
117
+ static inline VALUE opt_lookup2(VALUE opts, VALUE sym, VALUE default_value) {
118
+ return NIL_P(opts) ? default_value : rb_hash_lookup2(opts, sym, default_value);
119
+ }
120
+
121
+ static inline void reject_algorithm_keyword(VALUE opts) {
122
+ if (NIL_P(opts))
123
+ return;
124
+ if (rb_hash_lookup2(opts, sym_cache.algorithm, Qundef) != Qundef) {
125
+ rb_raise(rb_eArgError, "unknown keyword: :algorithm (use :algo)");
126
+ }
127
+ }
128
+
129
+ static inline dictionary_t *opt_dictionary(VALUE dict_val) {
130
+ dictionary_t *dict;
131
+
132
+ if (NIL_P(dict_val))
133
+ return NULL;
134
+ if (!rb_obj_is_kind_of(dict_val, cDictionary)) {
135
+ rb_raise(rb_eTypeError, "dictionary must be a MultiCompress::Dictionary");
136
+ }
137
+
138
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
139
+ return dict;
140
+ }
141
+
142
+ static inline void raise_if_path_has_null_byte(VALUE path) {
143
+ if (memchr(RSTRING_PTR(path), '\0', (size_t)RSTRING_LEN(path)) != NULL) {
144
+ rb_raise(rb_eArgError, "path contains null byte");
145
+ }
146
+ }
147
+
148
+ static inline void join_thread(VALUE thread) {
149
+ rb_funcall(thread, id_cache.join, 0);
150
+ }
151
+
152
+ static inline void scheduler_yield(VALUE scheduler) {
153
+ rb_funcall(scheduler, id_cache.yield_, 0);
154
+ }
155
+
156
+ static inline VALUE dictionary_ivar_get(VALUE self) {
157
+ return rb_ivar_get(self, id_cache.ivar_dictionary);
158
+ }
159
+
160
+ static inline void dictionary_ivar_set(VALUE self, VALUE dictionary) {
161
+ rb_ivar_set(self, id_cache.ivar_dictionary, dictionary);
162
+ }
163
+
164
/* Decode a 32-bit little-endian integer from the four bytes at p. */
static inline uint32_t read_le_u32(const uint8_t *p) {
    uint32_t value = 0;

    /* Fold in bytes from most to least significant. */
    for (int i = 3; i >= 0; i--) {
        value = (value << 8) | (uint32_t)p[i];
    }
    return value;
}
167
+
168
/* Encode v as a 32-bit little-endian integer into the four bytes at p. */
static inline void write_le_u32(uint8_t *p, uint32_t v) {
    for (unsigned i = 0; i < 4; i++) {
        /* Byte i carries bits [8i, 8i+7] of v. */
        p[i] = (uint8_t)((v >> (8u * i)) & 0xFFu);
    }
}
174
+
175
+ static inline const algo_policy_t *algo_policy(compress_algo_t algo) {
176
+ switch (algo) {
177
+ case ALGO_ZSTD:
178
+ return &ZSTD_POLICY;
179
+ case ALGO_LZ4:
180
+ return &LZ4_POLICY;
181
+ case ALGO_BROTLI:
182
+ return &BROTLI_POLICY;
183
+ }
184
+ return &ZSTD_POLICY;
185
+ }
186
+
187
+ static compress_algo_t sym_to_algo(VALUE sym) {
188
+ if (!SYMBOL_P(sym)) {
189
+ rb_raise(rb_eTypeError, "algo must be a Symbol (:zstd, :lz4, :brotli)");
190
+ }
191
+
192
+ ID id = SYM2ID(sym);
193
+ if (id == id_cache.zstd)
194
+ return ALGO_ZSTD;
195
+ if (id == id_cache.lz4)
196
+ return ALGO_LZ4;
197
+ if (id == id_cache.brotli)
198
+ return ALGO_BROTLI;
199
+ rb_raise(rb_eArgError, "Unknown algorithm: %s", rb_id2name(id));
200
+ return ALGO_ZSTD;
201
+ }
202
+
203
+ static inline VALUE algo_to_sym(compress_algo_t algo) {
204
+ switch (algo) {
205
+ case ALGO_ZSTD:
206
+ return sym_cache.zstd;
207
+ case ALGO_LZ4:
208
+ return sym_cache.lz4;
209
+ case ALGO_BROTLI:
210
+ return sym_cache.brotli;
211
+ }
212
+ return Qnil;
213
+ }
214
+
215
+ typedef struct {
216
+ int min, max, fastest, default_, best;
217
+ const char *name;
218
+ } level_spec_t;
219
+
220
+ static const level_spec_t level_spec[] = {
221
+ [ALGO_ZSTD] = {.min = 1, .max = 22, .fastest = 1, .default_ = 3, .best = 19, .name = "zstd"},
222
+ [ALGO_LZ4] = {.min = 1, .max = 16, .fastest = 1, .default_ = 1, .best = 16, .name = "lz4"},
223
+ [ALGO_BROTLI] =
224
+ {.min = 0, .max = 11, .fastest = 0, .default_ = 6, .best = 11, .name = "brotli"},
225
+ };
226
+
227
+ static int resolve_level(compress_algo_t algo, VALUE level_val) {
228
+ const level_spec_t *spec = &level_spec[algo];
229
+
230
+ if (NIL_P(level_val))
231
+ return spec->default_;
232
+
233
+ if (SYMBOL_P(level_val)) {
234
+ ID id = SYM2ID(level_val);
235
+ if (id == id_cache.fastest)
236
+ return spec->fastest;
237
+ if (id == id_cache.default_)
238
+ return spec->default_;
239
+ if (id == id_cache.best)
240
+ return spec->best;
241
+ rb_raise(eLevelError, "Unknown named level: %s", rb_id2name(id));
242
+ }
243
+
244
+ int level = NUM2INT(level_val);
245
+ if (level < spec->min || level > spec->max)
246
+ rb_raise(eLevelError, "%s level must be %d..%d, got %d", spec->name, spec->min, spec->max,
247
+ level);
248
+ return level;
249
+ }
250
+
251
+ static compress_algo_t detect_algo(const uint8_t *data, size_t len) {
252
+ if (len >= 4) {
253
+ if (data[0] == 0x28 && data[1] == 0xB5 && data[2] == 0x2F && data[3] == 0xFD) {
254
+ return ALGO_ZSTD;
255
+ }
256
+ }
257
+
258
+ if (len >= 12) {
259
+ uint32_t orig = read_le_u32(data);
260
+ uint32_t comp = read_le_u32(data + 4);
261
+ if (orig > 0 && orig <= 256U * 1024 * 1024 && comp > 0 && comp <= 256U * 1024 * 1024 &&
262
+ orig <= (uint32_t)INT_MAX && comp <= (uint32_t)LZ4_compressBound((int)orig) &&
263
+ (size_t)8 + (size_t)comp + 4 == len) {
264
+ size_t tail = 8 + (size_t)comp;
265
+ if (data[tail] == 0 && data[tail + 1] == 0 && data[tail + 2] == 0 &&
266
+ data[tail + 3] == 0) {
267
+ return ALGO_LZ4;
268
+ }
269
+ }
270
+ }
271
+
272
+ rb_raise(eDataError, "cannot detect compression format (no magic bytes found). "
273
+ "Use algo: :zstd, :lz4, or :brotli explicitly.");
274
+ return ALGO_ZSTD;
275
+ }
276
+
277
+ static inline VALUE rb_binary_str_new(const char *ptr, long len) {
278
+ VALUE str = rb_str_new(ptr, len);
279
+ rb_enc_associate(str, binary_encoding);
280
+ return str;
281
+ }
282
+
283
+ static inline VALUE rb_binary_str_buf_new(long capa) {
284
+ VALUE str = rb_str_buf_new(capa);
285
+ rb_enc_associate(str, binary_encoding);
286
+ return str;
287
+ }
288
+
289
+ static inline VALUE rb_binary_str_buf_reserve(long capa) {
290
+ VALUE str = rb_str_buf_new(capa);
291
+ rb_enc_associate(str, binary_encoding);
292
+ if (capa > 0)
293
+ rb_str_modify_expand(str, capa + 1);
294
+ return str;
295
+ }
296
+
297
+ static inline void grow_binary_str(VALUE str, size_t cur_len, size_t new_cap) {
298
+ size_t cur_cap = (size_t)rb_str_capacity(str);
299
+ if (new_cap <= cur_cap)
300
+ return;
301
+ rb_str_set_len(str, (long)cur_len);
302
+ rb_str_modify_expand(str, (long)(new_cap - cur_len));
303
+ }
304
+
305
/* Decompression safety limits. */
typedef struct limits_config_s {
    /* Maximum number of decompressed bytes allowed. */
    size_t max_output_size;
    /* Non-zero when the expansion-ratio check is active. */
    int max_ratio_enabled;
    /* Maximum allowed output/input ratio; 0 when the check is disabled. */
    unsigned long long max_ratio;
} limits_config_t;
310
+
311
+ static void limits_config_init(limits_config_t *limits) {
312
+ limits->max_output_size = (size_t)MAX_DECOMPRESS_SIZE;
313
+ limits->max_ratio_enabled = 1;
314
+ limits->max_ratio = DEFAULT_MAX_RATIO;
315
+ }
316
+
317
+ static void limits_config_apply_opts(VALUE opts, limits_config_t *limits) {
318
+ if (NIL_P(opts))
319
+ return;
320
+
321
+ VALUE val = opt_lookup2(opts, sym_cache.max_output_size, Qundef);
322
+ if (val != Qundef && !NIL_P(val)) {
323
+ size_t max_output_size = NUM2SIZET(val);
324
+ if (max_output_size == 0)
325
+ rb_raise(rb_eArgError, "max_output_size must be greater than 0");
326
+ limits->max_output_size = max_output_size;
327
+ }
328
+
329
+ val = opt_lookup2(opts, sym_cache.max_ratio, Qundef);
330
+ if (val == Qundef)
331
+ return;
332
+ if (NIL_P(val)) {
333
+ limits->max_ratio_enabled = 0;
334
+ limits->max_ratio = 0;
335
+ return;
336
+ }
337
+
338
+ unsigned long long max_ratio = NUM2ULL(val);
339
+ if (max_ratio == 0)
340
+ rb_raise(rb_eArgError, "max_ratio must be greater than 0 or nil");
341
+ limits->max_ratio_enabled = 1;
342
+ limits->max_ratio = max_ratio;
343
+ }
344
+
345
+ static void parse_limits_from_opts(VALUE opts, limits_config_t *limits) {
346
+ limits_config_init(limits);
347
+ limits_config_apply_opts(opts, limits);
348
+ }
349
+
350
+ static size_t checked_add_size(size_t left, size_t right, const char *message) {
351
+ if (SIZE_MAX - left < right)
352
+ rb_raise(eDataError, "%s", message);
353
+ return left + right;
354
+ }
355
+
356
/*
 * Return the largest output size permitted for total_input bytes at
 * max_ratio, i.e. total_input * max_ratio saturated at SIZE_MAX.
 * A zero total_input yields SIZE_MAX.
 */
static size_t ratio_limit_bytes(size_t total_input, unsigned long long max_ratio) {
    if (total_input == 0) {
        return SIZE_MAX;
    }

    /* Largest multiplier that keeps the product within size_t. */
    const unsigned long long largest_safe_ratio =
        (unsigned long long)SIZE_MAX / (unsigned long long)total_input;

    if (max_ratio <= largest_safe_ratio) {
        return total_input * (size_t)max_ratio;
    }
    return SIZE_MAX;
}
363
+
364
+ static void enforce_output_and_ratio_limits(size_t total_output, size_t total_input,
365
+ size_t max_output_size, int max_ratio_enabled,
366
+ unsigned long long max_ratio) {
367
+ if (total_output > max_output_size) {
368
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)", max_output_size);
369
+ }
370
+
371
+ if (!max_ratio_enabled || total_input < RATIO_MIN_INPUT_BYTES)
372
+ return;
373
+
374
+ size_t ratio_limit = ratio_limit_bytes(total_input, max_ratio);
375
+ if (total_output > ratio_limit) {
376
+ size_t ratio = total_input == 0 ? 0 : (total_output / total_input);
377
+ rb_raise(eDataError, "decompression ratio exceeds limit (ratio=%zu, max=%llu)", ratio,
378
+ max_ratio);
379
+ }
380
+ }
381
+
382
+ static VALUE current_fiber_scheduler(void) {
383
+ VALUE sched = rb_fiber_scheduler_current();
384
+ if (sched == Qnil || sched == Qfalse)
385
+ return Qnil;
386
+ return sched;
387
+ }
388
+
389
/* Return non-zero when a fiber scheduler is currently set. */
static int has_fiber_scheduler(void) {
    return !NIL_P(current_fiber_scheduler());
}
392
+
393
/*
 * Unblock function that deliberately does nothing; passed to
 * rb_thread_call_without_gvl() by run_without_gvl().
 */
static void unblock_noop(void *arg) {
    (void)arg; /* unused */
}
396
+
397
+ static inline void run_without_gvl(void *(*func)(void *), void *arg) {
398
+ rb_thread_call_without_gvl(func, arg, unblock_noop, NULL);
399
+ }
400
+
401
+ typedef struct {
402
+ void *(*func)(void *);
403
+ void *arg;
404
+
405
+ VALUE scheduler;
406
+ VALUE blocker;
407
+ VALUE fiber;
408
+ } fiber_worker_ctx_t;
409
+
410
+ static void *fiber_worker_nogvl(void *arg) {
411
+ fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
412
+ c->func(c->arg);
413
+ return NULL;
414
+ }
415
+
416
+ static VALUE fiber_worker_thread(void *arg) {
417
+ fiber_worker_ctx_t *c = (fiber_worker_ctx_t *)arg;
418
+ rb_thread_call_without_gvl(fiber_worker_nogvl, c, RUBY_UBF_PROCESS, NULL);
419
+ rb_fiber_scheduler_unblock(c->scheduler, c->blocker, c->fiber);
420
+ return Qnil;
421
+ }
422
+
423
+ static void run_via_fiber_worker(VALUE scheduler, void *(*func)(void *), void *arg) {
424
+ fiber_worker_ctx_t ctx = {
425
+ .func = func,
426
+ .arg = arg,
427
+ .scheduler = scheduler,
428
+ .blocker = rb_obj_alloc(rb_cObject),
429
+ .fiber = rb_fiber_current(),
430
+ };
431
+ VALUE th = rb_thread_create(fiber_worker_thread, &ctx);
432
+ rb_fiber_scheduler_block(scheduler, ctx.blocker, Qnil);
433
+ join_thread(th);
434
+ }
435
+
436
+ static inline size_t fiber_maybe_yield(size_t bytes_since_yield, size_t just_processed,
437
+ size_t yield_chunk, int *did_yield) {
438
+ *did_yield = 0;
439
+ bytes_since_yield += just_processed;
440
+ if (bytes_since_yield >= yield_chunk) {
441
+ VALUE scheduler = current_fiber_scheduler();
442
+ if (scheduler != Qnil) {
443
+ scheduler_yield(scheduler);
444
+ *did_yield = 1;
445
+ }
446
+ return 0;
447
+ }
448
+ return bytes_since_yield;
449
+ }
450
+
451
+ #define DICT_CDICT_CACHE_SIZE 4
452
+
453
+ typedef struct {
454
+ int level;
455
+ ZSTD_CDict *cdict;
456
+ } cdict_cache_entry_t;
457
+
458
+ struct dictionary_s {
459
+ compress_algo_t algo;
460
+ uint8_t *data;
461
+ size_t size;
462
+
463
+ cdict_cache_entry_t cdict_cache[DICT_CDICT_CACHE_SIZE];
464
+ int cdict_cache_count;
465
+
466
+ ZSTD_DDict *ddict;
467
+ };
468
+
469
+ static void dict_free(void *ptr) {
470
+ dictionary_t *dict = (dictionary_t *)ptr;
471
+ for (int i = 0; i < dict->cdict_cache_count; i++) {
472
+ if (dict->cdict_cache[i].cdict)
473
+ ZSTD_freeCDict(dict->cdict_cache[i].cdict);
474
+ }
475
+ if (dict->ddict)
476
+ ZSTD_freeDDict(dict->ddict);
477
+ if (dict->data)
478
+ xfree(dict->data);
479
+ xfree(dict);
480
+ }
481
+
482
+ static size_t dict_memsize(const void *ptr) {
483
+ const dictionary_t *d = (const dictionary_t *)ptr;
484
+ size_t total = sizeof(dictionary_t) + d->size;
485
+
486
+ for (int i = 0; i < d->cdict_cache_count; i++) {
487
+ if (d->cdict_cache[i].cdict)
488
+ total += d->size + 4096;
489
+ }
490
+ if (d->ddict)
491
+ total += d->size + 4096;
492
+ return total;
493
+ }
494
+
495
+ static const rb_data_type_t dictionary_type = {
496
+ "Compress::Dictionary", {NULL, dict_free, dict_memsize}, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY};
497
+
498
+ static VALUE dict_alloc(VALUE klass) {
499
+ dictionary_t *d = ALLOC(dictionary_t);
500
+ memset(d, 0, sizeof(dictionary_t));
501
+ return TypedData_Wrap_Struct(klass, &dictionary_type, d);
502
+ }
503
+
504
+ static ZSTD_CDict *dict_get_cdict(dictionary_t *dict, int level) {
505
+ for (int i = 0; i < dict->cdict_cache_count; i++) {
506
+ if (dict->cdict_cache[i].level == level)
507
+ return dict->cdict_cache[i].cdict;
508
+ }
509
+
510
+ ZSTD_CDict *cdict = ZSTD_createCDict(dict->data, dict->size, level);
511
+ if (!cdict)
512
+ return NULL;
513
+
514
+ for (int i = 0; i < dict->cdict_cache_count; i++) {
515
+ if (dict->cdict_cache[i].level == level) {
516
+ ZSTD_freeCDict(cdict);
517
+ return dict->cdict_cache[i].cdict;
518
+ }
519
+ }
520
+
521
+ if (dict->cdict_cache_count < DICT_CDICT_CACHE_SIZE) {
522
+ dict->cdict_cache[dict->cdict_cache_count].level = level;
523
+ dict->cdict_cache[dict->cdict_cache_count].cdict = cdict;
524
+ dict->cdict_cache_count++;
525
+ } else {
526
+ ZSTD_CDict *old_cdict = dict->cdict_cache[0].cdict;
527
+ memmove(&dict->cdict_cache[0], &dict->cdict_cache[1],
528
+ sizeof(cdict_cache_entry_t) * (DICT_CDICT_CACHE_SIZE - 1));
529
+ dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].level = level;
530
+ dict->cdict_cache[DICT_CDICT_CACHE_SIZE - 1].cdict = cdict;
531
+ if (old_cdict)
532
+ ZSTD_freeCDict(old_cdict);
533
+ }
534
+ return cdict;
535
+ }
536
+
537
+ static ZSTD_DDict *dict_get_ddict(dictionary_t *dict) {
538
+ if (!dict->ddict) {
539
+ dict->ddict = ZSTD_createDDict(dict->data, dict->size);
540
+ }
541
+ return dict->ddict;
542
+ }
543
+
544
+ typedef struct {
545
+ const char *src;
546
+ size_t src_len;
547
+ char *dst;
548
+ size_t dst_cap;
549
+ int level;
550
+ ZSTD_CDict *cdict;
551
+ size_t result;
552
+ int error;
553
+ } zstd_compress_args_t;
554
+
555
+ static void *zstd_compress_nogvl(void *arg) {
556
+ zstd_compress_args_t *a = (zstd_compress_args_t *)arg;
557
+ if (a->cdict) {
558
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
559
+ if (!cctx) {
560
+ a->error = 1;
561
+ return NULL;
562
+ }
563
+ a->result =
564
+ ZSTD_compress_usingCDict(cctx, a->dst, a->dst_cap, a->src, a->src_len, a->cdict);
565
+ ZSTD_freeCCtx(cctx);
566
+ } else {
567
+ a->result = ZSTD_compress(a->dst, a->dst_cap, a->src, a->src_len, a->level);
568
+ }
569
+ a->error = 0;
570
+ return NULL;
571
+ }
572
+
573
+ typedef struct {
574
+ const void *src;
575
+ size_t src_len;
576
+ void *dst;
577
+ size_t dst_cap;
578
+ ZSTD_DDict *ddict;
579
+ size_t result;
580
+ int error;
581
+ } zstd_decompress_args_t;
582
+
583
+ static void *zstd_decompress_nogvl(void *arg) {
584
+ zstd_decompress_args_t *a = (zstd_decompress_args_t *)arg;
585
+ if (a->ddict) {
586
+ ZSTD_DCtx *dctx = ZSTD_createDCtx();
587
+ if (!dctx) {
588
+ a->error = 1;
589
+ return NULL;
590
+ }
591
+ a->result =
592
+ ZSTD_decompress_usingDDict(dctx, a->dst, a->dst_cap, a->src, a->src_len, a->ddict);
593
+ ZSTD_freeDCtx(dctx);
594
+ } else {
595
+ a->result = ZSTD_decompress(a->dst, a->dst_cap, a->src, a->src_len);
596
+ }
597
+ a->error = 0;
598
+ return NULL;
599
+ }
600
+
601
/* Inputs and outputs of lz4_decompress_all_nogvl. */
typedef struct {
    const uint8_t *src; /* framed input: repeated [orig u32][comp u32][payload] */
    size_t src_len;
    char *dst;          /* output buffer; the caller sizes it from the block headers */
    size_t out_offset;  /* out: total bytes written (valid when error == 0) */
    int error;          /* out: 1 on failure, 0 on success */
    char err_msg[64];   /* out: message describing the failure */
} lz4_decompress_all_args_t;

/*
 * Decompresses every block of a framed LZ4 buffer into a->dst. Touches no
 * Ruby API, so it may run without the GVL.
 *
 * Each block is [u32 LE original size][u32 LE compressed size][payload];
 * an original size of 0 (or fewer than 4 bytes left) ends the stream.
 *
 * Unlike the previous version this function validates the framing itself
 * instead of trusting that the caller pre-scanned the buffer: the second
 * header word and the payload are only read after checking that they lie
 * inside src, using subtraction so the checks cannot wrap. Sizes that do not
 * fit in an int are rejected before being narrowed for the LZ4 call.
 *
 * Always returns NULL; the outcome is reported through `a`.
 */
static void *lz4_decompress_all_nogvl(void *arg) {
    lz4_decompress_all_args_t *a = (lz4_decompress_all_args_t *)arg;
    const uint8_t *src = a->src;
    const size_t slen = a->src_len;
    char *out_ptr = a->dst;
    size_t out_offset = 0;
    size_t pos = 0; /* invariant: pos <= slen */
    const char *failure = NULL;

    while (slen - pos >= 4) {
        uint32_t orig_size = read_le_u32(src + pos);
        if (orig_size == 0)
            break;

        if (slen - pos < 8) {
            failure = "lz4: truncated block header";
            break;
        }
        uint32_t comp_size = read_le_u32(src + pos + 4);
        if (comp_size > slen - pos - 8) {
            failure = "lz4: truncated block data";
            break;
        }
        /* LZ4 takes int sizes; refuse values the casts below would mangle. */
        if (comp_size > (uint32_t)INT_MAX || orig_size > (uint32_t)INT_MAX) {
            failure = "lz4 decompress failed";
            break;
        }

        int dsize = LZ4_decompress_safe((const char *)(src + pos + 8), out_ptr + out_offset,
                                        (int)comp_size, (int)orig_size);
        if (dsize < 0) {
            failure = "lz4 decompress failed";
            break;
        }

        out_offset += (size_t)dsize;
        pos += 8 + (size_t)comp_size;
    }

    if (failure != NULL) {
        a->error = 1;
        snprintf(a->err_msg, sizeof(a->err_msg), "%s", failure);
        return NULL;
    }

    a->out_offset = out_offset;
    a->error = 0;
    return NULL;
}
640
+
641
/* Inputs and output of lz4_compress_nogvl. */
typedef struct {
    const char *src;
    int src_len;
    char *dst;
    int dst_cap; /* capacity of dst in bytes */
    int level;   /* > 1 selects the HC compressor */
    int result;  /* return value of the LZ4 call */
} lz4_compress_args_t;

/*
 * Compresses one LZ4 block without touching any Ruby API. Levels above 1 go
 * through LZ4_compress_HC, everything else through LZ4_compress_default.
 *
 * `arg` is a lz4_compress_args_t; `result` receives the LZ4 return value.
 * Always returns NULL.
 */
static void *lz4_compress_nogvl(void *arg) {
    lz4_compress_args_t *job = (lz4_compress_args_t *)arg;
    const int use_hc = job->level > 1;

    job->result = use_hc
                      ? LZ4_compress_HC(job->src, job->dst, job->src_len, job->dst_cap, job->level)
                      : LZ4_compress_default(job->src, job->dst, job->src_len, job->dst_cap);
    return NULL;
}
659
+
660
+ typedef struct {
661
+ int level;
662
+ size_t src_len;
663
+ const uint8_t *src;
664
+ size_t *out_len;
665
+ uint8_t *dst;
666
+ BROTLI_BOOL result;
667
+ } brotli_compress_args_t;
668
+
669
+ static void *brotli_compress_nogvl(void *arg) {
670
+ brotli_compress_args_t *a = (brotli_compress_args_t *)arg;
671
+ a->result = BrotliEncoderCompress(a->level, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE,
672
+ a->src_len, a->src, a->out_len, a->dst);
673
+ return NULL;
674
+ }
675
+
676
+ typedef struct {
677
+ ZSTD_CStream *cstream;
678
+ ZSTD_outBuffer *output;
679
+ ZSTD_inBuffer *input;
680
+ size_t result;
681
+ } zstd_stream_chunk_args_t;
682
+
683
+ static void *zstd_compress_stream_chunk_nogvl(void *arg) {
684
+ zstd_stream_chunk_args_t *a = (zstd_stream_chunk_args_t *)arg;
685
+ a->result = ZSTD_compressStream(a->cstream, a->output, a->input);
686
+ return NULL;
687
+ }
688
+
689
+ typedef struct {
690
+ const char *src;
691
+ size_t src_len;
692
+ int level;
693
+ ZSTD_CDict *cdict;
694
+ char *dst;
695
+ size_t dst_cap;
696
+ size_t result;
697
+ int error;
698
+
699
+ VALUE scheduler;
700
+ VALUE blocker;
701
+ VALUE fiber;
702
+ } zstd_fiber_compress_t;
703
+
704
+ typedef struct {
705
+ ZSTD_CStream *cstream;
706
+ ZSTD_inBuffer *input;
707
+ ZSTD_outBuffer *output;
708
+ size_t result;
709
+
710
+ VALUE scheduler;
711
+ VALUE blocker;
712
+ VALUE fiber;
713
+ } zstd_stream_chunk_fiber_t;
714
+
715
+ static void *zstd_stream_chunk_fiber_nogvl(void *arg) {
716
+ zstd_stream_chunk_fiber_t *a = (zstd_stream_chunk_fiber_t *)arg;
717
+ a->result = ZSTD_compressStream(a->cstream, a->output, a->input);
718
+ return NULL;
719
+ }
720
+
721
+ static VALUE zstd_stream_chunk_fiber_thread(void *arg) {
722
+ zstd_stream_chunk_fiber_t *a = (zstd_stream_chunk_fiber_t *)arg;
723
+ rb_thread_call_without_gvl(zstd_stream_chunk_fiber_nogvl, a, RUBY_UBF_PROCESS, NULL);
724
+ rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
725
+ return Qnil;
726
+ }
727
+
728
+ typedef struct {
729
+ BrotliEncoderState *enc;
730
+ BrotliEncoderOperation op;
731
+ size_t *available_in;
732
+ const uint8_t **next_in;
733
+ size_t *available_out;
734
+ uint8_t **next_out;
735
+ BROTLI_BOOL result;
736
+
737
+ VALUE scheduler;
738
+ VALUE blocker;
739
+ VALUE fiber;
740
+ } brotli_stream_chunk_fiber_t;
741
+
742
+ static void *brotli_stream_chunk_fiber_nogvl(void *arg) {
743
+ brotli_stream_chunk_fiber_t *a = (brotli_stream_chunk_fiber_t *)arg;
744
+ a->result = BrotliEncoderCompressStream(a->enc, a->op, a->available_in, a->next_in,
745
+ a->available_out, a->next_out, NULL);
746
+ return NULL;
747
+ }
748
+
749
+ static VALUE brotli_stream_chunk_fiber_thread(void *arg) {
750
+ brotli_stream_chunk_fiber_t *a = (brotli_stream_chunk_fiber_t *)arg;
751
+ rb_thread_call_without_gvl(brotli_stream_chunk_fiber_nogvl, a, RUBY_UBF_PROCESS, NULL);
752
+ rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
753
+ return Qnil;
754
+ }
755
+
756
+ typedef struct {
757
+ size_t encoded_size;
758
+ const uint8_t *encoded_buffer;
759
+ size_t *decoded_size;
760
+ uint8_t *decoded_buffer;
761
+ BrotliDecoderResult result;
762
+ } brotli_decompress_args_t;
763
+
764
+ static void *brotli_decompress_nogvl(void *arg) {
765
+ brotli_decompress_args_t *a = (brotli_decompress_args_t *)arg;
766
+ a->result = BrotliDecoderDecompress(a->encoded_size, a->encoded_buffer, a->decoded_size,
767
+ a->decoded_buffer);
768
+ return NULL;
769
+ }
770
+
771
+ typedef struct {
772
+ ZSTD_DStream *dstream;
773
+ ZSTD_outBuffer *output;
774
+ ZSTD_inBuffer *input;
775
+ size_t result;
776
+ } zstd_decompress_stream_chunk_args_t;
777
+
778
+ static void *zstd_decompress_stream_chunk_nogvl(void *arg) {
779
+ zstd_decompress_stream_chunk_args_t *a = (zstd_decompress_stream_chunk_args_t *)arg;
780
+ a->result = ZSTD_decompressStream(a->dstream, a->output, a->input);
781
+ return NULL;
782
+ }
783
+
784
+ typedef struct {
785
+ BrotliDecoderState *dec;
786
+ size_t *available_in;
787
+ const uint8_t **next_in;
788
+ size_t *available_out;
789
+ uint8_t **next_out;
790
+ BrotliDecoderResult result;
791
+ } brotli_decompress_stream_args_t;
792
+
793
+ static void *brotli_decompress_stream_nogvl(void *arg) {
794
+ brotli_decompress_stream_args_t *a = (brotli_decompress_stream_args_t *)arg;
795
+ a->result = BrotliDecoderDecompressStream(a->dec, a->available_in, a->next_in, a->available_out,
796
+ a->next_out, NULL);
797
+ return NULL;
798
+ }
799
+
800
+ static void *zstd_fiber_compress_nogvl(void *arg) {
801
+ zstd_fiber_compress_t *a = (zstd_fiber_compress_t *)arg;
802
+ if (a->cdict) {
803
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
804
+ if (!cctx) {
805
+ a->error = 1;
806
+ return NULL;
807
+ }
808
+ a->result =
809
+ ZSTD_compress_usingCDict(cctx, a->dst, a->dst_cap, a->src, a->src_len, a->cdict);
810
+ ZSTD_freeCCtx(cctx);
811
+ } else {
812
+ a->result = ZSTD_compress(a->dst, a->dst_cap, a->src, a->src_len, a->level);
813
+ }
814
+ return NULL;
815
+ }
816
+
817
+ static VALUE zstd_fiber_compress_thread(void *arg) {
818
+ zstd_fiber_compress_t *a = (zstd_fiber_compress_t *)arg;
819
+ rb_thread_call_without_gvl(zstd_fiber_compress_nogvl, a, RUBY_UBF_PROCESS, NULL);
820
+ rb_fiber_scheduler_unblock(a->scheduler, a->blocker, a->fiber);
821
+ return Qnil;
822
+ }
823
+
824
+ static VALUE compress_compress(int argc, VALUE *argv, VALUE self) {
825
+ VALUE data, opts;
826
+ rb_scan_args(argc, argv, "1:", &data, &opts);
827
+ StringValue(data);
828
+ reject_algorithm_keyword(opts);
829
+
830
+ VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
831
+ if (!NIL_P(opts)) {
832
+ algo_sym = opt_get(opts, sym_cache.algo);
833
+ level_val = opt_get(opts, sym_cache.level);
834
+ dict_val = opt_get(opts, sym_cache.dictionary);
835
+ }
836
+
837
+ compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
838
+ int level = resolve_level(algo, level_val);
839
+
840
+ dictionary_t *dict = NULL;
841
+ if (!NIL_P(dict_val)) {
842
+ if (algo == ALGO_LZ4) {
843
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
844
+ }
845
+ dict = opt_dictionary(dict_val);
846
+ }
847
+
848
+ const char *src = RSTRING_PTR(data);
849
+ size_t slen = RSTRING_LEN(data);
850
+ const algo_policy_t *policy = algo_policy(algo);
851
+
852
+ switch (algo) {
853
+ case ALGO_ZSTD: {
854
+ size_t bound = ZSTD_compressBound(slen);
855
+
856
+ ZSTD_CDict *cdict = NULL;
857
+ if (dict) {
858
+ cdict = dict_get_cdict(dict, level);
859
+ if (!cdict)
860
+ rb_raise(eMemError, "zstd: failed to create/get cdict");
861
+ }
862
+
863
+ if (slen < policy->gvl_unlock_threshold) {
864
+ VALUE dst = rb_binary_str_buf_reserve(bound);
865
+ size_t csize;
866
+ if (cdict) {
867
+ ZSTD_CCtx *cctx = ZSTD_createCCtx();
868
+ if (!cctx)
869
+ rb_raise(eMemError, "zstd: failed to create context");
870
+ csize = ZSTD_compress_usingCDict(cctx, RSTRING_PTR(dst), bound, src, slen, cdict);
871
+ ZSTD_freeCCtx(cctx);
872
+ } else {
873
+ csize = ZSTD_compress(RSTRING_PTR(dst), bound, src, slen, level);
874
+ }
875
+ if (ZSTD_isError(csize))
876
+ rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(csize));
877
+ rb_str_set_len(dst, (long)csize);
878
+ RB_GC_GUARD(data);
879
+ return dst;
880
+ }
881
+
882
+ {
883
+ VALUE scheduler = current_fiber_scheduler();
884
+ if (scheduler != Qnil) {
885
+ char *out_buf = (char *)malloc(bound);
886
+ if (!out_buf)
887
+ rb_raise(eMemError, "zstd: malloc failed");
888
+
889
+ VALUE blocker = rb_obj_alloc(rb_cObject);
890
+
891
+ zstd_fiber_compress_t fargs = {
892
+ .src = src,
893
+ .src_len = slen,
894
+ .level = level,
895
+ .cdict = cdict,
896
+ .dst = out_buf,
897
+ .dst_cap = bound,
898
+ .result = 0,
899
+ .error = 0,
900
+ .scheduler = scheduler,
901
+ .blocker = blocker,
902
+ .fiber = rb_fiber_current(),
903
+ };
904
+
905
+ VALUE rb_thread = rb_thread_create(zstd_fiber_compress_thread, &fargs);
906
+ rb_fiber_scheduler_block(scheduler, blocker, Qnil);
907
+ join_thread(rb_thread);
908
+
909
+ if (fargs.error) {
910
+ free(out_buf);
911
+ rb_raise(eMemError, "zstd: failed to create context");
912
+ }
913
+ if (ZSTD_isError(fargs.result)) {
914
+ free(out_buf);
915
+ rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(fargs.result));
916
+ }
917
+
918
+ VALUE result = rb_binary_str_new(out_buf, (long)fargs.result);
919
+ free(out_buf);
920
+ RB_GC_GUARD(data);
921
+ return result;
922
+ }
923
+ }
924
+
925
+ {
926
+ VALUE dst = rb_binary_str_buf_reserve(bound);
927
+ zstd_compress_args_t args = {
928
+ .src = src,
929
+ .src_len = slen,
930
+ .dst = RSTRING_PTR(dst),
931
+ .dst_cap = bound,
932
+ .level = level,
933
+ .cdict = cdict,
934
+ .result = 0,
935
+ .error = 0,
936
+ };
937
+ run_without_gvl(zstd_compress_nogvl, &args);
938
+
939
+ if (args.error)
940
+ rb_raise(eMemError, "zstd: failed to create context");
941
+ if (ZSTD_isError(args.result))
942
+ rb_raise(eError, "zstd compress: %s", ZSTD_getErrorName(args.result));
943
+
944
+ rb_str_set_len(dst, (long)args.result);
945
+ RB_GC_GUARD(data);
946
+ return dst;
947
+ }
948
+ }
949
+ case ALGO_LZ4: {
950
+ if (slen > (size_t)INT_MAX)
951
+ rb_raise(eError, "lz4: input too large (max 2GB)");
952
+ int bound = LZ4_compressBound((int)slen);
953
+
954
+ int csize;
955
+ if (slen >= policy->gvl_unlock_threshold) {
956
+ VALUE dst = rb_binary_str_buf_reserve(8 + (size_t)bound + 4);
957
+ char *out = RSTRING_PTR(dst);
958
+
959
+ write_le_u32((uint8_t *)out, (uint32_t)slen);
960
+
961
+ lz4_compress_args_t args = {
962
+ .src = src,
963
+ .src_len = (int)slen,
964
+ .dst = out + 8,
965
+ .dst_cap = bound,
966
+ .level = level,
967
+ };
968
+
969
+ VALUE scheduler = current_fiber_scheduler();
970
+ if (scheduler != Qnil) {
971
+ run_via_fiber_worker(scheduler, lz4_compress_nogvl, &args);
972
+ } else {
973
+ run_without_gvl(lz4_compress_nogvl, &args);
974
+ }
975
+ csize = args.result;
976
+
977
+ if (csize <= 0)
978
+ rb_raise(eError, "lz4 compress failed");
979
+
980
+ write_le_u32((uint8_t *)out + 4, (uint32_t)csize);
981
+
982
+ size_t total = 8 + (size_t)csize;
983
+ write_le_u32((uint8_t *)out + total, 0);
984
+
985
+ rb_str_set_len(dst, (long)(total + 4));
986
+ RB_GC_GUARD(data);
987
+ return dst;
988
+ } else {
989
+ VALUE dst = rb_binary_str_buf_reserve(8 + bound + 4);
990
+ char *out = RSTRING_PTR(dst);
991
+
992
+ write_le_u32((uint8_t *)out, (uint32_t)slen);
993
+
994
+ if (level > 1) {
995
+ csize = LZ4_compress_HC(src, out + 8, (int)slen, bound, level);
996
+ } else {
997
+ csize = LZ4_compress_default(src, out + 8, (int)slen, bound);
998
+ }
999
+ if (csize <= 0)
1000
+ rb_raise(eError, "lz4 compress failed");
1001
+
1002
+ write_le_u32((uint8_t *)out + 4, (uint32_t)csize);
1003
+
1004
+ size_t total = 8 + csize;
1005
+ write_le_u32((uint8_t *)out + total, 0);
1006
+
1007
+ rb_str_set_len(dst, total + 4);
1008
+ RB_GC_GUARD(data);
1009
+ return dst;
1010
+ }
1011
+ }
1012
+ case ALGO_BROTLI: {
1013
+ size_t out_len = BrotliEncoderMaxCompressedSize(slen);
1014
+ if (out_len == 0)
1015
+ out_len = slen + (slen >> 2) + 1024;
1016
+
1017
+ if (dict) {
1018
+ VALUE dst = rb_binary_str_buf_reserve(out_len);
1019
+ BrotliEncoderState *enc = BrotliEncoderCreateInstance(NULL, NULL, NULL);
1020
+ if (!enc)
1021
+ rb_raise(eMemError, "brotli: failed to create encoder");
1022
+
1023
+ BrotliEncoderPreparedDictionary *pd =
1024
+ BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW, dict->size, dict->data,
1025
+ BROTLI_MAX_QUALITY, NULL, NULL, NULL);
1026
+ if (!pd) {
1027
+ BrotliEncoderDestroyInstance(enc);
1028
+ rb_raise(eMemError, "brotli: failed to prepare dictionary");
1029
+ }
1030
+
1031
+ if (!BrotliEncoderSetParameter(enc, BROTLI_PARAM_QUALITY, level) ||
1032
+ !BrotliEncoderAttachPreparedDictionary(enc, pd)) {
1033
+ BrotliEncoderDestroyPreparedDictionary(pd);
1034
+ BrotliEncoderDestroyInstance(enc);
1035
+ rb_raise(eError, "brotli: failed to attach dictionary");
1036
+ }
1037
+
1038
+ size_t available_in = slen;
1039
+ const uint8_t *next_in = (const uint8_t *)src;
1040
+ size_t available_out = out_len;
1041
+ uint8_t *next_out = (uint8_t *)RSTRING_PTR(dst);
1042
+ size_t initial_out = available_out;
1043
+
1044
+ BROTLI_BOOL ok =
1045
+ BrotliEncoderCompressStream(enc, BROTLI_OPERATION_FINISH, &available_in, &next_in,
1046
+ &available_out, &next_out, NULL);
1047
+
1048
+ BrotliEncoderDestroyPreparedDictionary(pd);
1049
+ BrotliEncoderDestroyInstance(enc);
1050
+ if (!ok)
1051
+ rb_raise(eError, "brotli compress with dict failed");
1052
+
1053
+ rb_str_set_len(dst, initial_out - available_out);
1054
+ RB_GC_GUARD(data);
1055
+ return dst;
1056
+ } else if (slen >= policy->gvl_unlock_threshold) {
1057
+ VALUE dst = rb_binary_str_buf_reserve(out_len);
1058
+ size_t actual_out_len = out_len;
1059
+
1060
+ brotli_compress_args_t args = {
1061
+ .level = level,
1062
+ .src_len = slen,
1063
+ .src = (const uint8_t *)src,
1064
+ .out_len = &actual_out_len,
1065
+ .dst = (uint8_t *)RSTRING_PTR(dst),
1066
+ };
1067
+
1068
+ VALUE scheduler = current_fiber_scheduler();
1069
+ if (scheduler != Qnil) {
1070
+ run_via_fiber_worker(scheduler, brotli_compress_nogvl, &args);
1071
+ } else {
1072
+ run_without_gvl(brotli_compress_nogvl, &args);
1073
+ }
1074
+
1075
+ if (!args.result)
1076
+ rb_raise(eError, "brotli compress failed");
1077
+
1078
+ rb_str_set_len(dst, (long)actual_out_len);
1079
+ RB_GC_GUARD(data);
1080
+ return dst;
1081
+ } else {
1082
+ VALUE dst = rb_binary_str_buf_reserve(out_len);
1083
+ BROTLI_BOOL ok =
1084
+ BrotliEncoderCompress(level, BROTLI_DEFAULT_WINDOW, BROTLI_DEFAULT_MODE, slen,
1085
+ (const uint8_t *)src, &out_len, (uint8_t *)RSTRING_PTR(dst));
1086
+ if (!ok)
1087
+ rb_raise(eError, "brotli compress failed");
1088
+ rb_str_set_len(dst, out_len);
1089
+ RB_GC_GUARD(data);
1090
+ return dst;
1091
+ }
1092
+ }
1093
+ }
1094
+
1095
+ return Qnil;
1096
+ }
1097
+
1098
+ static VALUE compress_decompress(int argc, VALUE *argv, VALUE self) {
1099
+ VALUE data, opts;
1100
+ rb_scan_args(argc, argv, "1:", &data, &opts);
1101
+ StringValue(data);
1102
+ reject_algorithm_keyword(opts);
1103
+
1104
+ VALUE algo_sym = Qnil, dict_val = Qnil;
1105
+ limits_config_t limits;
1106
+ parse_limits_from_opts(opts, &limits);
1107
+ if (!NIL_P(opts)) {
1108
+ algo_sym = opt_get(opts, sym_cache.algo);
1109
+ dict_val = opt_get(opts, sym_cache.dictionary);
1110
+ }
1111
+
1112
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1113
+ size_t slen = RSTRING_LEN(data);
1114
+
1115
+ compress_algo_t algo;
1116
+ if (NIL_P(algo_sym)) {
1117
+ algo = detect_algo(src, slen);
1118
+ } else {
1119
+ algo = sym_to_algo(algo_sym);
1120
+ }
1121
+
1122
+ const algo_policy_t *policy = algo_policy(algo);
1123
+
1124
+ dictionary_t *dict = NULL;
1125
+ if (!NIL_P(dict_val)) {
1126
+ if (algo == ALGO_LZ4) {
1127
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
1128
+ }
1129
+ dict = opt_dictionary(dict_val);
1130
+ }
1131
+
1132
+ switch (algo) {
1133
+ case ALGO_ZSTD: {
1134
+ unsigned long long frame_size = ZSTD_getFrameContentSize(src, slen);
1135
+ if (frame_size == ZSTD_CONTENTSIZE_ERROR) {
1136
+ rb_raise(eDataError, "zstd: not valid compressed data");
1137
+ }
1138
+
1139
+ if (frame_size != ZSTD_CONTENTSIZE_UNKNOWN) {
1140
+ if (frame_size > limits.max_output_size) {
1141
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1142
+ limits.max_output_size);
1143
+ }
1144
+ enforce_output_and_ratio_limits((size_t)frame_size, slen, limits.max_output_size,
1145
+ limits.max_ratio_enabled, limits.max_ratio);
1146
+ }
1147
+
1148
+ if (frame_size != ZSTD_CONTENTSIZE_UNKNOWN && frame_size <= limits.max_output_size) {
1149
+ size_t dsize;
1150
+
1151
+ if (frame_size >= algo_policy(ALGO_ZSTD)->gvl_unlock_threshold) {
1152
+ VALUE dst = rb_binary_str_buf_reserve((size_t)frame_size);
1153
+
1154
+ ZSTD_DDict *ddict = NULL;
1155
+ if (dict) {
1156
+ ddict = dict_get_ddict(dict);
1157
+ if (!ddict)
1158
+ rb_raise(eMemError, "zstd: failed to create ddict");
1159
+ }
1160
+
1161
+ zstd_decompress_args_t args = {
1162
+ .src = src,
1163
+ .src_len = slen,
1164
+ .dst = RSTRING_PTR(dst),
1165
+ .dst_cap = (size_t)frame_size,
1166
+ .ddict = ddict,
1167
+ };
1168
+
1169
+ VALUE scheduler = current_fiber_scheduler();
1170
+ if (scheduler != Qnil) {
1171
+ run_via_fiber_worker(scheduler, zstd_decompress_nogvl, &args);
1172
+ } else {
1173
+ run_without_gvl(zstd_decompress_nogvl, &args);
1174
+ }
1175
+
1176
+ if (args.error)
1177
+ rb_raise(eMemError, "zstd: failed to create dctx");
1178
+ dsize = args.result;
1179
+ if (ZSTD_isError(dsize))
1180
+ rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(dsize));
1181
+
1182
+ enforce_output_and_ratio_limits(dsize, slen, limits.max_output_size,
1183
+ limits.max_ratio_enabled, limits.max_ratio);
1184
+ rb_str_set_len(dst, (long)dsize);
1185
+ RB_GC_GUARD(data);
1186
+ return dst;
1187
+ } else {
1188
+ VALUE dst = rb_binary_str_buf_reserve((size_t)frame_size);
1189
+
1190
+ if (dict) {
1191
+ ZSTD_DDict *ddict = dict_get_ddict(dict);
1192
+ if (!ddict)
1193
+ rb_raise(eMemError, "zstd: failed to create ddict");
1194
+ ZSTD_DCtx *dctx = ZSTD_createDCtx();
1195
+ if (!dctx)
1196
+ rb_raise(eMemError, "zstd: failed to create dctx");
1197
+ dsize = ZSTD_decompress_usingDDict(dctx, RSTRING_PTR(dst), (size_t)frame_size,
1198
+ src, slen, ddict);
1199
+ ZSTD_freeDCtx(dctx);
1200
+ } else {
1201
+ dsize = ZSTD_decompress(RSTRING_PTR(dst), (size_t)frame_size, src, slen);
1202
+ }
1203
+
1204
+ if (ZSTD_isError(dsize))
1205
+ rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(dsize));
1206
+ enforce_output_and_ratio_limits(dsize, slen, limits.max_output_size,
1207
+ limits.max_ratio_enabled, limits.max_ratio);
1208
+ rb_str_set_len(dst, dsize);
1209
+ RB_GC_GUARD(data);
1210
+ return dst;
1211
+ }
1212
+ }
1213
+
1214
+ ZSTD_DCtx *dctx = ZSTD_createDCtx();
1215
+ if (!dctx)
1216
+ rb_raise(eMemError, "zstd: failed to create dctx");
1217
+
1218
+ if (dict) {
1219
+ ZSTD_DDict *ddict = dict_get_ddict(dict);
1220
+ if (ddict) {
1221
+ size_t r = ZSTD_DCtx_refDDict(dctx, ddict);
1222
+ if (ZSTD_isError(r)) {
1223
+ ZSTD_freeDCtx(dctx);
1224
+ rb_raise(eError, "zstd dict ref: %s", ZSTD_getErrorName(r));
1225
+ }
1226
+ }
1227
+ }
1228
+
1229
+ size_t alloc_size = (slen > limits.max_output_size / 8) ? limits.max_output_size : slen * 8;
1230
+ if (alloc_size < 4096)
1231
+ alloc_size = limits.max_output_size < 4096 ? limits.max_output_size : 4096;
1232
+ if (alloc_size == 0)
1233
+ alloc_size = limits.max_output_size;
1234
+
1235
+ VALUE dst = rb_binary_str_buf_reserve(alloc_size);
1236
+ size_t total_out = 0;
1237
+
1238
+ ZSTD_inBuffer input = {src, slen, 0};
1239
+ while (input.pos < input.size) {
1240
+ if (total_out >= alloc_size) {
1241
+ if (alloc_size >= limits.max_output_size) {
1242
+ ZSTD_freeDCtx(dctx);
1243
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1244
+ limits.max_output_size);
1245
+ }
1246
+ size_t next_cap = alloc_size * 2;
1247
+ if (next_cap > limits.max_output_size)
1248
+ next_cap = limits.max_output_size;
1249
+ alloc_size = next_cap;
1250
+ grow_binary_str(dst, total_out, alloc_size);
1251
+ }
1252
+
1253
+ size_t remaining_budget = limits.max_output_size - total_out;
1254
+ if (remaining_budget == 0) {
1255
+ ZSTD_freeDCtx(dctx);
1256
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1257
+ limits.max_output_size);
1258
+ }
1259
+
1260
+ size_t out_cap = alloc_size - total_out;
1261
+ if (out_cap > remaining_budget)
1262
+ out_cap = remaining_budget;
1263
+
1264
+ ZSTD_outBuffer output = {RSTRING_PTR(dst) + total_out, out_cap, 0};
1265
+ size_t ret = ZSTD_decompressStream(dctx, &output, &input);
1266
+ if (ZSTD_isError(ret)) {
1267
+ ZSTD_freeDCtx(dctx);
1268
+ rb_raise(eDataError, "zstd decompress: %s", ZSTD_getErrorName(ret));
1269
+ }
1270
+ total_out = checked_add_size(total_out, output.pos,
1271
+ "decompressed output exceeds representable size");
1272
+ enforce_output_and_ratio_limits(total_out, slen, limits.max_output_size,
1273
+ limits.max_ratio_enabled, limits.max_ratio);
1274
+ if (ret == 0)
1275
+ break;
1276
+ }
1277
+
1278
+ ZSTD_freeDCtx(dctx);
1279
+ rb_str_set_len(dst, total_out);
1280
+ RB_GC_GUARD(data);
1281
+ return dst;
1282
+ }
1283
+ case ALGO_LZ4: {
1284
+ if (slen < 4)
1285
+ rb_raise(eDataError, "lz4: data too short");
1286
+
1287
+ size_t total_orig = 0;
1288
+ size_t scan_pos = 0;
1289
+ while (scan_pos + 4 <= slen) {
1290
+ uint32_t orig_size = read_le_u32(src + scan_pos);
1291
+ if (orig_size == 0)
1292
+ break;
1293
+ if (scan_pos + 8 > slen)
1294
+ rb_raise(eDataError, "lz4: truncated block header");
1295
+ uint32_t comp_size = read_le_u32(src + scan_pos + 4);
1296
+ if (scan_pos + 8 + comp_size > slen)
1297
+ rb_raise(eDataError, "lz4: truncated block data");
1298
+ if (orig_size > 256 * 1024 * 1024)
1299
+ rb_raise(eDataError, "lz4: block too large (%u)", orig_size);
1300
+ total_orig = checked_add_size(total_orig, orig_size,
1301
+ "decompressed output exceeds representable size");
1302
+ if (total_orig > limits.max_output_size)
1303
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1304
+ limits.max_output_size);
1305
+ scan_pos += 8 + comp_size;
1306
+ }
1307
+
1308
+ enforce_output_and_ratio_limits(total_orig, slen, limits.max_output_size,
1309
+ limits.max_ratio_enabled, limits.max_ratio);
1310
+
1311
+ VALUE result = rb_binary_str_buf_reserve(total_orig);
1312
+
1313
+ lz4_decompress_all_args_t args = {
1314
+ .src = src,
1315
+ .src_len = slen,
1316
+ .dst = RSTRING_PTR(result),
1317
+ .out_offset = 0,
1318
+ .error = 0,
1319
+ };
1320
+
1321
+ if (total_orig >= algo_policy(ALGO_LZ4)->gvl_unlock_threshold) {
1322
+ VALUE scheduler = current_fiber_scheduler();
1323
+ if (scheduler != Qnil) {
1324
+ run_via_fiber_worker(scheduler, lz4_decompress_all_nogvl, &args);
1325
+ } else {
1326
+ run_without_gvl(lz4_decompress_all_nogvl, &args);
1327
+ }
1328
+ } else {
1329
+ lz4_decompress_all_nogvl(&args);
1330
+ }
1331
+
1332
+ if (args.error)
1333
+ rb_raise(eDataError, "%s", args.err_msg);
1334
+
1335
+ enforce_output_and_ratio_limits(args.out_offset, slen, limits.max_output_size,
1336
+ limits.max_ratio_enabled, limits.max_ratio);
1337
+ rb_str_set_len(result, args.out_offset);
1338
+ RB_GC_GUARD(data);
1339
+ return result;
1340
+ }
1341
+ case ALGO_BROTLI: {
1342
+ size_t alloc_size = (slen > limits.max_output_size / 4) ? limits.max_output_size : slen * 4;
1343
+ if (alloc_size < 1024)
1344
+ alloc_size = limits.max_output_size < 1024 ? limits.max_output_size : 1024;
1345
+ if (alloc_size == 0)
1346
+ alloc_size = limits.max_output_size;
1347
+
1348
+ BrotliDecoderState *dec = BrotliDecoderCreateInstance(NULL, NULL, NULL);
1349
+ if (!dec)
1350
+ rb_raise(eMemError, "brotli: failed to create decoder");
1351
+
1352
+ if (dict) {
1353
+ BrotliDecoderAttachDictionary(dec, BROTLI_SHARED_DICTIONARY_RAW, dict->size,
1354
+ dict->data);
1355
+ }
1356
+
1357
+ VALUE dst = rb_binary_str_buf_reserve(alloc_size);
1358
+ size_t total_out = 0;
1359
+
1360
+ size_t available_in = slen;
1361
+ const uint8_t *next_in = src;
1362
+
1363
+ VALUE scheduler = current_fiber_scheduler();
1364
+
1365
+ BrotliDecoderResult res = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT;
1366
+ while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
1367
+ size_t remaining_budget = limits.max_output_size - total_out;
1368
+ if (remaining_budget == 0) {
1369
+ BrotliDecoderDestroyInstance(dec);
1370
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1371
+ limits.max_output_size);
1372
+ }
1373
+
1374
+ size_t available_out = alloc_size - total_out;
1375
+ if (available_out > remaining_budget)
1376
+ available_out = remaining_budget;
1377
+ uint8_t *next_out = (uint8_t *)RSTRING_PTR(dst) + total_out;
1378
+
1379
+ if (scheduler != Qnil && available_in >= policy->fiber_stream_threshold) {
1380
+ brotli_decompress_stream_args_t sargs = {
1381
+ .dec = dec,
1382
+ .available_in = &available_in,
1383
+ .next_in = &next_in,
1384
+ .available_out = &available_out,
1385
+ .next_out = &next_out,
1386
+ .result = BROTLI_DECODER_RESULT_ERROR,
1387
+ };
1388
+ run_via_fiber_worker(scheduler, brotli_decompress_stream_nogvl, &sargs);
1389
+ res = sargs.result;
1390
+ } else {
1391
+ res = BrotliDecoderDecompressStream(dec, &available_in, &next_in, &available_out,
1392
+ &next_out, NULL);
1393
+ }
1394
+
1395
+ total_out = next_out - (uint8_t *)RSTRING_PTR(dst);
1396
+ enforce_output_and_ratio_limits(total_out, slen, limits.max_output_size,
1397
+ limits.max_ratio_enabled, limits.max_ratio);
1398
+
1399
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
1400
+ if (alloc_size >= limits.max_output_size) {
1401
+ BrotliDecoderDestroyInstance(dec);
1402
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
1403
+ limits.max_output_size);
1404
+ }
1405
+ size_t next_cap = alloc_size * 2;
1406
+ if (next_cap > limits.max_output_size)
1407
+ next_cap = limits.max_output_size;
1408
+ alloc_size = next_cap;
1409
+ grow_binary_str(dst, total_out, alloc_size);
1410
+ }
1411
+ }
1412
+
1413
+ BrotliDecoderDestroyInstance(dec);
1414
+
1415
+ if (res != BROTLI_DECODER_RESULT_SUCCESS) {
1416
+ rb_raise(eDataError, "brotli decompress failed");
1417
+ }
1418
+ rb_str_set_len(dst, total_out);
1419
+ RB_GC_GUARD(data);
1420
+ return dst;
1421
+ }
1422
+ }
1423
+
1424
+ return Qnil;
1425
+ }
1426
+
1427
/*
 * Lookup tables for the reflected CRC-32 polynomial 0xEDB88320.
 * Table 0 is the classic byte-at-a-time table; table t holds the remainder of
 * a byte followed by t zero bytes, which lets crc32_compute consume 8 input
 * bytes per step.
 */
static uint32_t crc32_tables[8][256];
static int crc32_tables_initialized = 0;

/* Fills crc32_tables on the first call; later calls return immediately. */
static void crc32_init_tables(void) {
    if (crc32_tables_initialized)
        return;

    /* Table 0: shift each byte value through the polynomial bit by bit. */
    for (uint32_t byte = 0; byte < 256; byte++) {
        uint32_t rem = byte;
        for (int bit = 0; bit < 8; bit++) {
            if (rem & 1u)
                rem = (rem >> 1) ^ 0xEDB88320u;
            else
                rem >>= 1;
        }
        crc32_tables[0][byte] = rem;
    }

    /* Table t is table t-1 advanced by one more zero byte. */
    for (int t = 1; t < 8; t++) {
        for (uint32_t byte = 0; byte < 256; byte++) {
            uint32_t prev = crc32_tables[t - 1][byte];
            crc32_tables[t][byte] = crc32_tables[0][prev & 0xFF] ^ (prev >> 8);
        }
    }

    crc32_tables_initialized = 1;
}

/*
 * Updates a CRC-32 with `len` bytes from `data`.
 *
 * crc  running checksum from a previous call, or 0 to start a new one.
 * Returns the updated checksum. crc32_init_tables() must have run before.
 */
static uint32_t crc32_compute(const uint8_t *data, size_t len, uint32_t crc) {
    uint32_t state = ~crc;
    const uint8_t *p = data;
    size_t left = len;

    /* Bulk: eight bytes per iteration, read as two little-endian words. */
    for (; left >= 8; left -= 8, p += 8) {
        uint32_t lo = (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
                      ((uint32_t)p[3] << 24);
        uint32_t hi = (uint32_t)p[4] | ((uint32_t)p[5] << 8) | ((uint32_t)p[6] << 16) |
                      ((uint32_t)p[7] << 24);
        uint32_t folded = 0;

        lo ^= state;
        for (int k = 0; k < 4; k++) {
            folded ^= crc32_tables[7 - k][(lo >> (8 * k)) & 0xFF];
            folded ^= crc32_tables[3 - k][(hi >> (8 * k)) & 0xFF];
        }
        state = folded;
    }

    /* Tail: remaining 0..7 bytes, one at a time. */
    for (; left > 0; left--, p++)
        state = crc32_tables[0][(state ^ *p) & 0xFF] ^ (state >> 8);

    return ~state;
}
1477
+
1478
+ static VALUE compress_crc32(int argc, VALUE *argv, VALUE self) {
1479
+ VALUE data, prev;
1480
+ rb_scan_args(argc, argv, "11", &data, &prev);
1481
+ StringValue(data);
1482
+
1483
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1484
+ size_t len = RSTRING_LEN(data);
1485
+ uint32_t crc = NIL_P(prev) ? 0 : NUM2UINT(prev);
1486
+
1487
+ return UINT2NUM(crc32_compute(src, len, crc));
1488
+ }
1489
+
1490
+ static VALUE compress_adler32(int argc, VALUE *argv, VALUE self) {
1491
+ VALUE data, prev;
1492
+ rb_scan_args(argc, argv, "11", &data, &prev);
1493
+ StringValue(data);
1494
+
1495
+ const uint8_t *src = (const uint8_t *)RSTRING_PTR(data);
1496
+ size_t len = RSTRING_LEN(data);
1497
+ uint32_t adler = NIL_P(prev) ? 1 : NUM2UINT(prev);
1498
+
1499
+ uint32_t s1 = adler & 0xFFFF;
1500
+ uint32_t s2 = (adler >> 16) & 0xFFFF;
1501
+ const uint32_t BASE = 65521;
1502
+
1503
+ while (len > 0) {
1504
+ size_t chunk = len > 5552 ? 5552 : len;
1505
+ len -= chunk;
1506
+ for (size_t i = 0; i < chunk; i++) {
1507
+ s1 += src[i];
1508
+ s2 += s1;
1509
+ }
1510
+ s1 %= BASE;
1511
+ s2 %= BASE;
1512
+ src += chunk;
1513
+ }
1514
+
1515
+ return UINT2NUM((s2 << 16) | s1);
1516
+ }
1517
+
1518
+ static VALUE compress_algorithms(VALUE self) {
1519
+ VALUE ary = rb_ary_new_capa(3);
1520
+ rb_ary_push(ary, sym_cache.zstd);
1521
+ rb_ary_push(ary, sym_cache.lz4);
1522
+ rb_ary_push(ary, sym_cache.brotli);
1523
+ return ary;
1524
+ }
1525
+
1526
+ static VALUE compress_available_p(VALUE self, VALUE algo_sym) {
1527
+ sym_to_algo(algo_sym);
1528
+ return Qtrue;
1529
+ }
1530
+
1531
+ static VALUE compress_version(VALUE self, VALUE algo_sym) {
1532
+ compress_algo_t algo = sym_to_algo(algo_sym);
1533
+ switch (algo) {
1534
+ case ALGO_ZSTD:
1535
+ return rb_str_new_cstr(ZSTD_versionString());
1536
+ case ALGO_LZ4:
1537
+ return rb_sprintf("%d.%d.%d", LZ4_VERSION_MAJOR, LZ4_VERSION_MINOR, LZ4_VERSION_RELEASE);
1538
+ case ALGO_BROTLI:
1539
+ return rb_sprintf("%d.%d.%d", BrotliEncoderVersion() >> 24,
1540
+ (BrotliEncoderVersion() >> 12) & 0xFFF, BrotliEncoderVersion() & 0xFFF);
1541
+ }
1542
+ return Qnil;
1543
+ }
1544
+
1545
+ #define LZ4_RING_BUFFER_SIZE (64 * 1024) /* pending-byte count at which deflater_write compresses a block */
1546
+ #define LZ4_RING_BUFFER_TOTAL (LZ4_RING_BUFFER_SIZE * 2) /* bytes allocated for lz4_ring.buf */
1547
+
1548
+ typedef struct { /* State behind a Compress::Deflater object (streaming compressor). */
1549
+ compress_algo_t algo; /* algorithm chosen in deflater_initialize; selects the active ctx member */
1550
+ int level; /* level resolved by resolve_level() for algo */
1551
+ int closed; /* set by deflater_close; write/flush/finish raise once it is set */
1552
+ int finished; /* set by deflater_finish; cleared again by deflater_reset */
1553
+
1554
+ union { /* native encoder handle; only the member matching algo is used */
1555
+ ZSTD_CStream *zstd;
1556
+ BrotliEncoderState *brotli;
1557
+ LZ4_stream_t *lz4;
1558
+ } ctx;
1559
+
1560
+ struct { /* LZ4 input staging buffer; buf is allocated only for ALGO_LZ4 */
1561
+ char *buf; /* LZ4_RING_BUFFER_TOTAL bytes, released in deflater_free */
1562
+ size_t ring_offset; /* position in buf where the next incoming bytes are copied */
1563
+ size_t pending; /* bytes copied into buf but not yet compressed */
1564
+ } lz4_ring;
1565
+ } deflater_t;
1566
+
1567
+ static void deflater_free(void *ptr) {
1568
+ deflater_t *d = (deflater_t *)ptr;
1569
+ if (!d->closed) {
1570
+ switch (d->algo) {
1571
+ case ALGO_ZSTD:
1572
+ if (d->ctx.zstd)
1573
+ ZSTD_freeCStream(d->ctx.zstd);
1574
+ break;
1575
+ case ALGO_BROTLI:
1576
+ if (d->ctx.brotli)
1577
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
1578
+ break;
1579
+ case ALGO_LZ4:
1580
+ if (d->ctx.lz4)
1581
+ LZ4_freeStream(d->ctx.lz4);
1582
+ break;
1583
+ }
1584
+ }
1585
+ if (d->lz4_ring.buf)
1586
+ xfree(d->lz4_ring.buf);
1587
+ xfree(d);
1588
+ }
1589
+
1590
+ static size_t deflater_memsize(const void *ptr) {
1591
+ const deflater_t *d = (const deflater_t *)ptr;
1592
+ size_t s = sizeof(deflater_t);
1593
+ if (d->lz4_ring.buf)
1594
+ s += LZ4_RING_BUFFER_TOTAL;
1595
+ return s;
1596
+ }
1597
+
1598
+ static const rb_data_type_t deflater_type = {"Compress::Deflater",
1599
+ {NULL, deflater_free, deflater_memsize},
1600
+ 0,
1601
+ 0,
1602
+ RUBY_TYPED_FREE_IMMEDIATELY};
1603
+
1604
+ static VALUE deflater_alloc(VALUE klass) {
1605
+ deflater_t *d = ALLOC(deflater_t);
1606
+ memset(d, 0, sizeof(deflater_t));
1607
+ return TypedData_Wrap_Struct(klass, &deflater_type, d);
1608
+ }
1609
+
1610
+ static VALUE deflater_initialize(int argc, VALUE *argv, VALUE self) {
1611
+ VALUE opts;
1612
+ rb_scan_args(argc, argv, "0:", &opts);
1613
+ reject_algorithm_keyword(opts);
1614
+
1615
+ deflater_t *d;
1616
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1617
+
1618
+ VALUE algo_sym = Qnil, level_val = Qnil, dict_val = Qnil;
1619
+ if (!NIL_P(opts)) {
1620
+ algo_sym = opt_get(opts, sym_cache.algo);
1621
+ level_val = opt_get(opts, sym_cache.level);
1622
+ dict_val = opt_get(opts, sym_cache.dictionary);
1623
+ }
1624
+
1625
+ d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
1626
+ d->level = resolve_level(d->algo, level_val);
1627
+ d->closed = 0;
1628
+ d->finished = 0;
1629
+
1630
+ dictionary_t *dict = NULL;
1631
+ if (!NIL_P(dict_val)) {
1632
+ if (d->algo == ALGO_LZ4) {
1633
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
1634
+ }
1635
+ dict = opt_dictionary(dict_val);
1636
+ dictionary_ivar_set(self, dict_val);
1637
+ }
1638
+
1639
+ switch (d->algo) {
1640
+ case ALGO_ZSTD: {
1641
+ d->ctx.zstd = ZSTD_createCStream();
1642
+ if (!d->ctx.zstd)
1643
+ rb_raise(eMemError, "zstd: failed to create stream");
1644
+
1645
+ if (dict) {
1646
+ ZSTD_CCtx_reset(d->ctx.zstd, ZSTD_reset_session_only);
1647
+ ZSTD_CCtx_setParameter(d->ctx.zstd, ZSTD_c_compressionLevel, d->level);
1648
+ size_t r = ZSTD_CCtx_loadDictionary(d->ctx.zstd, dict->data, dict->size);
1649
+ if (ZSTD_isError(r))
1650
+ rb_raise(eError, "zstd dict load: %s", ZSTD_getErrorName(r));
1651
+ } else {
1652
+ size_t r = ZSTD_initCStream(d->ctx.zstd, d->level);
1653
+ if (ZSTD_isError(r))
1654
+ rb_raise(eError, "zstd init: %s", ZSTD_getErrorName(r));
1655
+ }
1656
+ break;
1657
+ }
1658
+ case ALGO_BROTLI: {
1659
+ d->ctx.brotli = BrotliEncoderCreateInstance(NULL, NULL, NULL);
1660
+ if (!d->ctx.brotli)
1661
+ rb_raise(eMemError, "brotli: failed to create encoder");
1662
+ if (!BrotliEncoderSetParameter(d->ctx.brotli, BROTLI_PARAM_QUALITY, d->level)) {
1663
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
1664
+ d->ctx.brotli = NULL;
1665
+ rb_raise(eError, "brotli: failed to set quality parameter");
1666
+ }
1667
+ if (dict) {
1668
+ BrotliEncoderPreparedDictionary *pd =
1669
+ BrotliEncoderPrepareDictionary(BROTLI_SHARED_DICTIONARY_RAW, dict->size, dict->data,
1670
+ BROTLI_MAX_QUALITY, NULL, NULL, NULL);
1671
+ if (!pd) {
1672
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
1673
+ d->ctx.brotli = NULL;
1674
+ rb_raise(eMemError, "brotli: failed to prepare dictionary");
1675
+ }
1676
+ if (!BrotliEncoderAttachPreparedDictionary(d->ctx.brotli, pd)) {
1677
+ BrotliEncoderDestroyPreparedDictionary(pd);
1678
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
1679
+ d->ctx.brotli = NULL;
1680
+ rb_raise(eError, "brotli: failed to attach dictionary");
1681
+ }
1682
+ BrotliEncoderDestroyPreparedDictionary(pd);
1683
+ }
1684
+ break;
1685
+ }
1686
+ case ALGO_LZ4: {
1687
+ d->ctx.lz4 = LZ4_createStream();
1688
+ if (!d->ctx.lz4)
1689
+ rb_raise(eMemError, "lz4: failed to create stream");
1690
+ LZ4_resetStream(d->ctx.lz4);
1691
+ d->lz4_ring.buf = ALLOC_N(char, LZ4_RING_BUFFER_TOTAL);
1692
+ d->lz4_ring.ring_offset = 0;
1693
+ d->lz4_ring.pending = 0;
1694
+ break;
1695
+ }
1696
+ }
1697
+
1698
+ return self;
1699
+ }
1700
+
1701
+ static VALUE lz4_compress_ring_block(deflater_t *d) {
1702
+ if (d->lz4_ring.pending == 0)
1703
+ return rb_binary_str_new("", 0);
1704
+
1705
+ char *block_start = d->lz4_ring.buf + d->lz4_ring.ring_offset - d->lz4_ring.pending;
1706
+ int src_size = (int)d->lz4_ring.pending;
1707
+ int bound = LZ4_compressBound(src_size);
1708
+
1709
+ VALUE output = rb_binary_str_buf_new(8 + bound);
1710
+ char *out = RSTRING_PTR(output);
1711
+
1712
+ write_le_u32((uint8_t *)out, (uint32_t)src_size);
1713
+
1714
+ int csize = LZ4_compress_fast_continue(d->ctx.lz4, block_start, out + 8, src_size, bound, 1);
1715
+ if (csize <= 0)
1716
+ rb_raise(eError, "lz4 stream compress block failed");
1717
+
1718
+ write_le_u32((uint8_t *)out + 4, (uint32_t)csize);
1719
+
1720
+ rb_str_set_len(output, 8 + csize);
1721
+ d->lz4_ring.pending = 0;
1722
+
1723
+ if (d->lz4_ring.ring_offset >= LZ4_RING_BUFFER_SIZE) {
1724
+ d->lz4_ring.ring_offset = 0;
1725
+ }
1726
+
1727
+ return output;
1728
+ }
1729
+
1730
+ static VALUE deflater_write(VALUE self, VALUE chunk) {
1731
+ deflater_t *d;
1732
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1733
+ if (d->closed)
1734
+ rb_raise(eStreamError, "stream is closed");
1735
+ if (d->finished)
1736
+ rb_raise(eStreamError, "stream is already finished");
1737
+ StringValue(chunk);
1738
+
1739
+ const char *src = RSTRING_PTR(chunk);
1740
+ size_t slen = RSTRING_LEN(chunk);
1741
+ const algo_policy_t *policy = algo_policy(d->algo);
1742
+ if (slen == 0)
1743
+ return rb_binary_str_new("", 0);
1744
+
1745
+ switch (d->algo) {
1746
+ case ALGO_ZSTD: {
1747
+ ZSTD_inBuffer input = {src, slen, 0};
1748
+ size_t out_cap = ZSTD_CStreamOutSize();
1749
+ size_t result_cap = out_cap > slen ? out_cap : slen;
1750
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
1751
+ size_t result_len = 0;
1752
+ VALUE scheduler = current_fiber_scheduler();
1753
+
1754
+ while (input.pos < input.size) {
1755
+ if (result_len + out_cap > result_cap) {
1756
+ result_cap = result_cap * 2;
1757
+ grow_binary_str(result, result_len, result_cap);
1758
+ }
1759
+
1760
+ ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, out_cap, 0};
1761
+
1762
+ if (scheduler != Qnil && (input.size - input.pos) >= policy->fiber_stream_threshold) {
1763
+ zstd_stream_chunk_fiber_t fargs = {
1764
+ .cstream = d->ctx.zstd,
1765
+ .input = &input,
1766
+ .output = &output,
1767
+ .result = 0,
1768
+ .scheduler = scheduler,
1769
+ .blocker = rb_obj_alloc(rb_cObject),
1770
+ .fiber = rb_fiber_current(),
1771
+ };
1772
+ VALUE th = rb_thread_create(zstd_stream_chunk_fiber_thread, &fargs);
1773
+ rb_fiber_scheduler_block(scheduler, fargs.blocker, Qnil);
1774
+ join_thread(th);
1775
+
1776
+ if (ZSTD_isError(fargs.result))
1777
+ rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(fargs.result));
1778
+ } else if (scheduler == Qnil &&
1779
+ (input.size - input.pos) >= policy->gvl_unlock_threshold) {
1780
+ zstd_stream_chunk_args_t args = {
1781
+ .cstream = d->ctx.zstd,
1782
+ .output = &output,
1783
+ .input = &input,
1784
+ .result = 0,
1785
+ };
1786
+ run_without_gvl(zstd_compress_stream_chunk_nogvl, &args);
1787
+ if (ZSTD_isError(args.result))
1788
+ rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(args.result));
1789
+ } else {
1790
+ size_t ret = ZSTD_compressStream(d->ctx.zstd, &output, &input);
1791
+ if (ZSTD_isError(ret))
1792
+ rb_raise(eError, "zstd compress stream: %s", ZSTD_getErrorName(ret));
1793
+ }
1794
+ result_len += output.pos;
1795
+ }
1796
+ rb_str_set_len(result, result_len);
1797
+ RB_GC_GUARD(chunk);
1798
+ return result;
1799
+ }
1800
+ case ALGO_BROTLI: {
1801
+ size_t available_in = slen;
1802
+ const uint8_t *next_in = (const uint8_t *)src;
1803
+ size_t result_cap = slen;
1804
+ if (result_cap < 1024)
1805
+ result_cap = 1024;
1806
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
1807
+ size_t result_len = 0;
1808
+ VALUE scheduler = current_fiber_scheduler();
1809
+ int use_fiber = (scheduler != Qnil);
1810
+ size_t fiber_counter = 0;
1811
+
1812
+ while (available_in > 0 || BrotliEncoderHasMoreOutput(d->ctx.brotli)) {
1813
+ size_t available_out = 0;
1814
+ uint8_t *next_out = NULL;
1815
+ BROTLI_BOOL ok;
1816
+
1817
+ if (use_fiber && available_in >= policy->fiber_stream_threshold) {
1818
+ brotli_stream_chunk_fiber_t fargs = {
1819
+ .enc = d->ctx.brotli,
1820
+ .op = BROTLI_OPERATION_PROCESS,
1821
+ .available_in = &available_in,
1822
+ .next_in = &next_in,
1823
+ .available_out = &available_out,
1824
+ .next_out = &next_out,
1825
+ .result = BROTLI_FALSE,
1826
+ .scheduler = scheduler,
1827
+ .blocker = rb_obj_alloc(rb_cObject),
1828
+ .fiber = rb_fiber_current(),
1829
+ };
1830
+ VALUE th = rb_thread_create(brotli_stream_chunk_fiber_thread, &fargs);
1831
+ rb_fiber_scheduler_block(scheduler, fargs.blocker, Qnil);
1832
+ join_thread(th);
1833
+ ok = fargs.result;
1834
+ } else {
1835
+ ok = BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_PROCESS,
1836
+ &available_in, &next_in, &available_out, &next_out,
1837
+ NULL);
1838
+ }
1839
+ if (!ok)
1840
+ rb_raise(eError, "brotli compress stream failed");
1841
+
1842
+ const uint8_t *out_data;
1843
+ size_t out_size = 0;
1844
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
1845
+ if (out_size > 0) {
1846
+ if (result_len + out_size > result_cap) {
1847
+ result_cap = (result_len + out_size) * 2;
1848
+ grow_binary_str(result, result_len, result_cap);
1849
+ }
1850
+
1851
+ memcpy(RSTRING_PTR(result) + result_len, out_data, out_size);
1852
+ result_len += out_size;
1853
+ if (use_fiber) {
1854
+ int did_yield = 0;
1855
+ fiber_counter = fiber_maybe_yield(fiber_counter, out_size,
1856
+ policy->fiber_yield_chunk, &did_yield);
1857
+ (void)did_yield;
1858
+ }
1859
+ }
1860
+ }
1861
+ rb_str_set_len(result, result_len);
1862
+ RB_GC_GUARD(chunk);
1863
+ return result;
1864
+ }
1865
+ case ALGO_LZ4: {
1866
+ VALUE result = rb_binary_str_buf_reserve(0);
1867
+ size_t result_len = 0;
1868
+ size_t result_cap = 0;
1869
+ int use_fiber = has_fiber_scheduler();
1870
+ size_t fiber_counter = 0;
1871
+
1872
+ while (slen > 0) {
1873
+ size_t space = LZ4_RING_BUFFER_SIZE - d->lz4_ring.pending;
1874
+ size_t copy = slen < space ? slen : space;
1875
+
1876
+ if (d->lz4_ring.ring_offset + copy > LZ4_RING_BUFFER_TOTAL) {
1877
+ rb_raise(eError, "lz4: ring buffer overflow");
1878
+ }
1879
+
1880
+ memcpy(d->lz4_ring.buf + d->lz4_ring.ring_offset, src, copy);
1881
+ d->lz4_ring.ring_offset += copy;
1882
+ d->lz4_ring.pending += copy;
1883
+ src += copy;
1884
+ slen -= copy;
1885
+
1886
+ if (d->lz4_ring.pending >= (size_t)LZ4_RING_BUFFER_SIZE) {
1887
+ VALUE block = lz4_compress_ring_block(d);
1888
+ size_t blen = RSTRING_LEN(block);
1889
+ if (blen > 0) {
1890
+ if (result_len + blen > result_cap) {
1891
+ result_cap = (result_len + blen) * 2;
1892
+ if (result_cap < 256)
1893
+ result_cap = 256;
1894
+ grow_binary_str(result, result_len, result_cap);
1895
+ }
1896
+ memcpy(RSTRING_PTR(result) + result_len, RSTRING_PTR(block), blen);
1897
+ result_len += blen;
1898
+ }
1899
+ if (use_fiber) {
1900
+ int did_yield = 0;
1901
+ fiber_counter = fiber_maybe_yield(fiber_counter, LZ4_RING_BUFFER_SIZE,
1902
+ policy->fiber_yield_chunk, &did_yield);
1903
+ (void)did_yield;
1904
+ }
1905
+ }
1906
+ }
1907
+ rb_str_set_len(result, result_len);
1908
+ RB_GC_GUARD(chunk);
1909
+ return result;
1910
+ }
1911
+ }
1912
+ return rb_binary_str_new("", 0);
1913
+ }
1914
+
1915
+ static VALUE deflater_flush(VALUE self) {
1916
+ deflater_t *d;
1917
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1918
+ if (d->closed)
1919
+ rb_raise(eStreamError, "stream is closed");
1920
+ if (d->finished)
1921
+ rb_raise(eStreamError, "stream is already finished");
1922
+
1923
+ switch (d->algo) {
1924
+ case ALGO_ZSTD: {
1925
+ size_t out_cap = ZSTD_CStreamOutSize();
1926
+ size_t result_cap = out_cap;
1927
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
1928
+ size_t result_len = 0;
1929
+ size_t ret;
1930
+
1931
+ do {
1932
+ if (result_len + out_cap > result_cap) {
1933
+ result_cap *= 2;
1934
+ grow_binary_str(result, result_len, result_cap);
1935
+ }
1936
+
1937
+ ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, out_cap, 0};
1938
+ ret = ZSTD_flushStream(d->ctx.zstd, &output);
1939
+ if (ZSTD_isError(ret))
1940
+ rb_raise(eError, "zstd flush: %s", ZSTD_getErrorName(ret));
1941
+ result_len += output.pos;
1942
+ } while (ret > 0);
1943
+
1944
+ rb_str_set_len(result, result_len);
1945
+ return result;
1946
+ }
1947
+ case ALGO_BROTLI: {
1948
+ size_t available_in = 0;
1949
+ const uint8_t *next_in = NULL;
1950
+ size_t result_cap = 1024;
1951
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
1952
+ size_t result_len = 0;
1953
+
1954
+ do {
1955
+ size_t available_out = 0;
1956
+ uint8_t *next_out = NULL;
1957
+ BROTLI_BOOL ok =
1958
+ BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_FLUSH, &available_in,
1959
+ &next_in, &available_out, &next_out, NULL);
1960
+ if (!ok)
1961
+ rb_raise(eError, "brotli flush failed");
1962
+ const uint8_t *out_data;
1963
+ size_t out_size = 0;
1964
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
1965
+ if (out_size > 0) {
1966
+ if (result_len + out_size > result_cap) {
1967
+ result_cap = (result_len + out_size) * 2;
1968
+ grow_binary_str(result, result_len, result_cap);
1969
+ }
1970
+
1971
+ memcpy(RSTRING_PTR(result) + result_len, out_data, out_size);
1972
+ result_len += out_size;
1973
+ }
1974
+ } while (BrotliEncoderHasMoreOutput(d->ctx.brotli));
1975
+
1976
+ rb_str_set_len(result, result_len);
1977
+ return result;
1978
+ }
1979
+ case ALGO_LZ4:
1980
+ return lz4_compress_ring_block(d);
1981
+ }
1982
+ return rb_binary_str_new("", 0);
1983
+ }
1984
+
1985
+ static VALUE deflater_finish(VALUE self) {
1986
+ deflater_t *d;
1987
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
1988
+ if (d->closed)
1989
+ rb_raise(eStreamError, "stream is closed");
1990
+ if (d->finished)
1991
+ return rb_binary_str_new("", 0);
1992
+ d->finished = 1;
1993
+
1994
+ switch (d->algo) {
1995
+ case ALGO_ZSTD: {
1996
+ size_t out_cap = ZSTD_CStreamOutSize();
1997
+ size_t result_cap = out_cap;
1998
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
1999
+ size_t result_len = 0;
2000
+ size_t ret;
2001
+
2002
+ do {
2003
+ if (result_len + out_cap > result_cap) {
2004
+ result_cap *= 2;
2005
+ grow_binary_str(result, result_len, result_cap);
2006
+ }
2007
+
2008
+ ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, out_cap, 0};
2009
+ ret = ZSTD_endStream(d->ctx.zstd, &output);
2010
+ if (ZSTD_isError(ret))
2011
+ rb_raise(eError, "zstd end stream: %s", ZSTD_getErrorName(ret));
2012
+ result_len += output.pos;
2013
+ } while (ret > 0);
2014
+
2015
+ rb_str_set_len(result, result_len);
2016
+ return result;
2017
+ }
2018
+ case ALGO_BROTLI: {
2019
+ size_t available_in = 0;
2020
+ const uint8_t *next_in = NULL;
2021
+ size_t result_cap = 1024;
2022
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
2023
+ size_t result_len = 0;
2024
+
2025
+ do {
2026
+ size_t available_out = 0;
2027
+ uint8_t *next_out = NULL;
2028
+ BROTLI_BOOL ok =
2029
+ BrotliEncoderCompressStream(d->ctx.brotli, BROTLI_OPERATION_FINISH, &available_in,
2030
+ &next_in, &available_out, &next_out, NULL);
2031
+ if (!ok)
2032
+ rb_raise(eError, "brotli finish failed");
2033
+ const uint8_t *out_data;
2034
+ size_t out_size = 0;
2035
+ out_data = BrotliEncoderTakeOutput(d->ctx.brotli, &out_size);
2036
+ if (out_size > 0) {
2037
+ if (result_len + out_size > result_cap) {
2038
+ result_cap = (result_len + out_size) * 2;
2039
+ grow_binary_str(result, result_len, result_cap);
2040
+ }
2041
+
2042
+ memcpy(RSTRING_PTR(result) + result_len, out_data, out_size);
2043
+ result_len += out_size;
2044
+ }
2045
+ } while (BrotliEncoderHasMoreOutput(d->ctx.brotli) ||
2046
+ !BrotliEncoderIsFinished(d->ctx.brotli));
2047
+
2048
+ rb_str_set_len(result, result_len);
2049
+ return result;
2050
+ }
2051
+ case ALGO_LZ4: {
2052
+ size_t result_cap = 256;
2053
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
2054
+ size_t result_len = 0;
2055
+
2056
+ if (d->lz4_ring.pending > 0) {
2057
+ VALUE block = lz4_compress_ring_block(d);
2058
+ size_t blen = RSTRING_LEN(block);
2059
+ if (blen > 0) {
2060
+ if (blen + 4 > result_cap) {
2061
+ result_cap = blen + 4;
2062
+ grow_binary_str(result, result_len, result_cap);
2063
+ }
2064
+
2065
+ memcpy(RSTRING_PTR(result), RSTRING_PTR(block), blen);
2066
+ result_len = blen;
2067
+ }
2068
+ }
2069
+
2070
+ if (result_len + 4 > result_cap) {
2071
+ result_cap = result_len + 4;
2072
+ grow_binary_str(result, result_len, result_cap);
2073
+ }
2074
+
2075
+ char *out = RSTRING_PTR(result) + result_len;
2076
+ write_le_u32((uint8_t *)out, 0);
2077
+ result_len += 4;
2078
+
2079
+ rb_str_set_len(result, result_len);
2080
+ return result;
2081
+ }
2082
+ }
2083
+ return rb_binary_str_new("", 0);
2084
+ }
2085
+
2086
+ static VALUE deflater_reset(VALUE self) {
2087
+ deflater_t *d;
2088
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
2089
+
2090
+ VALUE dict_val = dictionary_ivar_get(self);
2091
+ dictionary_t *dict = NULL;
2092
+ if (!NIL_P(dict_val)) {
2093
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
2094
+ }
2095
+
2096
+ switch (d->algo) {
2097
+ case ALGO_ZSTD:
2098
+ if (d->ctx.zstd) {
2099
+ ZSTD_CCtx_reset(d->ctx.zstd, ZSTD_reset_session_only);
2100
+ ZSTD_CCtx_setParameter(d->ctx.zstd, ZSTD_c_compressionLevel, d->level);
2101
+ if (dict) {
2102
+ size_t r = ZSTD_CCtx_loadDictionary(d->ctx.zstd, dict->data, dict->size);
2103
+ if (ZSTD_isError(r))
2104
+ rb_raise(eError, "zstd dict reload on reset: %s", ZSTD_getErrorName(r));
2105
+ }
2106
+ }
2107
+ break;
2108
+ case ALGO_BROTLI:
2109
+ if (d->ctx.brotli) {
2110
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
2111
+ d->ctx.brotli = BrotliEncoderCreateInstance(NULL, NULL, NULL);
2112
+ if (!d->ctx.brotli)
2113
+ rb_raise(eMemError, "brotli: failed to recreate encoder");
2114
+ if (!BrotliEncoderSetParameter(d->ctx.brotli, BROTLI_PARAM_QUALITY, d->level))
2115
+ rb_raise(eError, "brotli: failed to set quality on reset");
2116
+ if (dict) {
2117
+ BrotliEncoderPreparedDictionary *pd = BrotliEncoderPrepareDictionary(
2118
+ BROTLI_SHARED_DICTIONARY_RAW, dict->size, dict->data, BROTLI_MAX_QUALITY, NULL,
2119
+ NULL, NULL);
2120
+ if (!pd)
2121
+ rb_raise(eMemError, "brotli: failed to prepare dictionary on reset");
2122
+ if (!BrotliEncoderAttachPreparedDictionary(d->ctx.brotli, pd)) {
2123
+ BrotliEncoderDestroyPreparedDictionary(pd);
2124
+ rb_raise(eError, "brotli: failed to reattach dictionary on reset");
2125
+ }
2126
+ BrotliEncoderDestroyPreparedDictionary(pd);
2127
+ }
2128
+ }
2129
+ break;
2130
+ case ALGO_LZ4:
2131
+ if (d->ctx.lz4)
2132
+ LZ4_resetStream(d->ctx.lz4);
2133
+ d->lz4_ring.ring_offset = 0;
2134
+ d->lz4_ring.pending = 0;
2135
+ break;
2136
+ }
2137
+ d->closed = 0;
2138
+ d->finished = 0;
2139
+ return self;
2140
+ }
2141
+
2142
+ static VALUE deflater_close(VALUE self) {
2143
+ deflater_t *d;
2144
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
2145
+ if (d->closed)
2146
+ return Qnil;
2147
+
2148
+ switch (d->algo) {
2149
+ case ALGO_ZSTD:
2150
+ if (d->ctx.zstd) {
2151
+ ZSTD_freeCStream(d->ctx.zstd);
2152
+ d->ctx.zstd = NULL;
2153
+ }
2154
+ break;
2155
+ case ALGO_BROTLI:
2156
+ if (d->ctx.brotli) {
2157
+ BrotliEncoderDestroyInstance(d->ctx.brotli);
2158
+ d->ctx.brotli = NULL;
2159
+ }
2160
+ break;
2161
+ case ALGO_LZ4:
2162
+ if (d->ctx.lz4) {
2163
+ LZ4_freeStream(d->ctx.lz4);
2164
+ d->ctx.lz4 = NULL;
2165
+ }
2166
+ break;
2167
+ }
2168
+ d->closed = 1;
2169
+ return Qnil;
2170
+ }
2171
+
2172
+ static VALUE deflater_closed_p(VALUE self) {
2173
+ deflater_t *d;
2174
+ TypedData_Get_Struct(self, deflater_t, &deflater_type, d);
2175
+ return d->closed ? Qtrue : Qfalse;
2176
+ }
2177
+
2178
+ typedef struct { /* State behind a Compress::Inflater object (streaming decompressor). */
2179
+ compress_algo_t algo; /* algorithm chosen in inflater_initialize */
2180
+ int closed; /* checked by inflater_write (raises when set) and by inflater_free */
2181
+ int finished; /* cleared by inflater_initialize */
2182
+ size_t max_output_size; /* copied from the parsed limits; cap on total decompressed bytes */
2183
+ size_t total_output; /* decompressed bytes produced so far across write calls */
2184
+ size_t total_input; /* compressed bytes passed to write so far */
2185
+ int max_ratio_enabled; /* copied from the parsed limits; passed to enforce_output_and_ratio_limits */
2186
+ unsigned long long max_ratio; /* copied from the parsed limits; passed to enforce_output_and_ratio_limits */
2187
+
2188
+ union { /* native decoder handle; LZ4 has no member here */
2189
+ ZSTD_DStream *zstd;
2190
+ BrotliDecoderState *brotli;
2191
+ } ctx;
2192
+
2193
+ struct { /* input buffer used for ALGO_LZ4 only */
2194
+ char *buf; /* allocated in inflater_initialize, released in inflater_free */
2195
+ size_t len; /* NOTE(review): appears to be the end of the valid bytes in buf - confirm in inflater_write */
2196
+ size_t cap; /* allocated size of buf (16 KiB initially) */
2197
+ size_t offset; /* NOTE(review): appears to be the start of the unconsumed bytes in buf - confirm in inflater_write */
2198
+ } lz4_buf;
2199
+ } inflater_t;
2200
+
2201
+ static void inflater_free(void *ptr) {
2202
+ inflater_t *inf = (inflater_t *)ptr;
2203
+ if (!inf->closed) {
2204
+ switch (inf->algo) {
2205
+ case ALGO_ZSTD:
2206
+ if (inf->ctx.zstd)
2207
+ ZSTD_freeDStream(inf->ctx.zstd);
2208
+ break;
2209
+ case ALGO_BROTLI:
2210
+ if (inf->ctx.brotli)
2211
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
2212
+ break;
2213
+ case ALGO_LZ4:
2214
+ break;
2215
+ }
2216
+ }
2217
+ if (inf->lz4_buf.buf)
2218
+ xfree(inf->lz4_buf.buf);
2219
+ xfree(inf);
2220
+ }
2221
+
2222
+ static size_t inflater_memsize(const void *ptr) {
2223
+ const inflater_t *inf = (const inflater_t *)ptr;
2224
+ return sizeof(inflater_t) + inf->lz4_buf.cap;
2225
+ }
2226
+
2227
+ static const rb_data_type_t inflater_type = {"Compress::Inflater",
2228
+ {NULL, inflater_free, inflater_memsize},
2229
+ 0,
2230
+ 0,
2231
+ RUBY_TYPED_FREE_IMMEDIATELY};
2232
+
2233
+ static VALUE inflater_alloc(VALUE klass) {
2234
+ inflater_t *inf = ALLOC(inflater_t);
2235
+ memset(inf, 0, sizeof(inflater_t));
2236
+ return TypedData_Wrap_Struct(klass, &inflater_type, inf);
2237
+ }
2238
+
2239
+ static VALUE inflater_initialize(int argc, VALUE *argv, VALUE self) {
2240
+ VALUE opts;
2241
+ rb_scan_args(argc, argv, "0:", &opts);
2242
+ reject_algorithm_keyword(opts);
2243
+
2244
+ inflater_t *inf;
2245
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2246
+
2247
+ VALUE algo_sym = Qnil, dict_val = Qnil;
2248
+ limits_config_t limits;
2249
+ parse_limits_from_opts(opts, &limits);
2250
+ if (!NIL_P(opts)) {
2251
+ algo_sym = opt_get(opts, sym_cache.algo);
2252
+ dict_val = opt_get(opts, sym_cache.dictionary);
2253
+ }
2254
+
2255
+ inf->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
2256
+ inf->closed = 0;
2257
+ inf->finished = 0;
2258
+ inf->max_output_size = limits.max_output_size;
2259
+ inf->total_output = 0;
2260
+ inf->total_input = 0;
2261
+ inf->max_ratio_enabled = limits.max_ratio_enabled;
2262
+ inf->max_ratio = limits.max_ratio;
2263
+
2264
+ dictionary_t *dict = NULL;
2265
+ if (!NIL_P(dict_val)) {
2266
+ if (inf->algo == ALGO_LZ4) {
2267
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
2268
+ }
2269
+ dict = opt_dictionary(dict_val);
2270
+ dictionary_ivar_set(self, dict_val);
2271
+ }
2272
+
2273
+ switch (inf->algo) {
2274
+ case ALGO_ZSTD:
2275
+ inf->ctx.zstd = ZSTD_createDStream();
2276
+ if (!inf->ctx.zstd)
2277
+ rb_raise(eMemError, "zstd: failed to create dstream");
2278
+ if (dict) {
2279
+ ZSTD_DCtx_reset(inf->ctx.zstd, ZSTD_reset_session_only);
2280
+ size_t r = ZSTD_DCtx_loadDictionary(inf->ctx.zstd, dict->data, dict->size);
2281
+ if (ZSTD_isError(r))
2282
+ rb_raise(eError, "zstd dict load: %s", ZSTD_getErrorName(r));
2283
+ } else {
2284
+ ZSTD_initDStream(inf->ctx.zstd);
2285
+ }
2286
+ break;
2287
+ case ALGO_BROTLI:
2288
+ inf->ctx.brotli = BrotliDecoderCreateInstance(NULL, NULL, NULL);
2289
+ if (!inf->ctx.brotli)
2290
+ rb_raise(eMemError, "brotli: failed to create decoder");
2291
+ if (dict) {
2292
+ BrotliDecoderAttachDictionary(inf->ctx.brotli, BROTLI_SHARED_DICTIONARY_RAW, dict->size,
2293
+ dict->data);
2294
+ }
2295
+ break;
2296
+ case ALGO_LZ4:
2297
+ inf->lz4_buf.cap = 16 * 1024;
2298
+ inf->lz4_buf.buf = ALLOC_N(char, inf->lz4_buf.cap);
2299
+ inf->lz4_buf.len = 0;
2300
+ inf->lz4_buf.offset = 0;
2301
+ break;
2302
+ }
2303
+
2304
+ return self;
2305
+ }
2306
+
2307
+ static VALUE inflater_write(VALUE self, VALUE chunk) {
2308
+ inflater_t *inf;
2309
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2310
+ if (inf->closed)
2311
+ rb_raise(eStreamError, "stream is closed");
2312
+ StringValue(chunk);
2313
+
2314
+ const char *src = RSTRING_PTR(chunk);
2315
+ size_t slen = RSTRING_LEN(chunk);
2316
+ const algo_policy_t *policy = algo_policy(inf->algo);
2317
+ if (slen == 0)
2318
+ return rb_binary_str_new("", 0);
2319
+
2320
+ inf->total_input =
2321
+ checked_add_size(inf->total_input, slen, "compressed input exceeds representable size");
2322
+
2323
+ switch (inf->algo) {
2324
+ case ALGO_ZSTD: {
2325
+ ZSTD_inBuffer input = {src, slen, 0};
2326
+ size_t out_cap = ZSTD_DStreamOutSize();
2327
+ size_t result_cap = out_cap > slen * 2 ? out_cap : slen * 2;
2328
+ size_t remaining_total_budget =
2329
+ inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
2330
+ if (remaining_total_budget == 0)
2331
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
2332
+ inf->max_output_size);
2333
+ if (result_cap > remaining_total_budget)
2334
+ result_cap = remaining_total_budget;
2335
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
2336
+ size_t result_len = 0;
2337
+ VALUE scheduler = current_fiber_scheduler();
2338
+
2339
+ while (input.pos < input.size) {
2340
+ size_t remaining_budget = inf->max_output_size - inf->total_output - result_len;
2341
+ if (remaining_budget == 0)
2342
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
2343
+ inf->max_output_size);
2344
+
2345
+ if (result_len + out_cap > result_cap) {
2346
+ size_t next_cap = result_cap * 2;
2347
+ if (next_cap > inf->max_output_size - inf->total_output)
2348
+ next_cap = inf->max_output_size - inf->total_output;
2349
+ result_cap = next_cap;
2350
+ rb_str_resize(result, result_cap);
2351
+ }
2352
+
2353
+ size_t current_out_cap = out_cap;
2354
+ if (current_out_cap > remaining_budget)
2355
+ current_out_cap = remaining_budget;
2356
+
2357
+ ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, current_out_cap, 0};
2358
+ size_t ret;
2359
+
2360
+ if (scheduler != Qnil && (input.size - input.pos) >= policy->fiber_stream_threshold) {
2361
+ zstd_decompress_stream_chunk_args_t args = {
2362
+ .dstream = inf->ctx.zstd,
2363
+ .output = &output,
2364
+ .input = &input,
2365
+ .result = 0,
2366
+ };
2367
+ run_via_fiber_worker(scheduler, zstd_decompress_stream_chunk_nogvl, &args);
2368
+ ret = args.result;
2369
+ } else {
2370
+ ret = ZSTD_decompressStream(inf->ctx.zstd, &output, &input);
2371
+ }
2372
+
2373
+ if (ZSTD_isError(ret))
2374
+ rb_raise(eDataError, "zstd decompress stream: %s", ZSTD_getErrorName(ret));
2375
+ result_len = checked_add_size(result_len, output.pos,
2376
+ "decompressed output exceeds representable size");
2377
+ size_t total_output = checked_add_size(
2378
+ inf->total_output, result_len, "decompressed output exceeds representable size");
2379
+ enforce_output_and_ratio_limits(total_output, inf->total_input, inf->max_output_size,
2380
+ inf->max_ratio_enabled, inf->max_ratio);
2381
+ if (ret == 0)
2382
+ break;
2383
+ }
2384
+ inf->total_output = checked_add_size(inf->total_output, result_len,
2385
+ "decompressed output exceeds representable size");
2386
+ rb_str_set_len(result, result_len);
2387
+ RB_GC_GUARD(chunk);
2388
+ return result;
2389
+ }
2390
+ case ALGO_BROTLI: {
2391
+ size_t available_in = slen;
2392
+ const uint8_t *next_in = (const uint8_t *)src;
2393
+ size_t remaining_total_budget =
2394
+ inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
2395
+ if (remaining_total_budget == 0)
2396
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
2397
+ inf->max_output_size);
2398
+ size_t result_cap = slen * 2;
2399
+ if (result_cap < 1024)
2400
+ result_cap = 1024;
2401
+ if (result_cap > remaining_total_budget)
2402
+ result_cap = remaining_total_budget;
2403
+ VALUE result = rb_binary_str_buf_reserve(result_cap);
2404
+ size_t result_len = 0;
2405
+ VALUE scheduler = current_fiber_scheduler();
2406
+
2407
+ while (available_in > 0 || BrotliDecoderHasMoreOutput(inf->ctx.brotli)) {
2408
+ size_t available_out = 0;
2409
+ uint8_t *next_out = NULL;
2410
+ BrotliDecoderResult res;
2411
+
2412
+ if (scheduler != Qnil && available_in >= policy->fiber_stream_threshold) {
2413
+ brotli_decompress_stream_args_t sargs = {
2414
+ .dec = inf->ctx.brotli,
2415
+ .available_in = &available_in,
2416
+ .next_in = &next_in,
2417
+ .available_out = &available_out,
2418
+ .next_out = &next_out,
2419
+ .result = BROTLI_DECODER_RESULT_ERROR,
2420
+ };
2421
+ run_via_fiber_worker(scheduler, brotli_decompress_stream_nogvl, &sargs);
2422
+ res = sargs.result;
2423
+ } else {
2424
+ res = BrotliDecoderDecompressStream(inf->ctx.brotli, &available_in, &next_in,
2425
+ &available_out, &next_out, NULL);
2426
+ }
2427
+ if (res == BROTLI_DECODER_RESULT_ERROR)
2428
+ rb_raise(eDataError, "brotli decompress stream: %s",
2429
+ BrotliDecoderErrorString(BrotliDecoderGetErrorCode(inf->ctx.brotli)));
2430
+ const uint8_t *out_data;
2431
+ size_t out_size = 0;
2432
+ out_data = BrotliDecoderTakeOutput(inf->ctx.brotli, &out_size);
2433
+ if (out_size > 0) {
2434
+ size_t total_output = checked_add_size(
2435
+ inf->total_output,
2436
+ checked_add_size(result_len, out_size,
2437
+ "decompressed output exceeds representable size"),
2438
+ "decompressed output exceeds representable size");
2439
+ enforce_output_and_ratio_limits(total_output, inf->total_input,
2440
+ inf->max_output_size, inf->max_ratio_enabled,
2441
+ inf->max_ratio);
2442
+
2443
+ if (result_len + out_size > result_cap) {
2444
+ result_cap = result_len + out_size;
2445
+ rb_str_resize(result, result_cap);
2446
+ }
2447
+
2448
+ memcpy(RSTRING_PTR(result) + result_len, out_data, out_size);
2449
+ result_len += out_size;
2450
+ }
2451
+ if (res == BROTLI_DECODER_RESULT_SUCCESS)
2452
+ break;
2453
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && available_in == 0)
2454
+ break;
2455
+ }
2456
+ inf->total_output = checked_add_size(inf->total_output, result_len,
2457
+ "decompressed output exceeds representable size");
2458
+ rb_str_set_len(result, result_len);
2459
+ RB_GC_GUARD(chunk);
2460
+ return result;
2461
+ }
2462
+ case ALGO_LZ4: {
2463
+ size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
2464
+ size_t needed = data_len + slen;
2465
+ // TODO(v0.4): optional standard LZ4 frame format support via lz4frame.h
2466
+
2467
+ if (inf->lz4_buf.offset > 0 && needed > inf->lz4_buf.cap) {
2468
+ if (data_len > 0)
2469
+ memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
2470
+ inf->lz4_buf.offset = 0;
2471
+ inf->lz4_buf.len = data_len;
2472
+ } else if (inf->lz4_buf.offset > inf->lz4_buf.cap / 2) {
2473
+ if (data_len > 0)
2474
+ memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
2475
+ inf->lz4_buf.offset = 0;
2476
+ inf->lz4_buf.len = data_len;
2477
+ }
2478
+
2479
+ needed = inf->lz4_buf.len + slen;
2480
+ if (needed > inf->lz4_buf.cap) {
2481
+ inf->lz4_buf.cap = needed * 2;
2482
+ REALLOC_N(inf->lz4_buf.buf, char, inf->lz4_buf.cap);
2483
+ }
2484
+ memcpy(inf->lz4_buf.buf + inf->lz4_buf.len, src, slen);
2485
+ inf->lz4_buf.len += slen;
2486
+
2487
+ size_t remaining_total_budget =
2488
+ inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
2489
+ if (remaining_total_budget == 0)
2490
+ rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
2491
+ inf->max_output_size);
2492
+ size_t result_cap = slen * 4;
2493
+ if (result_cap < 256)
2494
+ result_cap = 256;
2495
+ if (result_cap > remaining_total_budget)
2496
+ result_cap = remaining_total_budget;
2497
+ VALUE result = rb_binary_str_buf_new(result_cap);
2498
+ size_t result_len = 0;
2499
+ int use_fiber = has_fiber_scheduler();
2500
+ size_t fiber_counter = 0;
2501
+
2502
+ size_t pos = inf->lz4_buf.offset;
2503
+ while (pos + 4 <= inf->lz4_buf.len) {
2504
+ const uint8_t *p = (const uint8_t *)(inf->lz4_buf.buf + pos);
2505
+ uint32_t orig_size = read_le_u32(p);
2506
+ if (orig_size == 0) {
2507
+ inf->finished = 1;
2508
+ pos += 4;
2509
+ break;
2510
+ }
2511
+ if (pos + 8 > inf->lz4_buf.len)
2512
+ break;
2513
+ uint32_t comp_size = read_le_u32(p + 4);
2514
+ if (pos + 8 + comp_size > inf->lz4_buf.len)
2515
+ break;
2516
+ if (orig_size > 64 * 1024 * 1024)
2517
+ rb_raise(eDataError, "lz4 stream: block too large (%u)", orig_size);
2518
+
2519
+ size_t total_output =
2520
+ checked_add_size(inf->total_output,
2521
+ checked_add_size(result_len, orig_size,
2522
+ "decompressed output exceeds representable size"),
2523
+ "decompressed output exceeds representable size");
2524
+ enforce_output_and_ratio_limits(total_output, inf->total_input, inf->max_output_size,
2525
+ inf->max_ratio_enabled, inf->max_ratio);
2526
+
2527
+ if (result_len + orig_size > result_cap) {
2528
+ result_cap = result_len + orig_size;
2529
+ rb_str_resize(result, result_cap);
2530
+ }
2531
+
2532
+ int dsize =
2533
+ LZ4_decompress_safe(inf->lz4_buf.buf + pos + 8, RSTRING_PTR(result) + result_len,
2534
+ (int)comp_size, (int)orig_size);
2535
+ if (dsize < 0)
2536
+ rb_raise(eDataError, "lz4 stream decompress block failed");
2537
+
2538
+ result_len += dsize;
2539
+ pos += 8 + comp_size;
2540
+ if (use_fiber) {
2541
+ int did_yield = 0;
2542
+ fiber_counter = fiber_maybe_yield(fiber_counter, (size_t)dsize,
2543
+ policy->fiber_yield_chunk, &did_yield);
2544
+ (void)did_yield;
2545
+ }
2546
+ }
2547
+
2548
+ inf->lz4_buf.offset = pos;
2549
+ inf->total_output = checked_add_size(inf->total_output, result_len,
2550
+ "decompressed output exceeds representable size");
2551
+ rb_str_set_len(result, result_len);
2552
+ RB_GC_GUARD(chunk);
2553
+ return result;
2554
+ }
2555
+ }
2556
+ return rb_binary_str_new("", 0);
2557
+ }
2558
+
2559
+ static VALUE inflater_finish(VALUE self) {
2560
+ inflater_t *inf;
2561
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2562
+ if (inf->closed)
2563
+ rb_raise(eStreamError, "stream is closed");
2564
+ inf->finished = 1;
2565
+ return rb_binary_str_new("", 0);
2566
+ }
2567
+
2568
+ static VALUE inflater_reset(VALUE self) {
2569
+ inflater_t *inf;
2570
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2571
+
2572
+ VALUE dict_val = dictionary_ivar_get(self);
2573
+ dictionary_t *dict = NULL;
2574
+ if (!NIL_P(dict_val)) {
2575
+ TypedData_Get_Struct(dict_val, dictionary_t, &dictionary_type, dict);
2576
+ }
2577
+
2578
+ switch (inf->algo) {
2579
+ case ALGO_ZSTD:
2580
+ if (inf->ctx.zstd) {
2581
+ ZSTD_DCtx_reset(inf->ctx.zstd, ZSTD_reset_session_only);
2582
+ if (dict) {
2583
+ size_t r = ZSTD_DCtx_loadDictionary(inf->ctx.zstd, dict->data, dict->size);
2584
+ if (ZSTD_isError(r))
2585
+ rb_raise(eError, "zstd dict reload on reset: %s", ZSTD_getErrorName(r));
2586
+ }
2587
+ }
2588
+ break;
2589
+ case ALGO_BROTLI:
2590
+ if (inf->ctx.brotli) {
2591
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
2592
+ inf->ctx.brotli = BrotliDecoderCreateInstance(NULL, NULL, NULL);
2593
+ if (!inf->ctx.brotli)
2594
+ rb_raise(eMemError, "brotli: failed to recreate decoder");
2595
+ if (dict) {
2596
+ BrotliDecoderAttachDictionary(inf->ctx.brotli, BROTLI_SHARED_DICTIONARY_RAW,
2597
+ dict->size, dict->data);
2598
+ }
2599
+ }
2600
+ break;
2601
+ case ALGO_LZ4:
2602
+ inf->lz4_buf.len = 0;
2603
+ inf->lz4_buf.offset = 0;
2604
+ break;
2605
+ }
2606
+ inf->closed = 0;
2607
+ inf->finished = 0;
2608
+ inf->total_output = 0;
2609
+ inf->total_input = 0;
2610
+ return self;
2611
+ }
2612
+
2613
+ static VALUE inflater_close(VALUE self) {
2614
+ inflater_t *inf;
2615
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2616
+ if (inf->closed)
2617
+ return Qnil;
2618
+
2619
+ switch (inf->algo) {
2620
+ case ALGO_ZSTD:
2621
+ if (inf->ctx.zstd) {
2622
+ ZSTD_freeDStream(inf->ctx.zstd);
2623
+ inf->ctx.zstd = NULL;
2624
+ }
2625
+ break;
2626
+ case ALGO_BROTLI:
2627
+ if (inf->ctx.brotli) {
2628
+ BrotliDecoderDestroyInstance(inf->ctx.brotli);
2629
+ inf->ctx.brotli = NULL;
2630
+ }
2631
+ break;
2632
+ case ALGO_LZ4:
2633
+ break;
2634
+ }
2635
+ inf->closed = 1;
2636
+ return Qnil;
2637
+ }
2638
+
2639
+ static VALUE inflater_closed_p(VALUE self) {
2640
+ inflater_t *inf;
2641
+ TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
2642
+ return inf->closed ? Qtrue : Qfalse;
2643
+ }
2644
+
2645
+ static VALUE dict_initialize(int argc, VALUE *argv, VALUE self) {
2646
+ VALUE raw, opts;
2647
+ rb_scan_args(argc, argv, "1:", &raw, &opts);
2648
+ StringValue(raw);
2649
+ reject_algorithm_keyword(opts);
2650
+
2651
+ dictionary_t *d;
2652
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
2653
+
2654
+ VALUE algo_sym = Qnil;
2655
+ if (!NIL_P(opts)) {
2656
+ algo_sym = opt_get(opts, sym_cache.algo);
2657
+ }
2658
+ d->algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
2659
+
2660
+ if (d->algo == ALGO_LZ4)
2661
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
2662
+
2663
+ d->size = RSTRING_LEN(raw);
2664
+ d->data = ALLOC_N(uint8_t, d->size);
2665
+ memcpy(d->data, RSTRING_PTR(raw), d->size);
2666
+
2667
+ return self;
2668
+ }
2669
+
2670
+ static VALUE train_dictionary_internal(VALUE samples, VALUE size_val, compress_algo_t algo) {
2671
+ Check_Type(samples, T_ARRAY);
2672
+
2673
+ if (algo == ALGO_BROTLI) {
2674
+ rb_raise(eUnsupportedError, "Brotli dictionary training is not supported via this API. "
2675
+ "Create a raw dictionary using "
2676
+ "MultiCompress::Dictionary.new(data, algo: :brotli)");
2677
+ }
2678
+
2679
+ size_t dict_capacity = NIL_P(size_val) ? 112640 /* 110 KiB, zstd default */
2680
+ : NUM2SIZET(size_val);
2681
+ if (dict_capacity < 256)
2682
+ rb_raise(rb_eArgError, "dictionary size must be at least 256 bytes");
2683
+
2684
+ long num_samples = RARRAY_LEN(samples);
2685
+ if (num_samples < 1)
2686
+ rb_raise(rb_eArgError, "need at least 1 sample for training");
2687
+
2688
+ size_t total_size = 0;
2689
+ for (long i = 0; i < num_samples; i++) {
2690
+ VALUE s = rb_ary_entry(samples, i);
2691
+ StringValue(s);
2692
+ size_t slen = RSTRING_LEN(s);
2693
+ if (slen < 8)
2694
+ rb_raise(rb_eArgError, "sample %ld is too small (%zu bytes), minimum is 8 bytes", i,
2695
+ slen);
2696
+ total_size += slen;
2697
+ }
2698
+
2699
+ uint8_t *dict_buf = ALLOC_N(uint8_t, dict_capacity);
2700
+ char *concat = ALLOC_N(char, total_size);
2701
+ size_t *sizes = ALLOC_N(size_t, (size_t)num_samples);
2702
+
2703
+ size_t offset = 0;
2704
+ for (long i = 0; i < num_samples; i++) {
2705
+ VALUE s = rb_ary_entry(samples, i);
2706
+ StringValue(s);
2707
+ size_t slen = RSTRING_LEN(s);
2708
+ memcpy(concat + offset, RSTRING_PTR(s), slen);
2709
+ sizes[i] = slen;
2710
+ offset += slen;
2711
+ RB_GC_GUARD(s);
2712
+ }
2713
+
2714
+ size_t dict_size =
2715
+ ZDICT_trainFromBuffer(dict_buf, dict_capacity, concat, sizes, (unsigned)num_samples);
2716
+
2717
+ xfree(concat);
2718
+ xfree(sizes);
2719
+
2720
+ if (ZDICT_isError(dict_size)) {
2721
+ const char *err = ZDICT_getErrorName(dict_size);
2722
+ xfree(dict_buf);
2723
+ rb_raise(eError,
2724
+ "dictionary training failed: %s "
2725
+ "(tip: provide more samples; total sample bytes should be "
2726
+ "~100x the dictionary size)",
2727
+ err);
2728
+ }
2729
+
2730
+ VALUE dict_obj = dict_alloc(cDictionary);
2731
+ dictionary_t *d;
2732
+ TypedData_Get_Struct(dict_obj, dictionary_t, &dictionary_type, d);
2733
+ memset(d, 0, sizeof(*d));
2734
+ d->algo = algo;
2735
+ d->data = dict_buf;
2736
+ d->size = dict_size;
2737
+ return dict_obj;
2738
+ }
2739
+
2740
+ static VALUE zstd_train_dictionary(int argc, VALUE *argv, VALUE self) {
2741
+ // #if defined(__APPLE__) && (defined(__arm64__) || defined(__aarch64__))
2742
+ // rb_raise(eUnsupportedError,
2743
+ // "Zstd dictionary training is temporarily disabled on arm64-darwin "
2744
+ // "because the current vendored trainer path crashes on this platform");
2745
+ // #endif
2746
+
2747
+ VALUE samples, opts;
2748
+ rb_scan_args(argc, argv, "1:", &samples, &opts);
2749
+ reject_algorithm_keyword(opts);
2750
+ VALUE size_val = opt_get(opts, sym_cache.size);
2751
+ return train_dictionary_internal(samples, size_val, ALGO_ZSTD);
2752
+ }
2753
+
2754
+ static VALUE brotli_train_dictionary(int argc, VALUE *argv, VALUE self) {
2755
+ VALUE samples, opts;
2756
+ rb_scan_args(argc, argv, "1:", &samples, &opts);
2757
+ reject_algorithm_keyword(opts);
2758
+ VALUE size_val = opt_get(opts, sym_cache.size);
2759
+
2760
+ return train_dictionary_internal(samples, size_val, ALGO_BROTLI);
2761
+ }
2762
+
2763
+ static VALUE dict_load(int argc, VALUE *argv, VALUE self) {
2764
+ VALUE path, opts;
2765
+ rb_scan_args(argc, argv, "1:", &path, &opts);
2766
+ StringValue(path);
2767
+ reject_algorithm_keyword(opts);
2768
+ raise_if_path_has_null_byte(path);
2769
+
2770
+ VALUE algo_sym = Qnil;
2771
+ if (!NIL_P(opts)) {
2772
+ algo_sym = opt_get(opts, sym_cache.algo);
2773
+ }
2774
+ compress_algo_t algo = NIL_P(algo_sym) ? ALGO_ZSTD : sym_to_algo(algo_sym);
2775
+
2776
+ if (algo == ALGO_LZ4)
2777
+ rb_raise(eUnsupportedError, "LZ4 does not support dictionaries");
2778
+
2779
+ const char *cpath = RSTRING_PTR(path);
2780
+ FILE *f = fopen(cpath, "rb");
2781
+ if (!f)
2782
+ rb_sys_fail(cpath);
2783
+
2784
+ fseek(f, 0, SEEK_END);
2785
+ long file_size = ftell(f);
2786
+ fseek(f, 0, SEEK_SET);
2787
+
2788
+ if (file_size <= 0) {
2789
+ fclose(f);
2790
+ rb_raise(eDataError, "dictionary file is empty: %s", cpath);
2791
+ }
2792
+ if ((unsigned long long)file_size > DICT_FILE_MAX_SIZE) {
2793
+ fclose(f);
2794
+ rb_raise(eDataError, "dictionary file too large (%ld bytes, max=%d)", file_size,
2795
+ (int)DICT_FILE_MAX_SIZE);
2796
+ }
2797
+
2798
+ uint8_t *buf = ALLOC_N(uint8_t, file_size);
2799
+ size_t read_bytes = fread(buf, 1, file_size, f);
2800
+ fclose(f);
2801
+
2802
+ if ((long)read_bytes != file_size) {
2803
+ xfree(buf);
2804
+ rb_raise(eDataError, "failed to read dictionary: %s", cpath);
2805
+ }
2806
+
2807
+ VALUE dict_obj = dict_alloc(cDictionary);
2808
+ dictionary_t *d;
2809
+ TypedData_Get_Struct(dict_obj, dictionary_t, &dictionary_type, d);
2810
+ d->algo = algo;
2811
+ d->data = buf;
2812
+ d->size = (size_t)file_size;
2813
+ return dict_obj;
2814
+ }
2815
+
2816
+ static VALUE dict_save(VALUE self, VALUE path) {
2817
+ dictionary_t *d;
2818
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
2819
+
2820
+ const char *cpath = StringValueCStr(path);
2821
+ FILE *f = fopen(cpath, "wb");
2822
+ if (!f)
2823
+ rb_sys_fail(cpath);
2824
+
2825
+ size_t written = fwrite(d->data, 1, d->size, f);
2826
+ fclose(f);
2827
+
2828
+ if (written != d->size)
2829
+ rb_raise(eError, "failed to write dictionary to %s", cpath);
2830
+ return path;
2831
+ }
2832
+
2833
+ static VALUE dict_algo(VALUE self) {
2834
+ dictionary_t *d;
2835
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
2836
+ return algo_to_sym(d->algo);
2837
+ }
2838
+
2839
+ static VALUE dict_size(VALUE self) {
2840
+ dictionary_t *d;
2841
+ TypedData_Get_Struct(self, dictionary_t, &dictionary_type, d);
2842
+ return SIZET2NUM(d->size);
2843
+ }
2844
+
2845
+ void Init_multi_compress(void) {
2846
+ binary_encoding = rb_ascii8bit_encoding();
2847
+ init_id_cache();
2848
+ crc32_init_tables();
2849
+
2850
+ mMultiCompress = rb_define_module("MultiCompress");
2851
+
2852
+ eError = rb_define_class_under(mMultiCompress, "Error", rb_eStandardError);
2853
+ eDataError = rb_define_class_under(mMultiCompress, "DataError", eError);
2854
+ eMemError = rb_define_class_under(mMultiCompress, "MemError", eError);
2855
+ eStreamError = rb_define_class_under(mMultiCompress, "StreamError", eError);
2856
+ eUnsupportedError = rb_define_class_under(mMultiCompress, "UnsupportedError", eError);
2857
+ eLevelError = rb_define_class_under(mMultiCompress, "LevelError", eError);
2858
+
2859
+ mZstd = rb_define_module_under(mMultiCompress, "Zstd");
2860
+ mLZ4 = rb_define_module_under(mMultiCompress, "LZ4");
2861
+ mBrotli = rb_define_module_under(mMultiCompress, "Brotli");
2862
+
2863
+ rb_define_const(mZstd, "MIN_LEVEL", INT2FIX(1));
2864
+ rb_define_const(mZstd, "MAX_LEVEL", INT2FIX(22));
2865
+ rb_define_const(mZstd, "DEFAULT_LEVEL", INT2FIX(3));
2866
+ rb_define_const(mLZ4, "MIN_LEVEL", INT2FIX(1));
2867
+ rb_define_const(mLZ4, "MAX_LEVEL", INT2FIX(16));
2868
+ rb_define_const(mLZ4, "DEFAULT_LEVEL", INT2FIX(1));
2869
+ rb_define_const(mBrotli, "MIN_LEVEL", INT2FIX(0));
2870
+ rb_define_const(mBrotli, "MAX_LEVEL", INT2FIX(11));
2871
+ rb_define_const(mBrotli, "DEFAULT_LEVEL", INT2FIX(6));
2872
+
2873
+ rb_define_module_function(mMultiCompress, "compress", compress_compress, -1);
2874
+ rb_define_module_function(mMultiCompress, "decompress", compress_decompress, -1);
2875
+ rb_define_module_function(mMultiCompress, "crc32", compress_crc32, -1);
2876
+ rb_define_module_function(mMultiCompress, "adler32", compress_adler32, -1);
2877
+ rb_define_module_function(mMultiCompress, "algorithms", compress_algorithms, 0);
2878
+ rb_define_module_function(mMultiCompress, "available?", compress_available_p, 1);
2879
+ rb_define_module_function(mMultiCompress, "version", compress_version, 1);
2880
+
2881
+ cDeflater = rb_define_class_under(mMultiCompress, "Deflater", rb_cObject);
2882
+ rb_define_alloc_func(cDeflater, deflater_alloc);
2883
+ rb_define_method(cDeflater, "initialize", deflater_initialize, -1);
2884
+ rb_define_method(cDeflater, "write", deflater_write, 1);
2885
+ rb_define_method(cDeflater, "flush", deflater_flush, 0);
2886
+ rb_define_method(cDeflater, "finish", deflater_finish, 0);
2887
+ rb_define_method(cDeflater, "reset", deflater_reset, 0);
2888
+ rb_define_method(cDeflater, "close", deflater_close, 0);
2889
+ rb_define_method(cDeflater, "closed?", deflater_closed_p, 0);
2890
+
2891
+ cInflater = rb_define_class_under(mMultiCompress, "Inflater", rb_cObject);
2892
+ rb_define_alloc_func(cInflater, inflater_alloc);
2893
+ rb_define_method(cInflater, "initialize", inflater_initialize, -1);
2894
+ rb_define_method(cInflater, "write", inflater_write, 1);
2895
+ rb_define_method(cInflater, "finish", inflater_finish, 0);
2896
+ rb_define_method(cInflater, "reset", inflater_reset, 0);
2897
+ rb_define_method(cInflater, "close", inflater_close, 0);
2898
+ rb_define_method(cInflater, "closed?", inflater_closed_p, 0);
2899
+
2900
+ cWriter = rb_define_class_under(mMultiCompress, "Writer", rb_cObject);
2901
+ cReader = rb_define_class_under(mMultiCompress, "Reader", rb_cObject);
2902
+
2903
+ cDictionary = rb_define_class_under(mMultiCompress, "Dictionary", rb_cObject);
2904
+ rb_define_alloc_func(cDictionary, dict_alloc);
2905
+ rb_define_method(cDictionary, "initialize", dict_initialize, -1);
2906
+ rb_define_singleton_method(cDictionary, "load", dict_load, -1);
2907
+ rb_define_method(cDictionary, "save", dict_save, 1);
2908
+ rb_define_method(cDictionary, "algo", dict_algo, 0);
2909
+ rb_define_method(cDictionary, "size", dict_size, 0);
2910
+ rb_define_singleton_method(mZstd, "train_dictionary", zstd_train_dictionary, -1);
2911
+ rb_define_singleton_method(mBrotli, "train_dictionary", brotli_train_dictionary, -1);
2912
+ }