multi_compress 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +152 -0
  3. data/LICENSE.txt +21 -0
  4. data/README.md +177 -0
  5. data/ext/multi_compress/extconf.rb +190 -0
  6. data/ext/multi_compress/multi_compress.c +2912 -0
  7. data/ext/multi_compress/vendor/.vendored +3 -0
  8. data/ext/multi_compress/vendor/brotli/c/common/constants.c +15 -0
  9. data/ext/multi_compress/vendor/brotli/c/common/constants.h +201 -0
  10. data/ext/multi_compress/vendor/brotli/c/common/context.c +156 -0
  11. data/ext/multi_compress/vendor/brotli/c/common/context.h +113 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +5916 -0
  13. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +64 -0
  14. data/ext/multi_compress/vendor/brotli/c/common/platform.c +23 -0
  15. data/ext/multi_compress/vendor/brotli/c/common/platform.h +541 -0
  16. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +521 -0
  17. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
  18. data/ext/multi_compress/vendor/brotli/c/common/transform.c +291 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/transform.h +85 -0
  20. data/ext/multi_compress/vendor/brotli/c/common/version.h +51 -0
  21. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +78 -0
  22. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +423 -0
  23. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +2875 -0
  24. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +342 -0
  25. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +122 -0
  26. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +733 -0
  27. data/ext/multi_compress/vendor/brotli/c/dec/state.c +183 -0
  28. data/ext/multi_compress/vendor/brotli/c/dec/state.h +400 -0
  29. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +207 -0
  30. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +40 -0
  31. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +939 -0
  32. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +96 -0
  33. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_inc.h +189 -0
  34. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +36 -0
  35. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +64 -0
  36. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +127 -0
  37. data/ext/multi_compress/vendor/brotli/c/enc/block_encoder_inc.h +34 -0
  38. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +217 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +52 -0
  40. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +481 -0
  41. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +1336 -0
  42. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +89 -0
  43. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +57 -0
  44. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +49 -0
  45. data/ext/multi_compress/vendor/brotli/c/enc/cluster_inc.h +325 -0
  46. data/ext/multi_compress/vendor/brotli/c/enc/command.c +28 -0
  47. data/ext/multi_compress/vendor/brotli/c/enc/command.h +191 -0
  48. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +207 -0
  49. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +74 -0
  50. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +800 -0
  51. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +86 -0
  52. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +657 -0
  53. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +72 -0
  54. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +1848 -0
  55. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +25 -0
  56. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +1996 -0
  57. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +640 -0
  58. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +157 -0
  59. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +504 -0
  60. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +123 -0
  61. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +542 -0
  62. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +105 -0
  63. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +67 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +72 -0
  65. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +728 -0
  66. data/ext/multi_compress/vendor/brotli/c/enc/hash_composite_inc.h +140 -0
  67. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +295 -0
  68. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +262 -0
  69. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
  70. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +266 -0
  71. data/ext/multi_compress/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
  72. data/ext/multi_compress/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +330 -0
  73. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +100 -0
  74. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +64 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/histogram_inc.h +51 -0
  76. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +180 -0
  77. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +32 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +194 -0
  79. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +131 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +677 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +106 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +185 -0
  83. data/ext/multi_compress/vendor/brotli/c/enc/params.h +47 -0
  84. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +54 -0
  85. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +202 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +168 -0
  87. data/ext/multi_compress/vendor/brotli/c/enc/state.h +104 -0
  88. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +542 -0
  89. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +41 -0
  90. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +5866 -0
  91. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +85 -0
  92. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +33 -0
  93. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +88 -0
  94. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +409 -0
  95. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +501 -0
  96. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +305 -0
  97. data/ext/multi_compress/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
  98. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +83 -0
  99. data/ext/multi_compress/vendor/lz4/lib/LICENSE +24 -0
  100. data/ext/multi_compress/vendor/lz4/lib/Makefile +244 -0
  101. data/ext/multi_compress/vendor/lz4/lib/README.md +193 -0
  102. data/ext/multi_compress/vendor/lz4/lib/dll/example/Makefile +63 -0
  103. data/ext/multi_compress/vendor/lz4/lib/dll/example/README.md +69 -0
  104. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.sln +25 -0
  105. data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.vcxproj +182 -0
  106. data/ext/multi_compress/vendor/lz4/lib/liblz4-dll.rc.in +35 -0
  107. data/ext/multi_compress/vendor/lz4/lib/liblz4.pc.in +14 -0
  108. data/ext/multi_compress/vendor/lz4/lib/lz4.c +2829 -0
  109. data/ext/multi_compress/vendor/lz4/lib/lz4.h +884 -0
  110. data/ext/multi_compress/vendor/lz4/lib/lz4file.c +341 -0
  111. data/ext/multi_compress/vendor/lz4/lib/lz4file.h +93 -0
  112. data/ext/multi_compress/vendor/lz4/lib/lz4frame.c +2136 -0
  113. data/ext/multi_compress/vendor/lz4/lib/lz4frame.h +751 -0
  114. data/ext/multi_compress/vendor/lz4/lib/lz4frame_static.h +47 -0
  115. data/ext/multi_compress/vendor/lz4/lib/lz4hc.c +2192 -0
  116. data/ext/multi_compress/vendor/lz4/lib/lz4hc.h +414 -0
  117. data/ext/multi_compress/vendor/lz4/lib/xxhash.c +1030 -0
  118. data/ext/multi_compress/vendor/lz4/lib/xxhash.h +328 -0
  119. data/ext/multi_compress/vendor/zstd/lib/BUCK +232 -0
  120. data/ext/multi_compress/vendor/zstd/lib/Makefile +357 -0
  121. data/ext/multi_compress/vendor/zstd/lib/README.md +217 -0
  122. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +478 -0
  123. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +335 -0
  124. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +213 -0
  125. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +24 -0
  126. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +107 -0
  127. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +368 -0
  128. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +56 -0
  129. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +159 -0
  130. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +717 -0
  131. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +403 -0
  132. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +364 -0
  133. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +442 -0
  134. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +355 -0
  135. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +84 -0
  136. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +137 -0
  137. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +122 -0
  138. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +155 -0
  139. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +24 -0
  140. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +5686 -0
  141. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +83 -0
  142. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +111 -0
  143. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +493 -0
  144. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +163 -0
  145. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +134 -0
  146. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +741 -0
  147. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +181 -0
  148. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +75 -0
  149. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +1370 -0
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +6327 -0
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +1458 -0
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +159 -0
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +31 -0
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +442 -0
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +573 -0
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +676 -0
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +696 -0
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +38 -0
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +675 -0
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +37 -0
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +2104 -0
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +125 -0
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +724 -0
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +117 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +1446 -0
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +56 -0
  170. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +1859 -0
  171. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +113 -0
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +1889 -0
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +585 -0
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +244 -0
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +44 -0
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +2230 -0
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +2072 -0
  178. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +68 -0
  179. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +236 -0
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +214 -0
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +26 -0
  182. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +167 -0
  183. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +1253 -0
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +158 -0
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.c +1913 -0
  187. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +67 -0
  188. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +766 -0
  189. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +1205 -0
  190. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +48 -0
  191. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +63 -0
  192. data/ext/multi_compress/vendor/zstd/lib/dll/example/build_package.bat +20 -0
  193. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.sln +25 -0
  194. data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.vcxproj +181 -0
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +415 -0
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2158 -0
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +94 -0
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +3518 -0
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +93 -0
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +3160 -0
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +93 -0
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +3647 -0
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +142 -0
  204. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +4050 -0
  205. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +162 -0
  206. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +4154 -0
  207. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +172 -0
  208. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +4541 -0
  209. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +187 -0
  210. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +203 -0
  211. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +16 -0
  212. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +25 -0
  213. data/ext/multi_compress/vendor/zstd/lib/zdict.h +452 -0
  214. data/ext/multi_compress/vendor/zstd/lib/zstd.h +2575 -0
  215. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +95 -0
  216. data/lib/multi_compress/version.rb +5 -0
  217. data/lib/multi_compress.rb +329 -0
  218. metadata +322 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9faa0c73c6b3926604e7d3477c5a519a8898fd8a08e0f9d71b84c6fce87a815d
4
+ data.tar.gz: '0818b9fa5ea7cf2958910150185571a9855a1728f44c6f425ac590a2256298a0'
5
+ SHA512:
6
+ metadata.gz: b94255c0103f3f89ae4099627a61a972b6da00220ace21d9fbd6d0c3f1641cb46d2ae79126c75f738e21ebd5359c80f6bdee11d958200731e5d402e1058d9fad
7
+ data.tar.gz: c5ce6f3cb3e6c7d1b2fe08666dd916e66646f8077669f9908f61e2de64e311be1f7052345617faa045526c26136f38cc659bfabd92b596e826dd79224a1962fe
data/CHANGELOG.md ADDED
@@ -0,0 +1,152 @@
1
+ # Changelog
2
+
3
+ ## [0.2.4]
4
+
5
+ ### Added
6
+ - Keyword validation
7
+
8
+ ### Fixed
9
+ - Fixed gem packaging so vendored builds work correctly after installation.
10
+ Previously, installed gems could incorrectly fall back to system libraries because
11
+ `ext/multi_compress/vendor/.vendored` was not included in the packaged gem.
12
+ - Fixed vendored zstd build path in `extconf.rb`.
13
+
14
+ ## [0.2.3]
15
+
16
+ ### Changed
17
+ - Constants
18
+
19
+ ## [0.2.2]
20
+
21
+ ### Added
22
+ - New decompression safeguards:
23
+ - `max_output_size:` on `MultiCompress.decompress`, `MultiCompress::Inflater.new`, and `MultiCompress::Reader.open`
24
+ - `max_ratio:` with default `1000` and trusted-input opt-out via `nil`
25
+ - Cumulative streaming output budget enforcement
26
+ - Dictionary file size cap (`32MB`) for `MultiCompress::Dictionary.load`
27
+ - New docs/code parity and limits tests
28
+ - Linux-only ASAN/UBSAN CI job
29
+ - Local `script/valgrind.sh` helper for developer verification (with Ruby VM suppressions)
30
+ - Local `script/sanitize.sh` helper for reproducing the ASAN/UBSAN CI job locally on Linux
31
+ - Optional `./build.sh --test`, `./build.sh --valgrind`, and `./build.sh --sanitize` modes
32
+
33
+ ### Changed
34
+ - Vendored **zstd** is pinned to **1.5.2** in the current release line.
35
+ - This restores reliable `MultiCompress::Zstd.train_dictionary(...)` behavior on tested platforms, including arm64-darwin.
36
+ - Newer vendored zstd versions are temporarily avoided until the dictionary training regression is understood and fixed upstream.
37
+
38
+ ### Changed
39
+ - Documentation now matches current runtime behavior:
40
+ - Ruby requirement is documented as `>= 3.1.0`
41
+ - Supported named levels are `:fastest`, `:default`, and `:best`
42
+ - Zstd `:best` is documented with the current runtime mapping (level 19)
43
+ - LZ4 is documented as using a custom internal format that is not compatible with the standard `lz4` CLI
44
+
45
+ ### Fixed
46
+ - Corrected dictionary training docs:
47
+ - `MultiCompress::Zstd.train_dictionary` is documented as supported
48
+ - `MultiCompress::Brotli.train_dictionary` is documented as unsupported in the current implementation
49
+
50
+ ### Notes
51
+ - The default `max_ratio: 1000` is a user-visible behavior change. Workloads that legitimately expand beyond `1000:1` must pass `max_ratio: nil`.
52
+
53
+ ## [0.2.1] — 2026-04-15
54
+ - Updated zstd from version 1.5.6 to 1.5.7.
55
+ - Micro-optimizations.
56
+
57
+ ## [0.2.0] — 2026-04-15
58
+
59
+ ### Added
60
+ - **Fiber-friendly execution** 🎉: compression and decompression now cooperate with Ruby's `Fiber::Scheduler`, making MultiCompress safe to use under [async](https://github.com/socketry/async), [falcon](https://github.com/socketry/falcon), and any other scheduler-based runtime.
61
+ - When a scheduler is active, CPU-heavy work is offloaded to a dedicated worker thread via `rb_thread_create`, while the calling fiber is parked with `rb_fiber_scheduler_block`. The scheduler is free to run other fibers (IO, timers, parallel compression) until the worker finishes and calls `rb_fiber_scheduler_unblock`.
62
+ - Covers **all three algorithms** (`zstd`, `lz4`, `brotli`) and **both API shapes**:
63
+ - One-shot `MultiCompress.compress` / `MultiCompress.decompress`
64
+ - Streaming `MultiCompress::Deflater#write` / `MultiCompress::Inflater#write`
65
+ - No API changes — the fiber-friendly path is selected automatically when a scheduler is detected.
66
+ - Chunks smaller than 16 KB (`FIBER_STREAM_THRESHOLD`) stay inline to avoid pthread-create overhead for micro-workloads.
67
+ - Outside of a scheduler, the previous `rb_thread_call_without_gvl` path is used unchanged.
68
+ - Generic `run_via_fiber_worker(scheduler, func, arg)` internal helper — encapsulates the pthread-worker + scheduler-block pattern and is reused across every fiber-friendly code path.
69
+ - New test suite `test/test_fiber_scheduler.rb` verifying scheduler progress during compress/decompress for every algorithm, in both one-shot and streaming modes.
70
+
71
+ ### Changed
72
+ - `fiber_maybe_yield` now actually yields: previously it was a no-op counter; it now calls `Fiber.scheduler.yield` via `rb_funcall` when the byte threshold is crossed. Used for `lz4` streaming paths where pthread-per-block would be overkill.
73
+ - Streaming `Deflater#write` for zstd/brotli uses the fiber-worker path for chunks ≥ 16 KB; the old inline path is kept as a fallback for small chunks and when no scheduler is present.
74
+ - Streaming `Inflater#write` for zstd/brotli similarly routes chunks through the fiber-worker path when possible.
75
+ - LZ4 one-shot decompress loop extracted into `lz4_decompress_all_nogvl` so it can run on the worker thread with the GVL released.
76
+
77
+ ### Performance
78
+ - No regressions for the non-fiber case: all existing GVL-unlocking fast paths are preserved untouched.
79
+ - Under a fiber scheduler: compression no longer starves concurrent IO fibers. Previously, compressing 50 MB with `zstd` level 3 would block every other fiber in the reactor for the entire duration; now the reactor keeps servicing IO, timers, and other compute fibers throughout.
80
+ - The 16 KB streaming threshold was chosen empirically: below that, pthread-create overhead (~20-50μs) eats the gains; at 16 KB and above, the fiber-friendly path is a near-free win.
81
+
82
+ ### Upgrading
83
+ No code changes required. If you run under `Async`/`Falcon`/`Fiber::Scheduler`, you'll immediately get non-blocking compression. If you don't, behavior is identical to 0.1.2.
84
+
85
+ ## [0.1.2] — 2026-04-14
86
+
87
+ ### Fixed
88
+ - **LZ4 Streaming Correctness**: Fixed a critical bug in the LZ4 Inflater that caused incorrect decompression of large inputs (>1MB)
89
+ - Removed complex streaming API (`LZ4_decompress_safe_continue`) that had implementation errors
90
+ - Restored simple, reliable direct decompression (`LZ4_decompress_safe`)
91
+ - All roundtrip tests now pass correctly
92
+ - **LZ4 Deflater Ring Buffer**: Fixed ring buffer management to prevent overflow
93
+ - Added proper offset reset when approaching buffer limit
94
+ - Improved memory safety
95
+
96
+ ### Added
97
+ - **ZSTD Dictionary Training**: Added `MultiCompress::Zstd.train_dictionary(samples, size:)` method
98
+ - Uses ZDICT API for optimal dictionary generation
99
+ - Allows creating custom dictionaries for better compression on similar data
100
+ - **Memory Optimization Helpers**:
101
+ - Added `rb_binary_str_buf_reserve()` for efficient pre-allocation
102
+ - Added `grow_binary_str()` for safe capacity management
103
+
104
+ ### Changed
105
+ - **Memory Efficiency**: Eliminated double-buffering in compress/decompress operations
106
+ - ZSTD, LZ4, Brotli now write directly to Ruby strings when unlocking GVL
107
+ - Reduced memory allocations and improved cache locality
108
+ - **Dictionary Support in Streaming**: Fixed dictionary reloading in `reset()` methods
109
+ - Dictionaries are now properly saved as instance variables
110
+ - `deflater.reset` and `inflater.reset` correctly restore dictionary state
111
+ - **Brotli Error Handling**: Added comprehensive error checking for dictionary operations
112
+ - All Brotli API calls now verify success/failure
113
+ - More informative error messages
114
+
115
+ ### Performance Notes
116
+ - **LZ4 Streaming**: Slight performance regression on large chunks (128KB: ~40-50% slower decompression)
117
+ - Trade-off for correctness: reliable decompression is prioritized over peak performance
118
+ - Small chunks (4-32KB) maintain original performance
119
+ - Overall impact minimal as streaming typically uses smaller chunks
120
+ - **Memory Usage**: Reduced due to elimination of intermediate buffers
121
+ - **Dictionary Operations**: Faster on repeated use due to improved caching
122
+
123
+ ### Security
124
+ - Improved LZ4 format validation in `detect_algo()`
125
+ - Now checks for proper tail marker (4 zero bytes)
126
+ - Better protection against malformed data
127
+
128
+ ## [0.1.1] — Unreleased
129
+
130
+ ### Breaking Changes
131
+ - **REMOVED**: `MultiCompress::Dictionary.train` method - general dictionary training interface removed
132
+ - Dictionary training is now algorithm-specific to clarify capabilities
133
+
134
+ ### Added
135
+ - `MultiCompress::Brotli.train_dictionary` - Brotli-specific dictionary training method
136
+ - Clear indication that only Brotli supports dictionary training in this implementation
137
+
138
+ ### Changed
139
+ - Dictionary training API is now more explicit about which algorithms support it
140
+ - Improved API clarity by making training method algorithm-specific
141
+
142
+ ## [0.1.0] — Unreleased
143
+
144
+ ### Added
145
+ - One-shot compress/decompress for zstd, lz4, brotli
146
+ - Streaming API: Deflater, Inflater
147
+ - IO wrappers: Writer, Reader with block-style open
148
+ - Dictionary API skeleton (zstd, brotli)
149
+ - Auto-detect algorithm by magic bytes and file extension
150
+ - Named compression levels: :fastest, :default, :best
151
+ - CRC32 / Adler32 utility stubs
152
+ - Algorithm info: `.algorithms`, `.available?`, `.version`
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2025 Roman Haydarov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,177 @@
1
+ # MultiCompress 🗜️
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/multi_compress.svg)](https://badge.fury.io/rb/multi_compress)
4
+
5
+ > **Status:** functional, well-tested, and actively evolving. The current release is suitable for real workloads, but the API and implementation details are still being refined in upcoming releases.
6
+
7
+ **MultiCompress** is a unified compression library for Ruby that puts **zstd**, **lz4**, and **brotli** behind a single API, with native C performance and **fiber-friendly** operation for modern async Ruby stacks.
8
+
9
+ Bundled library versions in the current release:
10
+ - **zstd 1.5.2**
11
+ - **lz4 1.10.0**
12
+ - **brotli 1.1.0**
13
+
14
+ 📖 **[Get Started →](GET_STARTED.md)** — Complete technology overview, algorithms, and implementation details
15
+
16
+ ## Technology Overview
17
+
18
+ **MultiCompress** is a comprehensive compression system that unites three cutting-edge algorithms in a single platform. In the benchmarks below, lz4 and zstd run roughly 2.5–10x faster than zlib, while zstd and brotli generally produce smaller output (lz4 trades compression ratio for speed).
19
+
20
+ | Algorithm | Strength | Best for |
21
+ |-----------|----------|----------|
22
+ | **zstd** | Best speed/ratio balance | Cache, logs, backups |
23
+ | **lz4** | Fastest compress/decompress | IPC, hot cache, real-time |
24
+ | **brotli** | Best ratio for HTTP | Web assets, API responses |
25
+
26
+ ## How It Works
27
+
28
+ **MultiCompress** packages modern compression algorithms (zstd, lz4, brotli) with their native C libraries, providing a unified interface. The system includes vendored sources of all compression libraries, eliminating external dependencies.
29
+
30
+ ### Key Design Principles
31
+
32
+ - **Dictionary support**: Runtime dictionary use is supported for zstd and brotli; dictionary training is available for zstd in the current release line
33
+ - **Zero external dependencies**: All C libraries are vendored and compiled
34
+ - **Unified API**: Same interface for all algorithms — just change the `algo:` parameter
35
+ - **Performance first**: Direct bindings to C libraries, minimal overhead
36
+ - **Fiber-friendly**: Compression and decompression cooperate with Ruby's fiber scheduler — safe to use under `async`, `falcon`, or any `Fiber::Scheduler`-based runtime without blocking the event loop. See [GET_STARTED.md](GET_STARTED.md) for details and examples.
37
+ - **Memory efficient**: Streaming support for large datasets, proper resource cleanup
38
+ - **Operationally focused**: Clear errors, comprehensive tests, and streaming support for practical workloads
39
+
40
+ ### Algorithm Auto-Detection
41
+
42
+ The system can automatically detect compression algorithms when decompressing data:
43
+
44
+ - **zstd**: Detected by magic bytes `28 B5 2F FD` (little-endian)
45
+ - **lz4**: Detected by internal format header validation (custom internal format, NOT compatible with the standard `lz4` CLI; optional standard frame support may be added in a future release)
46
+ - **brotli**: Requires explicit `algo: :brotli` parameter - no auto-detection
47
+
48
+ **Important**: Auto-detection only works for ZSTD and LZ4. Brotli data must be decompressed with explicit algorithm specification.
49
+
50
+ **Security**: Decompression now enforces a default 256MB output cap, cumulative streaming limits, a default ratio guard of 1000:1, and a 32MB dictionary file size cap.
51
+
52
+
53
+ ## Security limits
54
+
55
+ Decompression-facing APIs support conservative defaults intended to protect against decompression bombs and accidental resource spikes:
56
+
57
+ - **Default output cap:** `256MB`
58
+ - **Streaming cumulative cap:** enforced across the lifetime of an `Inflater`/`Reader`
59
+ - **Default ratio guard:** `1000:1`
60
+ - **Trusted-input opt-out:** pass `max_ratio: nil`
61
+ - **Dictionary file size cap:** `32MB` for `MultiCompress::Dictionary.load`
62
+
63
+ Examples:
64
+
65
+ ```ruby
66
+ MultiCompress.decompress(blob, algo: :zstd, max_output_size: 64 * 1024 * 1024)
67
+ MultiCompress.decompress(blob, algo: :brotli, max_ratio: nil)
68
+
69
+ MultiCompress::Reader.open("archive.zst", max_output_size: 128 * 1024 * 1024, max_ratio: 500) do |reader|
70
+ puts reader.read
71
+ end
72
+ ```
73
+
74
+ `max_output_size: nil` keeps the native default cap of `256MB`. `max_ratio: nil` disables the ratio guard for trusted input.
75
+
76
+ ## Algorithm Comparison
77
+
78
+ | Algorithm | Speed | Ratio | Best Use Case |
79
+ |-----------|-------|--------|---------------|
80
+ | **lz4** | Fastest | Good | Real-time processing, IPC, hot cache paths |
81
+ | **zstd** | Fast | Excellent | General purpose, logs, backups, web APIs |
82
+ | **brotli** | Slower | Best | Static assets, CDN, long-term storage |
83
+
84
+
85
+ ## Benchmark Results
86
+
87
+ > **📝 Note on v0.2.0**: Performance numbers below are from the v0.2.0 build with fiber-friendly paths enabled. There is no throughput regression compared to v0.1.2 — the fiber-friendly path is only taken when a `Fiber::Scheduler` is active, and even then the worker-thread overhead is negligible for payloads large enough to benefit.
88
+
89
+ Performance comparison against Ruby's built-in zlib compression (200 iterations per test):
90
+
91
+ ### 🗜️ COMPRESSION RATIO (%, lower is better)
92
+ ```
93
+ ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
94
+ │ Configuration │ zlib │ lz4 │ zstd │ brotli │
95
+ ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
96
+ │ Small JSON (~10KB, GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
97
+ │ Small text (~10KB, GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
98
+ │ Small JSON (~10KB, no GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
99
+ │ Small text (~10KB, no GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
100
+ │ Medium JSON (~370KB, GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
101
+ │ Medium logs (~168KB, GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
102
+ │ Medium JSON (~370KB, no GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
103
+ │ Medium logs (~168KB, no GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
104
+ │ Large JSON (~1.6MB, GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
105
+ │ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
106
+ │ Large JSON (~1.6MB, no GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
107
+ │ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
108
+ └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
109
+ ```
110
+
111
+ ### ⚡ TOTAL TIME (compress + decompress, ms — lower is faster)
112
+ ```
113
+ ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
114
+ │ Configuration │ zlib │ lz4 │ zstd │ brotli │
115
+ ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
116
+ │ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.14 │
117
+ │ Small text (~10KB, GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11 │
118
+ │ Small JSON (~10KB, no GC) │ 0.06 │ 0.01 │ 0.02 │ 0.13 │
119
+ │ Small text (~10KB, no GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11 │
120
+ │ Medium JSON (~370KB, GC) │ 2.73 │ 0.29 │ 0.42 │ 2.36 │
121
+ │ Medium logs (~168KB, GC) │ 1.23 │ 0.14 │ 0.18 │ 0.92 │
122
+ │ Medium JSON (~370KB, no GC) │ 2.72 │ 0.28 │ 0.41 │ 2.41 │
123
+ │ Medium logs (~168KB, no GC) │ 1.26 │ 0.13 │ 0.18 │ 0.96 │
124
+ │ Large JSON (~1.6MB, GC) │ 12.44 │ 1.38 │ 1.96 │ 12.44 │
125
+ │ Large logs (~600KB, GC) │ 4.29 │ 0.46 │ 0.49 │ 2.85 │
126
+ │ Large JSON (~1.6MB, no GC) │ 12.22 │ 1.28 │ 1.86 │ 11.83 │
127
+ │ Large logs (~600KB, no GC) │ 4.39 │ 0.42 │ 0.44 │ 2.86 │
128
+ └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
129
+ ```
130
+
131
+ ### 📊 SPEEDUP vs ZLIB (higher is better)
132
+ ```
133
+ ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
134
+ │ Configuration │ zlib │ lz4 │ zstd │ brotli │
135
+ ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
136
+ │ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.36x │
137
+ │ Small text (~10KB, GC) │ 1.00x │ N/A │ 4.00x │ 0.36x │
138
+ │ Small JSON (~10KB, no GC) │ 1.00x │ 6.00x │ 3.00x │ 0.46x │
139
+ │ Small text (~10KB, no GC) │ 1.00x │ N/A │ 4.00x │ 0.36x │
140
+ │ Medium JSON (~370KB, GC) │ 1.00x │ 9.41x │ 6.50x │ 1.16x │
141
+ │ Medium logs (~168KB, GC) │ 1.00x │ 8.79x │ 6.83x │ 1.34x │
142
+ │ Medium JSON (~370KB, no GC) │ 1.00x │ 9.71x │ 6.63x │ 1.13x │
143
+ │ Medium logs (~168KB, no GC) │ 1.00x │ 9.69x │ 7.00x │ 1.31x │
144
+ │ Large JSON (~1.6MB, GC) │ 1.00x │ 9.01x │ 6.35x │ 1.00x │
145
+ │ Large logs (~600KB, GC) │ 1.00x │ 9.33x │ 8.76x │ 1.51x │
146
+ │ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.55x │ 6.57x │ 1.03x │
147
+ │ Large logs (~600KB, no GC) │ 1.00x │ 10.45x │ 9.98x │ 1.53x │
148
+ └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
149
+ ```
150
+
151
+
152
+ **Dependencies for benchmarking:**
153
+ - `memory_profiler` — Memory usage analysis
154
+ - `benchmark-ips` — Iterations per second benchmarking
155
+
156
+ Or use the build script:
157
+
158
+ ```bash
159
+ ./build.sh
160
+ ```
161
+
162
+ ## Requirements
163
+
164
+ - Ruby >= 3.1.0
165
+ - C compiler (gcc, clang)
166
+
167
+ ## Contributing
168
+
169
+ 1. Fork it
170
+ 2. Create your feature branch (`git checkout -b feature/my-feature`)
171
+ 3. Commit your changes (`git commit -am 'Add feature'`)
172
+ 4. Push to the branch (`git push origin feature/my-feature`)
173
+ 5. Create a Pull Request
174
+
175
+ ## License
176
+
177
+ MIT — see [LICENSE.txt](LICENSE.txt).
@@ -0,0 +1,190 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
# Build-mode switches. Each is truthy when the extconf flag is passed or when
# the matching environment variable is set (to any value, even an empty one).
USE_SYSTEM = arg_config("--use-system-libraries") || ENV["COMPRESS_USE_SYSTEM_LIBRARIES"]
FORCE_VENDORED = arg_config("--force-vendored") || ENV["COMPRESS_FORCE_VENDORED"]

# Vendored source layout, relative to each library's directory under vendor/.
ZSTD_SUBDIRS = %w[
  lib/common
  lib/compress
  lib/decompress
  lib/dictBuilder
].freeze
BROTLI_SUBDIRS = %w[
  c/common
  c/enc
  c/dec
].freeze
LZ4_SOURCES = %w[
  lz4.c
  lz4hc.c
  lz4frame.c
].freeze
13
+
14
+ def find_vendor_dir
15
+ candidates = [
16
+ File.join(__dir__, "vendor"),
17
+ File.join(__dir__, "..", "..", "..", "..", "ext", "multi_compress", "vendor"),
18
+ File.expand_path("../../ext/multi_compress/vendor", __dir__),
19
+ File.join(Dir.pwd, "ext", "multi_compress", "vendor"),
20
+ ]
21
+
22
+ dir = __dir__
23
+ 6.times do
24
+ candidates << File.join(dir, "ext", "multi_compress", "vendor")
25
+ dir = File.dirname(dir)
26
+ end
27
+
28
+ candidates.find { |path| File.exist?(File.join(path, ".vendored")) }
29
+ &.then { |path| File.expand_path(path) }
30
+ end
31
+
32
+ def find_compress_c_dir
33
+ candidates = [
34
+ __dir__,
35
+ File.join(__dir__, "..", "..", "..", "..", "ext", "multi_compress"),
36
+ ]
37
+
38
+ dir = __dir__
39
+ 6.times do
40
+ candidates << File.join(dir, "ext", "multi_compress")
41
+ dir = File.dirname(dir)
42
+ end
43
+
44
+ candidates.find { |path| File.exist?(File.join(path, "multi_compress.c")) }
45
+ &.then { |path| File.expand_path(path) } || __dir__
46
+ end
47
+
48
+ def configure_system_libraries
49
+ puts "Building with SYSTEM libraries"
50
+
51
+ configure_homebrew if RUBY_PLATFORM.include?("darwin")
52
+
53
+ require_system_lib("zstd", header: "zstd.h", func: "ZSTD_compress")
54
+ require_system_lib("lz4", header: "lz4.h", func: "LZ4_compress_default")
55
+ require_system_lib("brotli", header: "brotli/encode.h", func: "BrotliEncoderCreateInstance", lib: "brotlienc")
56
+
57
+ have_header("zdict.h")
58
+ have_header("lz4hc.h")
59
+ have_library("lz4", "LZ4_compress_HC")
60
+ have_header("brotli/decode.h")
61
+ have_library("brotlidec", "BrotliDecoderCreateInstance")
62
+ end
63
+
64
+ def configure_homebrew
65
+ dir_config("homebrew", "/opt/homebrew")
66
+ $CPPFLAGS += " -I/opt/homebrew/include"
67
+ $LDFLAGS += " -L/opt/homebrew/lib"
68
+ end
69
+
70
+ def require_system_lib(name, header:, func:, lib: name)
71
+ return if have_header(header) && have_library(lib, func)
72
+
73
+ abort "Missing #{name}. Install: apt install lib#{name}-dev / brew install #{name}"
74
+ end
75
+
76
+ def configure_vendored_libraries(vendor_dir)
77
+ versions = File.read(File.join(vendor_dir, ".vendored"))
78
+ puts "Building with VENDORED libraries from #{vendor_dir}"
79
+ puts " forced vendored mode enabled" if FORCE_VENDORED
80
+ puts " #{versions.tr("\n", ", ")}"
81
+
82
+ zstd_dir = File.join(vendor_dir, "zstd")
83
+ lz4_dir = File.join(vendor_dir, "lz4")
84
+ brotli_dir = File.join(vendor_dir, "brotli")
85
+
86
+ all_vendor_srcs = collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
87
+
88
+ puts " #{all_vendor_srcs.length} vendored C files"
89
+
90
+ add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
91
+ $CPPFLAGS += " -DZSTD_DISABLE_ASM"
92
+
93
+ vpath_dirs = build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)
94
+
95
+ deduplicate_sources!(all_vendor_srcs)
96
+
97
+ compress_c_dir = find_compress_c_dir
98
+
99
+ $srcs = ["multi_compress.c"] + all_vendor_srcs.map { |s| File.basename(s) }
100
+ $VPATH = [compress_c_dir] + vpath_dirs
101
+
102
+ $warnflags = ""
103
+
104
+ vpath_dirs
105
+ end
106
+
107
+ def collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
108
+ zstd_srcs = ZSTD_SUBDIRS.flat_map { |d| Dir[File.join(zstd_dir, d, "*.c")] }
109
+
110
+ lz4_srcs = LZ4_SOURCES.filter_map do |f|
111
+ path = File.join(lz4_dir, "lib", f)
112
+ path if File.exist?(path)
113
+ end
114
+
115
+ brotli_srcs = BROTLI_SUBDIRS.flat_map { |d| Dir[File.join(brotli_dir, d, "*.c")] }
116
+
117
+ zstd_srcs + lz4_srcs + brotli_srcs
118
+ end
119
+
120
+ def add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
121
+ [
122
+ File.join(zstd_dir, "lib"),
123
+ File.join(zstd_dir, "lib", "common"),
124
+ File.join(lz4_dir, "lib"),
125
+ File.join(brotli_dir, "c", "include"),
126
+ ].each { |d| $CPPFLAGS += " -I#{d}" }
127
+ end
128
+
129
+ def build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)
130
+ ZSTD_SUBDIRS.map { |d| File.join(zstd_dir, d) } +
131
+ [File.join(lz4_dir, "lib")] +
132
+ BROTLI_SUBDIRS.map { |d| File.join(brotli_dir, d) }
133
+ end
134
+
135
+ def deduplicate_sources!(sources)
136
+ seen = {}
137
+
138
+ sources.reject! do |src|
139
+ basename = File.basename(src)
140
+ duplicate = seen.key?(basename)
141
+ puts " SKIP duplicate: #{src}" if duplicate
142
+ seen[basename] = true
143
+ duplicate
144
+ end
145
+ end
146
+
147
+ def patch_makefile_vpath!(vpath_dirs)
148
+ makefile = File.read("Makefile")
149
+ return if makefile.include?("# vendored vpath")
150
+
151
+ vpath_lines = vpath_dirs.map { |d| "vpath %.c #{d}" }.join("\n")
152
+
153
+ makefile.sub!(/^(VPATH\s*=.*)$/m) { "#{Regexp.last_match(1)}\n# vendored vpath\n#{vpath_lines}" }
154
+ File.write("Makefile", makefile)
155
+ puts " Patched Makefile with #{vpath_dirs.length} VPATH entries"
156
+ end
157
+
158
+ # --- Main ---
159
+
160
# nil when no directory with a `.vendored` marker could be located.
VENDOR_DIR = find_vendor_dir
VENDORED = !VENDOR_DIR.nil?

if FORCE_VENDORED && !VENDORED
  abort "COMPRESS_FORCE_VENDORED is set, but ext/multi_compress/vendor/.vendored was not found"
end

# Forced vendored mode takes priority over a system-libraries request.
# `vpath_dirs` is nil exactly when the system libraries were configured.
if FORCE_VENDORED
  vpath_dirs = configure_vendored_libraries(VENDOR_DIR)
elsif USE_SYSTEM || !VENDORED
  configure_system_libraries
  vpath_dirs = nil
else
  vpath_dirs = configure_vendored_libraries(VENDOR_DIR)
end

$CFLAGS += " -O3"
$CFLAGS += " -DXXH_NAMESPACE=MULTICOMPRESS_"

case RUBY_PLATFORM
when /x86_64|amd64|aarch64|arm64/
  $CFLAGS += " -DBROTLI_BUILD_LITTLE_ENDIAN"
end

have_header("ruby/fiber/scheduler.h")

have_library("pthread") unless RUBY_PLATFORM.include?("darwin")

create_makefile("multi_compress/multi_compress")

# Patch whenever the vendored configuration actually ran. The previous guard
# also required USE_SYSTEM to be unset, which skipped the patch when forced
# vendored mode and a system-libraries request were combined.
patch_makefile_vpath!(vpath_dirs) if vpath_dirs