multi_compress 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +152 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/ext/multi_compress/extconf.rb +190 -0
- data/ext/multi_compress/multi_compress.c +2912 -0
- data/ext/multi_compress/vendor/.vendored +3 -0
- data/ext/multi_compress/vendor/brotli/c/common/constants.c +15 -0
- data/ext/multi_compress/vendor/brotli/c/common/constants.h +201 -0
- data/ext/multi_compress/vendor/brotli/c/common/context.c +156 -0
- data/ext/multi_compress/vendor/brotli/c/common/context.h +113 -0
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +5916 -0
- data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +64 -0
- data/ext/multi_compress/vendor/brotli/c/common/platform.c +23 -0
- data/ext/multi_compress/vendor/brotli/c/common/platform.h +541 -0
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +521 -0
- data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +75 -0
- data/ext/multi_compress/vendor/brotli/c/common/transform.c +291 -0
- data/ext/multi_compress/vendor/brotli/c/common/transform.h +85 -0
- data/ext/multi_compress/vendor/brotli/c/common/version.h +51 -0
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +78 -0
- data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +423 -0
- data/ext/multi_compress/vendor/brotli/c/dec/decode.c +2875 -0
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +342 -0
- data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +122 -0
- data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +733 -0
- data/ext/multi_compress/vendor/brotli/c/dec/state.c +183 -0
- data/ext/multi_compress/vendor/brotli/c/dec/state.h +400 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +207 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +40 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +939 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +96 -0
- data/ext/multi_compress/vendor/brotli/c/enc/backward_references_inc.h +189 -0
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +36 -0
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +64 -0
- data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +127 -0
- data/ext/multi_compress/vendor/brotli/c/enc/block_encoder_inc.h +34 -0
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +217 -0
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +52 -0
- data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +481 -0
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +1336 -0
- data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +89 -0
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +57 -0
- data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +49 -0
- data/ext/multi_compress/vendor/brotli/c/enc/cluster_inc.h +325 -0
- data/ext/multi_compress/vendor/brotli/c/enc/command.c +28 -0
- data/ext/multi_compress/vendor/brotli/c/enc/command.h +191 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +207 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +74 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +800 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +86 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +657 -0
- data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +72 -0
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +1848 -0
- data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +25 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encode.c +1996 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +640 -0
- data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +157 -0
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +504 -0
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +123 -0
- data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +542 -0
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +105 -0
- data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +67 -0
- data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +72 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash.h +728 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_composite_inc.h +140 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +295 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +262 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +258 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +266 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_rolling_inc.h +212 -0
- data/ext/multi_compress/vendor/brotli/c/enc/hash_to_binary_tree_inc.h +330 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +100 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +64 -0
- data/ext/multi_compress/vendor/brotli/c/enc/histogram_inc.h +51 -0
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +180 -0
- data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +32 -0
- data/ext/multi_compress/vendor/brotli/c/enc/memory.c +194 -0
- data/ext/multi_compress/vendor/brotli/c/enc/memory.h +131 -0
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +677 -0
- data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +106 -0
- data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +185 -0
- data/ext/multi_compress/vendor/brotli/c/enc/params.h +47 -0
- data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +54 -0
- data/ext/multi_compress/vendor/brotli/c/enc/quality.h +202 -0
- data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +168 -0
- data/ext/multi_compress/vendor/brotli/c/enc/state.h +104 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +542 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +41 -0
- data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +5866 -0
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +85 -0
- data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +33 -0
- data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +88 -0
- data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +409 -0
- data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +501 -0
- data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +305 -0
- data/ext/multi_compress/vendor/brotli/c/include/brotli/shared_dictionary.h +100 -0
- data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +83 -0
- data/ext/multi_compress/vendor/lz4/lib/LICENSE +24 -0
- data/ext/multi_compress/vendor/lz4/lib/Makefile +244 -0
- data/ext/multi_compress/vendor/lz4/lib/README.md +193 -0
- data/ext/multi_compress/vendor/lz4/lib/dll/example/Makefile +63 -0
- data/ext/multi_compress/vendor/lz4/lib/dll/example/README.md +69 -0
- data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.sln +25 -0
- data/ext/multi_compress/vendor/lz4/lib/dll/example/fullbench-dll.vcxproj +182 -0
- data/ext/multi_compress/vendor/lz4/lib/liblz4-dll.rc.in +35 -0
- data/ext/multi_compress/vendor/lz4/lib/liblz4.pc.in +14 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4.c +2829 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4.h +884 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4file.c +341 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4file.h +93 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4frame.c +2136 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4frame.h +751 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4frame_static.h +47 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4hc.c +2192 -0
- data/ext/multi_compress/vendor/lz4/lib/lz4hc.h +414 -0
- data/ext/multi_compress/vendor/lz4/lib/xxhash.c +1030 -0
- data/ext/multi_compress/vendor/lz4/lib/xxhash.h +328 -0
- data/ext/multi_compress/vendor/zstd/lib/BUCK +232 -0
- data/ext/multi_compress/vendor/zstd/lib/Makefile +357 -0
- data/ext/multi_compress/vendor/zstd/lib/README.md +217 -0
- data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +478 -0
- data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +335 -0
- data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +213 -0
- data/ext/multi_compress/vendor/zstd/lib/common/debug.c +24 -0
- data/ext/multi_compress/vendor/zstd/lib/common/debug.h +107 -0
- data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +368 -0
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +56 -0
- data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +159 -0
- data/ext/multi_compress/vendor/zstd/lib/common/fse.h +717 -0
- data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +403 -0
- data/ext/multi_compress/vendor/zstd/lib/common/huf.h +364 -0
- data/ext/multi_compress/vendor/zstd/lib/common/mem.h +442 -0
- data/ext/multi_compress/vendor/zstd/lib/common/pool.c +355 -0
- data/ext/multi_compress/vendor/zstd/lib/common/pool.h +84 -0
- data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +137 -0
- data/ext/multi_compress/vendor/zstd/lib/common/threading.c +122 -0
- data/ext/multi_compress/vendor/zstd/lib/common/threading.h +155 -0
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +24 -0
- data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +5686 -0
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +83 -0
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +111 -0
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +493 -0
- data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +163 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +134 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +741 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +181 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +75 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +1370 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +6327 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +1458 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +159 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +31 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +442 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +573 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +676 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +696 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +38 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +675 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +37 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +2104 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +125 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +724 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +117 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +1446 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +56 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +1859 -0
- data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +113 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +1889 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +585 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +244 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +2230 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +2072 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +68 -0
- data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +236 -0
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +214 -0
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +26 -0
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +167 -0
- data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +75 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +1253 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +158 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.c +1913 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +67 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +766 -0
- data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +1205 -0
- data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +48 -0
- data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +63 -0
- data/ext/multi_compress/vendor/zstd/lib/dll/example/build_package.bat +20 -0
- data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.sln +25 -0
- data/ext/multi_compress/vendor/zstd/lib/dll/example/fullbench-dll.vcxproj +181 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +415 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +2158 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +94 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +3518 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +93 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +3160 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +93 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +3647 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +142 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +4050 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +162 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +4154 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +172 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +4541 -0
- data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +187 -0
- data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +203 -0
- data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +16 -0
- data/ext/multi_compress/vendor/zstd/lib/module.modulemap +25 -0
- data/ext/multi_compress/vendor/zstd/lib/zdict.h +452 -0
- data/ext/multi_compress/vendor/zstd/lib/zstd.h +2575 -0
- data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +95 -0
- data/lib/multi_compress/version.rb +5 -0
- data/lib/multi_compress.rb +329 -0
- metadata +322 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 9faa0c73c6b3926604e7d3477c5a519a8898fd8a08e0f9d71b84c6fce87a815d
|
|
4
|
+
data.tar.gz: '0818b9fa5ea7cf2958910150185571a9855a1728f44c6f425ac590a2256298a0'
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b94255c0103f3f89ae4099627a61a972b6da00220ace21d9fbd6d0c3f1641cb46d2ae79126c75f738e21ebd5359c80f6bdee11d958200731e5d402e1058d9fad
|
|
7
|
+
data.tar.gz: c5ce6f3cb3e6c7d1b2fe08666dd916e66646f8077669f9908f61e2de64e311be1f7052345617faa045526c26136f38cc659bfabd92b596e826dd79224a1962fe
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.2.4]
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- Keyword validation
|
|
7
|
+
|
|
8
|
+
### Fixed
|
|
9
|
+
- Fixed gem packaging so vendored builds work correctly after installation.
|
|
10
|
+
Previously, installed gems could incorrectly fall back to system libraries because
|
|
11
|
+
`ext/multi_compress/vendor/.vendored` was not included in the packaged gem.
|
|
12
|
+
- Fixed vendored zstd build path in `extconf.rb`.
|
|
13
|
+
|
|
14
|
+
## [0.2.3]
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- Constants
|
|
18
|
+
|
|
19
|
+
## [0.2.2]
|
|
20
|
+
|
|
21
|
+
### Added
|
|
22
|
+
- New decompression safeguards:
|
|
23
|
+
- `max_output_size:` on `MultiCompress.decompress`, `MultiCompress::Inflater.new`, and `MultiCompress::Reader.open`
|
|
24
|
+
- `max_ratio:` with default `1000` and trusted-input opt-out via `nil`
|
|
25
|
+
- Cumulative streaming output budget enforcement
|
|
26
|
+
- Dictionary file size cap (`32MB`) for `MultiCompress::Dictionary.load`
|
|
27
|
+
- New docs/code parity and limits tests
|
|
28
|
+
- Linux-only ASAN/UBSAN CI job
|
|
29
|
+
- Local `script/valgrind.sh` helper for developer verification (with Ruby VM suppressions)
|
|
30
|
+
- Local `script/sanitize.sh` helper for reproducing the ASAN/UBSAN CI job locally on Linux
|
|
31
|
+
- Optional `./build.sh --test`, `./build.sh --valgrind`, and `./build.sh --sanitize` modes
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
- Vendored **zstd** is pinned to **1.5.2** in the current release line.
|
|
35
|
+
- This restores reliable `MultiCompress::Zstd.train_dictionary(...)` behavior on tested platforms, including arm64-darwin.
|
|
36
|
+
- Newer vendored zstd versions are temporarily avoided until the dictionary training regression is understood and fixed upstream.
|
|
37
|
+
|
|
38
|
+
### Documentation
|
|
39
|
+
- Documentation now matches current runtime behavior:
|
|
40
|
+
- Ruby requirement is documented as `>= 3.1.0`
|
|
41
|
+
- Supported named levels are `:fastest`, `:default`, and `:best`
|
|
42
|
+
- Zstd `:best` is documented with the current runtime mapping (level 19)
|
|
43
|
+
- LZ4 is documented as using a custom internal format that is not compatible with the standard `lz4` CLI
|
|
44
|
+
|
|
45
|
+
### Fixed
|
|
46
|
+
- Corrected dictionary training docs:
|
|
47
|
+
- `MultiCompress::Zstd.train_dictionary` is documented as supported
|
|
48
|
+
- `MultiCompress::Brotli.train_dictionary` is documented as unsupported in the current implementation
|
|
49
|
+
|
|
50
|
+
### Notes
|
|
51
|
+
- The default `max_ratio: 1000` is a user-visible behavior change. Workloads that legitimately expand beyond `1000:1` must pass `max_ratio: nil`.
|
|
52
|
+
|
|
53
|
+
## [0.2.1] — 2026-04-15
|
|
54
|
+
- Bumped vendored zstd from 1.5.6 to 1.5.7.
|
|
55
|
+
- Minor performance optimizations.
|
|
56
|
+
|
|
57
|
+
## [0.2.0] — 2026-04-15
|
|
58
|
+
|
|
59
|
+
### Added
|
|
60
|
+
- **Fiber-friendly execution** 🎉: compression and decompression now cooperate with Ruby's `Fiber::Scheduler`, making MultiCompress safe to use under [async](https://github.com/socketry/async), [falcon](https://github.com/socketry/falcon), and any other scheduler-based runtime.
|
|
61
|
+
- When a scheduler is active, CPU-heavy work is offloaded to a dedicated worker thread via `rb_thread_create`, while the calling fiber is parked with `rb_fiber_scheduler_block`. The scheduler is free to run other fibers (IO, timers, parallel compression) until the worker finishes and calls `rb_fiber_scheduler_unblock`.
|
|
62
|
+
- Covers **all three algorithms** (`zstd`, `lz4`, `brotli`) and **both API shapes**:
|
|
63
|
+
- One-shot `MultiCompress.compress` / `MultiCompress.decompress`
|
|
64
|
+
- Streaming `MultiCompress::Deflater#write` / `MultiCompress::Inflater#write`
|
|
65
|
+
- No API changes — the fiber-friendly path is selected automatically when a scheduler is detected.
|
|
66
|
+
- Chunks smaller than 16 KB (`FIBER_STREAM_THRESHOLD`) stay inline to avoid pthread-create overhead for micro-workloads.
|
|
67
|
+
- Outside of a scheduler, the previous `rb_thread_call_without_gvl` path is used unchanged.
|
|
68
|
+
- Generic `run_via_fiber_worker(scheduler, func, arg)` internal helper — encapsulates the pthread-worker + scheduler-block pattern and is reused across every fiber-friendly code path.
|
|
69
|
+
- New test suite `test/test_fiber_scheduler.rb` verifying scheduler progress during compress/decompress for every algorithm, in both one-shot and streaming modes.
|
|
70
|
+
|
|
71
|
+
### Changed
|
|
72
|
+
- `fiber_maybe_yield` now actually yields: previously it was a no-op counter; it now calls `Fiber.scheduler.yield` via `rb_funcall` when the byte threshold is crossed. Used for `lz4` streaming paths where pthread-per-block would be overkill.
|
|
73
|
+
- Streaming `Deflater#write` for zstd/brotli uses the fiber-worker path for chunks ≥ 16 KB; the old inline path is kept as a fallback for small chunks and when no scheduler is present.
|
|
74
|
+
- Streaming `Inflater#write` for zstd/brotli similarly routes chunks through the fiber-worker path when possible.
|
|
75
|
+
- LZ4 one-shot decompress loop extracted into `lz4_decompress_all_nogvl` so it can run on the worker thread with the GVL released.
|
|
76
|
+
|
|
77
|
+
### Performance
|
|
78
|
+
- No regressions for the non-fiber case: all existing GVL-unlocking fast paths are preserved untouched.
|
|
79
|
+
- Under a fiber scheduler: compression no longer starves concurrent IO fibers. Previously, compressing 50 MB with `zstd` level 3 would block every other fiber in the reactor for the entire duration; now the reactor keeps servicing IO, timers, and other compute fibers throughout.
|
|
80
|
+
- Streaming threshold of 16 KB chosen empirically: below that, pthread-create overhead (~20-50μs) eats the gains; at 16 KB+ the fiber-friendly path is a near-free win.
|
|
81
|
+
|
|
82
|
+
### Upgrading
|
|
83
|
+
No code changes required. If you run under `Async`/`Falcon`/`Fiber::Scheduler`, you'll immediately get non-blocking compression. If you don't, behavior is identical to 0.1.2.
|
|
84
|
+
|
|
85
|
+
## [0.1.2] — 2026-04-14
|
|
86
|
+
|
|
87
|
+
### Fixed
|
|
88
|
+
- **LZ4 Streaming Correctness**: Fixed a critical bug in the LZ4 Inflater that caused incorrect decompression of large data (>1MB)
|
|
89
|
+
- Removed complex streaming API (`LZ4_decompress_safe_continue`) that had implementation errors
|
|
90
|
+
- Restored simple, reliable direct decompression (`LZ4_decompress_safe`)
|
|
91
|
+
- All roundtrip tests now pass correctly
|
|
92
|
+
- **LZ4 Deflater Ring Buffer**: Fixed ring buffer management to prevent overflow
|
|
93
|
+
- Added proper offset reset when approaching buffer limit
|
|
94
|
+
- Improved memory safety
|
|
95
|
+
|
|
96
|
+
### Added
|
|
97
|
+
- **ZSTD Dictionary Training**: Added `MultiCompress::Zstd.train_dictionary(samples, size:)` method
|
|
98
|
+
- Uses ZDICT API for optimal dictionary generation
|
|
99
|
+
- Allows creating custom dictionaries for better compression on similar data
|
|
100
|
+
- **Memory Optimization Helpers**:
|
|
101
|
+
- Added `rb_binary_str_buf_reserve()` for efficient pre-allocation
|
|
102
|
+
- Added `grow_binary_str()` for safe capacity management
|
|
103
|
+
|
|
104
|
+
### Changed
|
|
105
|
+
- **Memory Efficiency**: Eliminated double-buffering in compress/decompress operations
|
|
106
|
+
- ZSTD, LZ4, Brotli now write directly to Ruby strings when unlocking GVL
|
|
107
|
+
- Reduced memory allocations and improved cache locality
|
|
108
|
+
- **Dictionary Support in Streaming**: Fixed dictionary reloading in `reset()` methods
|
|
109
|
+
- Dictionaries are now properly saved as instance variables
|
|
110
|
+
- `deflater.reset` and `inflater.reset` correctly restore dictionary state
|
|
111
|
+
- **Brotli Error Handling**: Added comprehensive error checking for dictionary operations
|
|
112
|
+
- All Brotli API calls now verify success/failure
|
|
113
|
+
- More informative error messages
|
|
114
|
+
|
|
115
|
+
### Performance Notes
|
|
116
|
+
- **LZ4 Streaming**: Performance regression on large chunks (128KB: ~40-50% slower decompression)
|
|
117
|
+
- Trade-off for correctness: reliable decompression is prioritized over peak performance
|
|
118
|
+
- Small chunks (4-32KB) maintain original performance
|
|
119
|
+
- Overall impact minimal as streaming typically uses smaller chunks
|
|
120
|
+
- **Memory Usage**: Reduced due to elimination of intermediate buffers
|
|
121
|
+
- **Dictionary Operations**: Faster on repeated use due to improved caching
|
|
122
|
+
|
|
123
|
+
### Security
|
|
124
|
+
- Improved LZ4 format validation in `detect_algo()`
|
|
125
|
+
- Now checks for proper tail marker (4 zero bytes)
|
|
126
|
+
- Better protection against malformed data
|
|
127
|
+
|
|
128
|
+
## [0.1.1] — Unreleased
|
|
129
|
+
|
|
130
|
+
### Breaking Changes
|
|
131
|
+
- **REMOVED**: `MultiCompress::Dictionary.train` method - general dictionary training interface removed
|
|
132
|
+
- Dictionary training is now algorithm-specific to clarify capabilities
|
|
133
|
+
|
|
134
|
+
### Added
|
|
135
|
+
- `MultiCompress::Brotli.train_dictionary` - Brotli-specific dictionary training method
|
|
136
|
+
- Clear indication that only Brotli supports dictionary training in this implementation
|
|
137
|
+
|
|
138
|
+
### Changed
|
|
139
|
+
- Dictionary training API is now more explicit about which algorithms support it
|
|
140
|
+
- Improved API clarity by making training method algorithm-specific
|
|
141
|
+
|
|
142
|
+
## [0.1.0] — Unreleased
|
|
143
|
+
|
|
144
|
+
### Added
|
|
145
|
+
- One-shot compress/decompress for zstd, lz4, brotli
|
|
146
|
+
- Streaming API: Deflater, Inflater
|
|
147
|
+
- IO wrappers: Writer, Reader with block-style open
|
|
148
|
+
- Dictionary API skeleton (zstd, brotli)
|
|
149
|
+
- Auto-detect algorithm by magic bytes and file extension
|
|
150
|
+
- Named compression levels: :fastest, :default, :best
|
|
151
|
+
- CRC32 / Adler32 utility stubs
|
|
152
|
+
- Algorithm info: `.algorithms`, `.available?`, `.version`
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Roman Haydarov
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# MultiCompress 🗜️
|
|
2
|
+
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/multi_compress.svg)](https://badge.fury.io/rb/multi_compress)
|
|
4
|
+
|
|
5
|
+
> **Status:** functional, well-tested, and actively evolving. The current release is suitable for real workloads, but the API and implementation details are still being refined in upcoming releases.
|
|
6
|
+
|
|
7
|
+
Modern compression technology: **zstd**, **lz4**, **brotli** — unified compression platform with native C performance, **fiber-friendly** for modern async Ruby stacks.
|
|
8
|
+
|
|
9
|
+
Bundled library versions in the current release:
|
|
10
|
+
- **zstd 1.5.2**
|
|
11
|
+
- **lz4 1.10.0**
|
|
12
|
+
- **brotli 1.1.0**
|
|
13
|
+
|
|
14
|
+
📖 **[Get Started →](GET_STARTED.md)** — Complete technology overview, algorithms, and implementation details
|
|
15
|
+
|
|
16
|
+
## Technology Overview
|
|
17
|
+
|
|
18
|
+
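**MultiCompress** is a comprehensive compression system that unites three cutting-edge algorithms in a single platform. In the benchmarks below, zstd and lz4 run roughly 3–10x faster than traditional zlib, and zstd and brotli achieve better compression ratios on most payloads; lz4 trades ratio for speed, while brotli trades speed for ratio.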
|
|
19
|
+
|
|
20
|
+
| Algorithm | Strength | Best for |
|
|
21
|
+
|-----------|----------|----------|
|
|
22
|
+
| **zstd** | Best speed/ratio balance | Cache, logs, backups |
|
|
23
|
+
| **lz4** | Fastest compress/decompress | IPC, hot cache, real-time |
|
|
24
|
+
| **brotli** | Best ratio for HTTP | Web assets, API responses |
|
|
25
|
+
|
|
26
|
+
## How It Works
|
|
27
|
+
|
|
28
|
+
**MultiCompress** packages modern compression algorithms (zstd, lz4, brotli) with their native C libraries, providing a unified interface. The system includes vendored sources of all compression libraries, eliminating external dependencies.
|
|
29
|
+
|
|
30
|
+
### Key Design Principles
|
|
31
|
+
|
|
32
|
+
- **Dictionary support**: Runtime dictionary use is supported for zstd and brotli; Zstd dictionary training is available in the current release line
|
|
33
|
+
- **Zero external dependencies**: All C libraries are vendored and compiled
|
|
34
|
+
- **Unified API**: Same interface for all algorithms — just change the `algo:` parameter
|
|
35
|
+
- **Performance first**: Direct bindings to C libraries, minimal overhead
|
|
36
|
+
- **Fiber-friendly**: Compression and decompression cooperate with Ruby's fiber scheduler — safe to use under `async`, `falcon`, or any `Fiber::Scheduler`-based runtime without blocking the event loop. See [GET_STARTED.md](GET_STARTED.md) for details and examples.
|
|
37
|
+
- **Memory efficient**: Streaming support for large datasets, proper resource cleanup
|
|
38
|
+
- **Operationally focused**: Clear errors, comprehensive tests, and streaming support for practical workloads
|
|
39
|
+
|
|
40
|
+
### Algorithm Auto-Detection
|
|
41
|
+
|
|
42
|
+
The system can automatically detect compression algorithms when decompressing data:
|
|
43
|
+
|
|
44
|
+
- **zstd**: Detected by magic bytes `28 B5 2F FD` (little-endian)
|
|
45
|
+
- **lz4**: Detected by internal format header validation (custom internal format, NOT compatible with the standard `lz4` CLI; optional standard frame support may be added in a future release)
|
|
46
|
+
- **brotli**: Requires explicit `algo: :brotli` parameter - no auto-detection
|
|
47
|
+
|
|
48
|
+
**Important**: Auto-detection only works for ZSTD and LZ4. Brotli data must be decompressed with explicit algorithm specification.
|
|
49
|
+
|
|
50
|
+
**Security**: Decompression now enforces a default 256MB output cap, cumulative streaming limits, a default ratio guard of 1000:1, and a 32MB dictionary file size cap.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
## Security limits
|
|
54
|
+
|
|
55
|
+
Decompression-facing APIs support conservative defaults intended to protect against decompression bombs and accidental resource spikes:
|
|
56
|
+
|
|
57
|
+
- **Default output cap:** `256MB`
|
|
58
|
+
- **Streaming cumulative cap:** enforced across the lifetime of an `Inflater`/`Reader`
|
|
59
|
+
- **Default ratio guard:** `1000:1`
|
|
60
|
+
- **Trusted-input opt-out:** pass `max_ratio: nil`
|
|
61
|
+
- **Dictionary file size cap:** `32MB` for `MultiCompress::Dictionary.load`
|
|
62
|
+
|
|
63
|
+
Examples:
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
MultiCompress.decompress(blob, algo: :zstd, max_output_size: 64 * 1024 * 1024)
|
|
67
|
+
MultiCompress.decompress(blob, algo: :brotli, max_ratio: nil)
|
|
68
|
+
|
|
69
|
+
MultiCompress::Reader.open("archive.zst", max_output_size: 128 * 1024 * 1024, max_ratio: 500) do |reader|
|
|
70
|
+
puts reader.read
|
|
71
|
+
end
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`max_output_size: nil` keeps the native default cap of `256MB`. `max_ratio: nil` disables the ratio guard for trusted input.
|
|
75
|
+
|
|
76
|
+
## Algorithm Comparison
|
|
77
|
+
|
|
78
|
+
| Algorithm | Speed | Ratio | Best Use Case |
|
|
79
|
+
|-----------|-------|--------|---------------|
|
|
80
|
+
| **lz4** | Fastest | Good | Real-time processing, IPC, hot cache paths |
|
|
81
|
+
| **zstd** | Fast | Excellent | General purpose, logs, backups, web APIs |
|
|
82
|
+
| **brotli** | Slower | Best | Static assets, CDN, long-term storage |
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
## Benchmark Results
|
|
86
|
+
|
|
87
|
+
> **📝 Note on v0.2.0**: Performance numbers below are from the v0.2.0 build with fiber-friendly paths enabled. There is no throughput regression compared to v0.1.2 — the fiber-friendly path is only taken when a `Fiber::Scheduler` is active, and even then the worker-thread overhead is negligible for payloads large enough to benefit.
|
|
88
|
+
|
|
89
|
+
Performance comparison against Ruby's built-in zlib compression (200 iterations per test):
|
|
90
|
+
|
|
91
|
+
### 🗜️ COMPRESSION RATIO (%, lower is better)
|
|
92
|
+
```
|
|
93
|
+
┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
|
|
94
|
+
│ Configuration │ zlib │ lz4 │ zstd │ brotli │
|
|
95
|
+
├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
|
|
96
|
+
│ Small JSON (~10KB, GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
|
|
97
|
+
│ Small text (~10KB, GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
|
|
98
|
+
│ Small JSON (~10KB, no GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
|
|
99
|
+
│ Small text (~10KB, no GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
|
|
100
|
+
│ Medium JSON (~370KB, GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
|
|
101
|
+
│ Medium logs (~168KB, GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
|
|
102
|
+
│ Medium JSON (~370KB, no GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
|
|
103
|
+
│ Medium logs (~168KB, no GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
|
|
104
|
+
│ Large JSON (~1.6MB, GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
|
|
105
|
+
│ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
|
|
106
|
+
│ Large JSON (~1.6MB, no GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
|
|
107
|
+
│ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.8% │ 2.1% │
|
|
108
|
+
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### ⚡ TOTAL TIME (compress + decompress, ms — lower is faster)
|
|
112
|
+
```
|
|
113
|
+
┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
|
|
114
|
+
│ Configuration │ zlib │ lz4 │ zstd │ brotli │
|
|
115
|
+
├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
|
|
116
|
+
│ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.14 │
|
|
117
|
+
│ Small text (~10KB, GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11 │
|
|
118
|
+
│ Small JSON (~10KB, no GC) │ 0.06 │ 0.01 │ 0.02 │ 0.13 │
|
|
119
|
+
│ Small text (~10KB, no GC) │ 0.04 │ 0.00 │ 0.01 │ 0.11 │
|
|
120
|
+
│ Medium JSON (~370KB, GC) │ 2.73 │ 0.29 │ 0.42 │ 2.36 │
|
|
121
|
+
│ Medium logs (~168KB, GC) │ 1.23 │ 0.14 │ 0.18 │ 0.92 │
|
|
122
|
+
│ Medium JSON (~370KB, no GC) │ 2.72 │ 0.28 │ 0.41 │ 2.41 │
|
|
123
|
+
│ Medium logs (~168KB, no GC) │ 1.26 │ 0.13 │ 0.18 │ 0.96 │
|
|
124
|
+
│ Large JSON (~1.6MB, GC) │ 12.44 │ 1.38 │ 1.96 │ 12.44 │
|
|
125
|
+
│ Large logs (~600KB, GC) │ 4.29 │ 0.46 │ 0.49 │ 2.85 │
|
|
126
|
+
│ Large JSON (~1.6MB, no GC) │ 12.22 │ 1.28 │ 1.86 │ 11.83 │
|
|
127
|
+
│ Large logs (~600KB, no GC) │ 4.39 │ 0.42 │ 0.44 │ 2.86 │
|
|
128
|
+
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### 📊 SPEEDUP vs ZLIB (higher is better)
|
|
132
|
+
```
|
|
133
|
+
┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
|
|
134
|
+
│ Configuration │ zlib │ lz4 │ zstd │ brotli │
|
|
135
|
+
├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
|
|
136
|
+
│ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.36x │
|
|
137
|
+
│ Small text (~10KB, GC) │ 1.00x │ N/A │ 4.00x │ 0.36x │
|
|
138
|
+
│ Small JSON (~10KB, no GC) │ 1.00x │ 6.00x │ 3.00x │ 0.46x │
|
|
139
|
+
│ Small text (~10KB, no GC) │ 1.00x │ N/A │ 4.00x │ 0.36x │
|
|
140
|
+
│ Medium JSON (~370KB, GC) │ 1.00x │ 9.41x │ 6.50x │ 1.16x │
|
|
141
|
+
│ Medium logs (~168KB, GC) │ 1.00x │ 8.79x │ 6.83x │ 1.34x │
|
|
142
|
+
│ Medium JSON (~370KB, no GC) │ 1.00x │ 9.71x │ 6.63x │ 1.13x │
|
|
143
|
+
│ Medium logs (~168KB, no GC) │ 1.00x │ 9.69x │ 7.00x │ 1.31x │
|
|
144
|
+
│ Large JSON (~1.6MB, GC) │ 1.00x │ 9.01x │ 6.35x │ 1.00x │
|
|
145
|
+
│ Large logs (~600KB, GC) │ 1.00x │ 9.33x │ 8.76x │ 1.51x │
|
|
146
|
+
│ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.55x │ 6.57x │ 1.03x │
|
|
147
|
+
│ Large logs (~600KB, no GC) │ 1.00x │ 10.45x │ 9.98x │ 1.53x │
|
|
148
|
+
└─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
**Dependencies for benchmarking:**
|
|
153
|
+
- `memory_profiler` — Memory usage analysis
|
|
154
|
+
- `benchmark-ips` — Iterations per second benchmarking
|
|
155
|
+
|
|
156
|
+
Or use the build script:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
./build.sh
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Requirements
|
|
163
|
+
|
|
164
|
+
- Ruby >= 3.1.0
|
|
165
|
+
- C compiler (gcc, clang)
|
|
166
|
+
|
|
167
|
+
## Contributing
|
|
168
|
+
|
|
169
|
+
1. Fork it
|
|
170
|
+
2. Create your feature branch (`git checkout -b feature/my-feature`)
|
|
171
|
+
3. Commit your changes (`git commit -am 'Add feature'`)
|
|
172
|
+
4. Push to the branch (`git push origin feature/my-feature`)
|
|
173
|
+
5. Create a Pull Request
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT — see [LICENSE.txt](LICENSE.txt).
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "mkmf"
|
|
4
|
+
|
|
5
|
+
# Build-mode switches. Each is taken from the extconf command-line flag when
# given, otherwise from the matching environment variable; any truthy value
# turns the mode on.
USE_SYSTEM = arg_config("--use-system-libraries") || ENV["COMPRESS_USE_SYSTEM_LIBRARIES"]
FORCE_VENDORED = arg_config("--force-vendored") || ENV["COMPRESS_FORCE_VENDORED"]

# Sub-directories (relative to each vendored library root) that are globbed
# for "*.c" files, and the individual lz4 sources looked up under "lib".
ZSTD_SUBDIRS = %w[
  lib/common
  lib/compress
  lib/decompress
  lib/dictBuilder
].freeze
BROTLI_SUBDIRS = %w[
  c/common
  c/enc
  c/dec
].freeze
LZ4_SOURCES = %w[
  lz4.c
  lz4hc.c
  lz4frame.c
].freeze
|
|
13
|
+
|
|
14
|
+
# Locates the directory holding the vendored library sources.
#
# Probes a few fixed locations relative to this file and to the current
# working directory, then "<dir>/ext/multi_compress/vendor" for this file's
# directory and five of its ancestors. A candidate qualifies when it contains
# an entry named ".vendored".
#
# Returns the absolute path of the first qualifying candidate, or nil.
def find_vendor_dir
  suffix = %w[ext multi_compress vendor]

  fixed = [
    File.join(__dir__, "vendor"),
    File.join(__dir__, "..", "..", "..", "..", *suffix),
    File.expand_path("../../ext/multi_compress/vendor", __dir__),
    File.join(Dir.pwd, *suffix),
  ]

  # __dir__ itself followed by its next five parent directories.
  bases = [__dir__]
  5.times { bases << File.dirname(bases.last) }
  walked = bases.map { |base| File.join(base, *suffix) }

  match = (fixed + walked).find { |dir| File.exist?(File.join(dir, ".vendored")) }
  match && File.expand_path(match)
end
|
|
31
|
+
|
|
32
|
+
# Locates the directory that contains "multi_compress.c".
#
# Checks this file's directory, one fixed relative location, and then
# "<dir>/ext/multi_compress" for this file's directory and five of its
# ancestors.
#
# Returns the absolute path of the first directory holding the file, or
# __dir__ when none of the candidates does.
def find_compress_c_dir
  suffix = %w[ext multi_compress]
  search = [__dir__, File.join(__dir__, "..", "..", "..", "..", *suffix)]

  base = __dir__
  6.times do
    search.push(File.join(base, *suffix))
    base = File.dirname(base)
  end

  search.each do |dir|
    return File.expand_path(dir) if File.exist?(File.join(dir, "multi_compress.c"))
  end

  __dir__
end
|
|
47
|
+
|
|
48
|
+
# Configures the build to link against system-installed zstd, lz4 and brotli.
#
# On darwin platforms the Homebrew paths are added first. The three main
# libraries are mandatory (require_system_lib aborts when one is missing);
# the remaining header/library probes are issued without checking results.
def configure_system_libraries
  puts "Building with SYSTEM libraries"

  on_darwin = RUBY_PLATFORM.include?("darwin")
  configure_homebrew if on_darwin

  # name, header, probe function, library to link
  [
    ["zstd", "zstd.h", "ZSTD_compress", "zstd"],
    ["lz4", "lz4.h", "LZ4_compress_default", "lz4"],
    ["brotli", "brotli/encode.h", "BrotliEncoderCreateInstance", "brotlienc"],
  ].each do |name, header, func, lib|
    require_system_lib(name, header: header, func: func, lib: lib)
  end

  have_header("zdict.h")
  have_header("lz4hc.h")
  have_library("lz4", "LZ4_compress_HC")
  have_header("brotli/decode.h")
  have_library("brotlidec", "BrotliDecoderCreateInstance")
end
|
|
63
|
+
|
|
64
|
+
# Adds Homebrew's include and lib directories to the compiler/linker flags.
#
# The prefix is taken from the HOMEBREW_PREFIX environment variable when it is
# set and non-empty. Otherwise the first existing directory among
# "/opt/homebrew" (Apple Silicon default) and "/usr/local" (Intel default) is
# used, falling back to "/opt/homebrew" when neither exists.
def configure_homebrew
  prefix = ENV["HOMEBREW_PREFIX"].to_s
  if prefix.empty?
    known_prefixes = %w[/opt/homebrew /usr/local]
    prefix = known_prefixes.find { |dir| File.directory?(dir) } || "/opt/homebrew"
  end

  dir_config("homebrew", prefix)
  $CPPFLAGS += " -I#{prefix}/include"
  $LDFLAGS += " -L#{prefix}/lib"
end
|
|
69
|
+
|
|
70
|
+
# Ensures a mandatory system library is usable, aborting the build otherwise.
#
# name   - package name used in the error message.
# header - header file that must be found.
# func   - function that must be resolvable in the library.
# lib    - library name to link; defaults to +name+.
#
# The library probe only runs when the header probe succeeded.
def require_system_lib(name, header:, func:, lib: name)
  available = have_header(header) && have_library(lib, func)

  abort "Missing #{name}. Install: apt install lib#{name}-dev / brew install #{name}" unless available
end
|
|
75
|
+
|
|
76
|
+
# Configures the build to compile the vendored zstd, lz4 and brotli sources.
#
# vendor_dir - directory containing the ".vendored" file and the "zstd",
#              "lz4" and "brotli" source trees.
#
# Sets $srcs to "multi_compress.c" plus the basenames of the vendored C files,
# sets $VPATH to the extension source directory followed by the vendored
# source directories, appends include paths and -DZSTD_DISABLE_ASM to
# $CPPFLAGS, and clears $warnflags.
#
# Returns the list of vendored source directories.
def configure_vendored_libraries(vendor_dir)
  versions = File.read(File.join(vendor_dir, ".vendored"))
  puts "Building with VENDORED libraries from #{vendor_dir}"
  puts "  forced vendored mode enabled" if FORCE_VENDORED
  # One entry per non-blank line, separated by ", ". (String#tr maps
  # character-for-character, so it cannot expand "\n" into ", ".)
  puts "  #{versions.lines.map(&:strip).reject(&:empty?).join(", ")}"

  zstd_dir = File.join(vendor_dir, "zstd")
  lz4_dir = File.join(vendor_dir, "lz4")
  brotli_dir = File.join(vendor_dir, "brotli")

  all_vendor_srcs = collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)

  puts "  #{all_vendor_srcs.length} vendored C files"

  add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
  $CPPFLAGS += " -DZSTD_DISABLE_ASM"

  vpath_dirs = build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)

  # $srcs lists basenames only, so two files sharing a basename would collide.
  deduplicate_sources!(all_vendor_srcs)

  compress_c_dir = find_compress_c_dir

  $srcs = ["multi_compress.c"] + all_vendor_srcs.map { |s| File.basename(s) }
  $VPATH = [compress_c_dir] + vpath_dirs

  $warnflags = ""

  vpath_dirs
end
|
|
106
|
+
|
|
107
|
+
# Gathers the paths of all vendored C sources to compile.
#
# zstd and brotli contribute every "*.c" file found directly inside their
# configured sub-directories; lz4 contributes only the files named in
# LZ4_SOURCES that actually exist under "<lz4_dir>/lib".
#
# Returns one array: zstd sources, then lz4 sources, then brotli sources.
def collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
  glob_c = lambda do |root, subdirs|
    subdirs.map { |sub| Dir.glob(File.join(root, sub, "*.c")) }.flatten(1)
  end

  lz4_candidates = LZ4_SOURCES.map { |file| File.join(lz4_dir, "lib", file) }
  lz4_present = lz4_candidates.select { |path| File.exist?(path) }

  glob_c.call(zstd_dir, ZSTD_SUBDIRS) + lz4_present + glob_c.call(brotli_dir, BROTLI_SUBDIRS)
end
|
|
119
|
+
|
|
120
|
+
# Appends an "-I" flag to $CPPFLAGS for each vendored header directory:
# zstd's "lib" and "lib/common", lz4's "lib", and brotli's "c/include".
def add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
  header_dirs = [
    File.join(zstd_dir, "lib"),
    File.join(zstd_dir, "lib", "common"),
    File.join(lz4_dir, "lib"),
    File.join(brotli_dir, "c", "include"),
  ]

  header_dirs.each do |dir|
    $CPPFLAGS += " -I#{dir}"
  end
end
|
|
128
|
+
|
|
129
|
+
# Builds the ordered list of directories holding vendored C sources:
# every zstd sub-directory, then lz4's "lib", then every brotli sub-directory.
def build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)
  dirs = []
  ZSTD_SUBDIRS.each { |sub| dirs << File.join(zstd_dir, sub) }
  dirs << File.join(lz4_dir, "lib")
  BROTLI_SUBDIRS.each { |sub| dirs << File.join(brotli_dir, sub) }
  dirs
end
|
|
134
|
+
|
|
135
|
+
# Removes, in place, every source path whose basename was already seen
# earlier in the list, printing a "SKIP duplicate" line for each removal.
# The first path with a given basename is kept.
#
# sources - Array of source file paths; mutated.
#
# Always returns +sources+ (a bare Array#reject! would return nil when no
# duplicate was found).
def deduplicate_sources!(sources)
  seen = {}

  sources.select! do |src|
    basename = File.basename(src)
    first_occurrence = !seen.key?(basename)
    seen[basename] = true
    puts "  SKIP duplicate: #{src}" unless first_occurrence
    first_occurrence
  end

  sources
end
|
|
146
|
+
|
|
147
|
+
# Inserts one "vpath %.c <dir>" directive per vendored source directory into
# the generated "Makefile" in the current directory, directly after its
# "VPATH = ..." line.
#
# vpath_dirs - Array of directories to add.
#
# The inserted block is tagged with "# vendored vpath"; a Makefile that
# already contains that tag is left untouched. When no VPATH line is found,
# the file is not rewritten and a warning is printed instead.
def patch_makefile_vpath!(vpath_dirs)
  makefile = File.read("Makefile")
  return if makefile.include?("# vendored vpath")

  vpath_lines = vpath_dirs.map { |d| "vpath %.c #{d}" }.join("\n")

  # No /m flag: "." must stop at the end of the VPATH line, otherwise the
  # capture would extend to the end of the file.
  patched = makefile.sub!(/^(VPATH\s*=.*)$/) do
    "#{Regexp.last_match(1)}\n# vendored vpath\n#{vpath_lines}"
  end

  unless patched
    puts "  WARNING: no VPATH line found in Makefile; vendored vpath entries not added"
    return
  end

  File.write("Makefile", makefile)
  puts "  Patched Makefile with #{vpath_dirs.length} VPATH entries"
end
|
|
157
|
+
|
|
158
|
+
# --- Main ---

VENDOR_DIR = find_vendor_dir
VENDORED = !VENDOR_DIR.nil?

if FORCE_VENDORED && !VENDORED
  abort "COMPRESS_FORCE_VENDORED is set, but ext/multi_compress/vendor/.vendored was not found"
end

# Forced vendored mode takes precedence over the system-libraries switch.
# Otherwise system libraries are used when requested or when no vendored tree
# was found, and the vendored sources are used in the remaining case.
# vpath_dirs stays nil for a system-library build.
vpath_dirs =
  if FORCE_VENDORED || (VENDORED && !USE_SYSTEM)
    configure_vendored_libraries(VENDOR_DIR)
  else
    configure_system_libraries
    nil
  end

$CFLAGS += " -O3"
$CFLAGS += " -DXXH_NAMESPACE=MULTICOMPRESS_"

case RUBY_PLATFORM
when /x86_64|amd64|aarch64|arm64/
  $CFLAGS += " -DBROTLI_BUILD_LITTLE_ENDIAN"
end

have_header("ruby/fiber/scheduler.h")

have_library("pthread") unless RUBY_PLATFORM.include?("darwin")

create_makefile("multi_compress/multi_compress")

# Patch whenever the vendored sources were configured. This includes the case
# where both the force-vendored and use-system switches are set, which the
# previous `VENDORED && !USE_SYSTEM` guard skipped.
patch_makefile_vpath!(vpath_dirs) if vpath_dirs
|