multi_compress 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +79 -3
  9. data/ext/multi_compress/multi_compress.c +199 -120
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. data/lib/multi_compress.rb +80 -41
  212. metadata +29 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23ab6bacd75b21b5cfbf5b7b3121428c18060bd9e097f815438af6c8bafd8883
4
- data.tar.gz: 90321202358a43bb732077aa1f28b8a9e5d756e27067df851671883a21eb1470
3
+ metadata.gz: 51bd49ce7dbb59d7428cec080e9f2e1dc3b83412848b8e30e37eec3dc7f4fbf4
4
+ data.tar.gz: 34bd66cd9e255a5e13bb48f91afb584e2c3f3caefed71bd5c8367f61d6199e7d
5
5
  SHA512:
6
- metadata.gz: 431921d63b8757216df9179dfd5d792103777dbf597d52cfafeb2e593ee8ab52ce12f374499826b0eba45a164bfde0f4adaa3aa3ba9ee811c6d9f44ed2ccdb7d
7
- data.tar.gz: 2e1d3bf4455fd627696c7ff7a500e6424497df012939487d39896b8e50dad421c8acd8b427f10fc3de6a51b799ca5b1532cda5508485440e4f633a5ac5677a1d
6
+ metadata.gz: 3c2139458b9ec45769988f3efb3a3165c96801436ca31c7086f6f56a1a0549095d4b493fe239ea1607932b4209315eab076a2ed5fb68ac2249af5175382ec898
7
+ data.tar.gz: 78290aed6265c4915f7eb850646fbdc0e6000a615312a4714c8dbf1aac7d8817e88bcef06b92507d718a2f774601b183c521f05f7c12758bc85d04407afb51f9
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.4]
4
+
5
+ ### Changed
6
+ - Improved one-shot Zstd performance by reusing per-thread CCtx/DCtx.
7
+ - On deterministic arm64-darwin benchmarks, total zstd roundtrip improved by ~11–19% on ~10KB payloads,
8
+ ~1–4% on medium payloads, and ~2–6% on log-like large payloads, with identical compressed sizes.
9
+
10
+ ## [0.3.3]
11
+
12
+ ### Changed
13
+ - Updated vendored native libraries to **zstd 1.5.7** and **brotli 1.2.0**.
14
+ - Updated README / GET_STARTED examples so documented runtime versions match `MultiCompress.version(...)`.
15
+ - Added third-party license notices for the vendored C libraries.
16
+
3
17
  ## [0.3.2]
4
18
 
5
19
  ### Changed
@@ -59,9 +73,9 @@
59
73
  - Optional `./build.sh --test`, `./build.sh --valgrind`, and `./build.sh --sanitize` modes
60
74
 
61
75
  ### Changed
62
- - Vendored **zstd** is pinned to **1.5.2** in the current release line.
63
- - This restores reliable `MultiCompress::Zstd.train_dictionary(...)` behavior on tested platforms, including arm64-darwin.
64
- - Newer vendored zstd versions are temporarily avoided until the dictionary training regression is understood and fixed upstream.
76
+ - Historical note for the 0.2.2 release line: vendored **zstd** was pinned to **1.5.2**.
77
+ - This restored reliable `MultiCompress::Zstd.train_dictionary(...)` behavior on tested platforms at the time, including arm64-darwin.
78
+ - Later releases moved forward after the native-library upgrade path was revalidated.
65
79
 
66
80
  ### Changed
67
81
  - Documentation now matches current runtime behavior:
data/GET_STARTED.md CHANGED
@@ -400,7 +400,7 @@ end
400
400
 
401
401
  Dramatically improves compression on small, similar data (JSON APIs, configs, logs).
402
402
 
403
- **Important**: Dictionary training is available for **Zstd** in the current release (vendored zstd **1.5.2**). Brotli dictionaries can be used, but this gem does not expose Brotli training through `train_dictionary`.
403
+ **Important**: Dictionary training is available for **Zstd** in the current release (vendored zstd **1.5.7**). Brotli dictionaries can be used, but this gem does not expose Brotli training through `train_dictionary`.
404
404
 
405
405
  ### Training Dictionary (Zstd)
406
406
 
@@ -677,9 +677,9 @@ MultiCompress.available?(:zstd) # => true
677
677
  MultiCompress.available?(:fake) # => false
678
678
 
679
679
  # Get library versions
680
- MultiCompress.version(:zstd) # => "1.5.2"
680
+ MultiCompress.version(:zstd) # => "1.5.7"
681
681
  MultiCompress.version(:lz4) # => "1.10.0"
682
- MultiCompress.version(:brotli) # => "1.1.0"
682
+ MultiCompress.version(:brotli) # => "1.2.0"
683
683
  ```
684
684
 
685
685
  ### Data Integrity
data/README.md CHANGED
@@ -7,9 +7,9 @@
7
7
  Modern compression technology: **zstd**, **lz4**, **brotli** — unified compression platform with native C performance, **fiber-friendly** for modern async Ruby stacks.
8
8
 
9
9
  Bundled library versions in the current release:
10
- - **zstd 1.5.2**
10
+ - **zstd 1.5.7**
11
11
  - **lz4 1.10.0**
12
- - **brotli 1.1.0**
12
+ - **brotli 1.2.0**
13
13
 
14
14
  📖 **[Get Started →](GET_STARTED.md)** — Complete technology overview, algorithms, and implementation details
15
15
 
@@ -88,70 +88,79 @@ If `max_output_size:` is omitted, one-shot calls use `MultiCompress.config.max_o
88
88
 
89
89
  ## Benchmark Results
90
90
 
91
- > **📝 Note on v0.2.0**: Performance numbers below are from the v0.2.0 build with fiber-friendly paths enabled. There is no throughput regression compared to v0.1.2 — the fiber-friendly path is only taken when a `Fiber::Scheduler` is active, and even then the worker-thread overhead is negligible for payloads large enough to benefit.
92
-
93
- Performance comparison against Ruby's built-in zlib compression (200 iterations per test):
94
-
95
- ### 🗜️ COMPRESSION RATIO (%, lower is better)
96
- ```
97
- ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
98
- │ Configuration │ zlib │ lz4 │ zstd │ brotli │
99
- ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
100
- │ Small JSON (~10KB, GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
101
- │ Small text (~10KB, GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
102
- │ Small JSON (~10KB, no GC) │ 9.4% │ 16.1% │ 6.9% │ 6.1% │
103
- │ Small text (~10KB, no GC) │ 3.1% │ 4.6% │ 3.2% │ 2.6% │
104
- │ Medium JSON (~370KB, GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
105
- │ Medium logs (~168KB, GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
106
- │ Medium JSON (~370KB, no GC) │ 8.5% │ 15.7% │ 6.7% │ 5.5% │
107
- │ Medium logs (~168KB, no GC) │ 8.6% │ 17.2% │ 5.4% │ 3.2% │
108
- │ Large JSON (~1.6MB, GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
109
- │ Large logs (~600KB, GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
110
- │ Large JSON (~1.6MB, no GC) │ 8.1% │ 15.1% │ 6.1% │ 5.6% │
111
- │ Large logs (~600KB, no GC) │ 7.6% │ 16.0% │ 2.9% │ 2.0% │
112
- └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
113
- ```
114
-
115
- ### ⚡ TOTAL TIME (compress + decompress, ms — lower is faster)
116
- ```
117
- ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
118
- │ Configuration │ zlib │ lz4 │ zstd │ brotli │
119
- ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
120
- │ Small JSON (~10KB, GC) │ 0.05 │ 0.01 │ 0.02 │ 0.12 │
121
- │ Small text (~10KB, GC) │ 0.03 │ 0.00 │ 0.01 │ 0.09 │
122
- │ Small JSON (~10KB, no GC) │ 0.06 │ 0.01 │ 0.02 │ 0.13 │
123
- │ Small text (~10KB, no GC) │ 0.03 │ 0.00 │ 0.01 │ 0.11 │
124
- │ Medium JSON (~370KB, GC) │ 2.62 │ 0.28 │ 0.39 │ 2.31 │
125
- │ Medium logs (~168KB, GC) │ 1.23 │ 0.13 │ 0.18 │ 0.88 │
126
- │ Medium JSON (~370KB, no GC) │ 2.65 │ 0.27 │ 0.40 │ 2.31 │
127
- │ Medium logs (~168KB, no GC) │ 1.27 │ 0.13 │ 0.18 │ 0.95 │
128
- │ Large JSON (~1.6MB, GC) │ 11.70 │ 1.36 │ 1.93 │ 11.95 │
129
- │ Large logs (~600KB, GC) │ 4.10 │ 0.45 │ 0.45 │ 2.62 │
130
- │ Large JSON (~1.6MB, no GC) │ 11.47 │ 1.27 │ 1.88 │ 11.47 │
131
- │ Large logs (~600KB, no GC) │ 4.06 │ 0.41 │ 0.45 │ 2.79 │
132
- └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
133
- ```
134
-
135
- ### 📊 SPEEDUP vs ZLIB (higher is better)
136
- ```
137
- ┌─────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
138
- │ Configuration │ zlib │ lz4 │ zstd │ brotli │
139
- ├─────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
140
- │ Small JSON (~10KB, GC) │ 1.00x │ 5.00x │ 2.50x │ 0.42x │
141
- │ Small text (~10KB, GC) │ 1.00x │ N/A │ 3.00x │ 0.33x │
142
- │ Small JSON (~10KB, no GC) │ 1.00x │ 6.00x │ 3.00x │ 0.46x │
143
- │ Small text (~10KB, no GC) │ 1.00x │ N/A │ 3.00x │ 0.27x │
144
- │ Medium JSON (~370KB, GC) │ 1.00x │ 9.36x │ 6.72x │ 1.13x │
145
- │ Medium logs (~168KB, GC) │ 1.00x │ 9.46x │ 6.83x │ 1.40x │
146
- │ Medium JSON (~370KB, no GC) │ 1.00x │ 9.81x │ 6.62x │ 1.15x │
147
- │ Medium logs (~168KB, no GC) │ 1.00x │ 9.77x │ 7.06x │ 1.34x │
148
- │ Large JSON (~1.6MB, GC) │ 1.00x │ 8.60x │ 6.06x │ 0.98x │
149
- │ Large logs (~600KB, GC) │ 1.00x │ 9.11x │ 9.11x │ 1.56x │
150
- │ Large JSON (~1.6MB, no GC) │ 1.00x │ 9.03x │ 6.10x │ 1.00x │
151
- │ Large logs (~600KB, no GC) │ 1.00x │ 9.90x │ 9.02x │ 1.46x │
152
- └─────────────────────────────┴─────────┴─────────┴─────────┴─────────┘
153
- ```
154
-
91
+ > Benchmark numbers are environment-dependent. The tables below were generated with **MultiCompress 0.3.2** using vendored **zstd 1.5.7**, **lz4 1.10.0**, and **brotli 1.2.0** on **Ruby 3.1.7 / arm64-darwin24**, 200 iterations per test.
92
+ >
93
+ > Use these numbers as relative guidance, not absolute guarantees. In this run, **lz4** is consistently the fastest, **zstd** gives the best speed/ratio balance, and **brotli** gives the smallest output size but is not optimized for speed-first paths.
94
+
95
+ Performance comparison against Ruby's built-in zlib compression:
96
+
97
+ ### 🗜️ Compression ratio (%, lower is better)
98
+
99
+ | Configuration | zlib | lz4 | zstd | brotli |
100
+ |---|---:|---:|---:|---:|
101
+ | Small JSON (~10KB, GC) | 9.4% | 16.1% | 6.9% | 6.1% |
102
+ | Small text (~10KB, GC) | 3.1% | 4.6% | 3.2% | 2.6% |
103
+ | Small JSON (~10KB, no GC) | 9.4% | 16.1% | 6.9% | 6.1% |
104
+ | Small text (~10KB, no GC) | 3.1% | 4.6% | 3.2% | 2.6% |
105
+ | Medium JSON (~370KB, GC) | 8.5% | 15.7% | 6.6% | 5.4% |
106
+ | Medium logs (~168KB, GC) | 8.6% | 17.2% | 5.3% | 3.3% |
107
+ | Medium JSON (~370KB, no GC) | 8.5% | 15.7% | 6.6% | 5.4% |
108
+ | Medium logs (~168KB, no GC) | 8.6% | 17.2% | 5.3% | 3.3% |
109
+ | Large JSON (~1.6MB, GC) | 8.1% | 15.1% | 6.1% | 5.6% |
110
+ | Large logs (~600KB, GC) | 7.6% | 16.0% | 2.9% | 2.0% |
111
+ | Large JSON (~1.6MB, no GC) | 8.1% | 15.1% | 6.1% | 5.6% |
112
+ | Large logs (~600KB, no GC) | 7.6% | 16.0% | 2.9% | 2.0% |
113
+
114
+ ### ⚡ Total time (compress + decompress, ms — lower is faster)
115
+
116
+ | Configuration | zlib | lz4 | zstd | brotli |
117
+ |---|---:|---:|---:|---:|
118
+ | Small JSON (~10KB, GC) | 0.05 | 0.01 | 0.02 | 0.14 |
119
+ | Small text (~10KB, GC) | 0.04 | 0.00 | 0.01 | 0.12 |
120
+ | Small JSON (~10KB, no GC) | 0.05 | 0.01 | 0.02 | 0.14 |
121
+ | Small text (~10KB, no GC) | 0.04 | 0.00 | 0.01 | 0.11 |
122
+ | Medium JSON (~370KB, GC) | 2.60 | 0.29 | 0.41 | 2.45 |
123
+ | Medium logs (~168KB, GC) | 1.28 | 0.13 | 0.17 | 0.96 |
124
+ | Medium JSON (~370KB, no GC) | 2.62 | 0.27 | 0.39 | 2.41 |
125
+ | Medium logs (~168KB, no GC) | 1.19 | 0.13 | 0.17 | 1.08 |
126
+ | Large JSON (~1.6MB, GC) | 11.60 | 1.30 | 1.81 | 11.12 |
127
+ | Large logs (~600KB, GC) | 4.11 | 0.41 | 0.46 | 2.99 |
128
+ | Large JSON (~1.6MB, no GC) | 11.26 | 1.24 | 1.77 | 10.77 |
129
+ | Large logs (~600KB, no GC) | 4.01 | 0.42 | 0.45 | 2.90 |
130
+
131
+ ### 📊 Speedup vs zlib by total time (higher is better)
132
+
133
+ | Configuration | zlib | lz4 | zstd | brotli |
134
+ |---|---:|---:|---:|---:|
135
+ | Small JSON (~10KB, GC) | 1.00x | 5.00x | 2.50x | 0.36x |
136
+ | Small text (~10KB, GC) | 1.00x | N/A | 4.00x | 0.33x |
137
+ | Small JSON (~10KB, no GC) | 1.00x | 5.00x | 2.50x | 0.36x |
138
+ | Small text (~10KB, no GC) | 1.00x | N/A | 4.00x | 0.36x |
139
+ | Medium JSON (~370KB, GC) | 1.00x | 8.97x | 6.34x | 1.06x |
140
+ | Medium logs (~168KB, GC) | 1.00x | 9.85x | 7.53x | 1.33x |
141
+ | Medium JSON (~370KB, no GC) | 1.00x | 9.70x | 6.72x | 1.09x |
142
+ | Medium logs (~168KB, no GC) | 1.00x | 9.15x | 7.00x | 1.10x |
143
+ | Large JSON (~1.6MB, GC) | 1.00x | 8.92x | 6.41x | 1.04x |
144
+ | Large logs (~600KB, GC) | 1.00x | 10.02x | 8.93x | 1.37x |
145
+ | Large JSON (~1.6MB, no GC) | 1.00x | 9.08x | 6.36x | 1.05x |
146
+ | Large logs (~600KB, no GC) | 1.00x | 9.55x | 8.91x | 1.38x |
147
+
148
+ ### 📏 Compressed size (bytes, lower is better)
149
+
150
+ | Configuration | zlib | lz4 | zstd | brotli |
151
+ |---|---:|---:|---:|---:|
152
+ | Small JSON (~10KB, GC) | 900 | 1544 | 665 | 583 |
153
+ | Small text (~10KB, GC) | 310 | 461 | 322 | 256 |
154
+ | Small JSON (~10KB, no GC) | 900 | 1544 | 665 | 583 |
155
+ | Small text (~10KB, no GC) | 310 | 461 | 322 | 256 |
156
+ | Medium JSON (~370KB, GC) | 31524 | 57986 | 24557 | 20122 |
157
+ | Medium logs (~168KB, GC) | 14488 | 28950 | 8985 | 5549 |
158
+ | Medium JSON (~370KB, no GC) | 31524 | 57986 | 24557 | 20122 |
159
+ | Medium logs (~168KB, no GC) | 14488 | 28950 | 8985 | 5549 |
160
+ | Large JSON (~1.6MB, GC) | 133275 | 250026 | 100965 | 92591 |
161
+ | Large logs (~600KB, GC) | 45432 | 96130 | 17385 | 12250 |
162
+ | Large JSON (~1.6MB, no GC) | 133275 | 250026 | 100965 | 92591 |
163
+ | Large logs (~600KB, no GC) | 45432 | 96130 | 17385 | 12250 |
155
164
 
156
165
  **Dependencies for benchmarking:**
157
166
  - `memory_profiler` — Memory usage analysis
@@ -0,0 +1,24 @@
1
+ # Third-Party Notices
2
+
3
+ MultiCompress vendors C sources from upstream compression libraries so the gem can build without system-level compression packages. MultiCompress's own Ruby code and extension glue are MIT-licensed, but vendored third-party sources keep their upstream licenses.
4
+
5
+ ## Vendored native libraries
6
+
7
+ | Library | Vendored version | Vendored source scope | Upstream license used by MultiCompress |
8
+ |---|---:|---|---|
9
+ | Zstandard (`zstd`) | 1.5.7 | `lib/` | BSD side of upstream BSD OR GPLv2 licensing |
10
+ | LZ4 | 1.10.0 | `lib/` | BSD 2-Clause |
11
+ | Brotli | 1.2.0 | `c/common`, `c/enc`, `c/dec`, `c/include` | MIT |
12
+
13
+ ## Notes
14
+
15
+ - Zstandard changed copyright wording from older Facebook/Yann Collet headers to `Meta Platforms, Inc. and affiliates` in newer releases. This is a copyright-owner notice change, not a prohibition on use.
16
+ - Do not imply that Facebook, Meta, Google, Yann Collet, or upstream contributors endorse MultiCompress.
17
+ - When distributing source or binary builds, keep the vendored source notices and this file in the package.
18
+ - LZ4's repository contains GPL-licensed areas outside `lib/`; MultiCompress vendors only `lib/`, which upstream documents as the BSD 2-Clause integration area.
19
+
20
+ ## Upstream projects
21
+
22
+ - Zstandard: https://github.com/facebook/zstd
23
+ - LZ4: https://github.com/lz4/lz4
24
+ - Brotli: https://github.com/google/brotli
@@ -0,0 +1,3 @@
1
+ // Wrapper for Brotli 1.2.0 vendored builds.
2
+
3
+ #include "vendor/brotli/c/dec/static_init.c"
@@ -0,0 +1,3 @@
1
+ // Wrapper for Brotli 1.2.0 vendored builds.
2
+
3
+ #include "vendor/brotli/c/enc/static_init.c"
@@ -6,10 +6,20 @@ USE_SYSTEM = arg_config("--use-system-libraries") ||
6
6
  ENV["COMPRESS_USE_SYSTEM_LIBRARIES"]
7
7
  FORCE_VENDORED = arg_config("--force-vendored") ||
8
8
  ENV["COMPRESS_FORCE_VENDORED"]
9
+ DISABLE_ZSTD_ASM = arg_config("--disable-zstd-asm") ||
10
+ ENV["MULTI_COMPRESS_DISABLE_ZSTD_ASM"] == "1"
9
11
 
10
12
  ZSTD_SUBDIRS = %w[lib/common lib/compress lib/decompress lib/dictBuilder].freeze
11
13
  BROTLI_SUBDIRS = %w[c/common c/enc c/dec].freeze
12
14
  LZ4_SOURCES = %w[lz4.c lz4hc.c lz4frame.c].freeze
15
+ BROTLI_STATIC_INIT_SOURCES = %w[
16
+ c/enc/static_init.c
17
+ c/dec/static_init.c
18
+ ].freeze
19
+ BROTLI_STATIC_INIT_WRAPPERS = %w[
20
+ brotli_enc_static_init.c
21
+ brotli_dec_static_init.c
22
+ ].freeze
13
23
 
14
24
  def find_vendor_dir
15
25
  candidates = [
@@ -45,6 +55,15 @@ def find_compress_c_dir
45
55
  &.then { |path| File.expand_path(path) } || __dir__
46
56
  end
47
57
 
58
+ def zstd_asm_supported?
59
+ case RUBY_PLATFORM
60
+ when /x86_64|amd64/
61
+ !RUBY_PLATFORM.include?("mswin") && !RUBY_PLATFORM.include?("mingw")
62
+ else
63
+ false
64
+ end
65
+ end
66
+
48
67
  def configure_system_libraries
49
68
  puts "Building with SYSTEM libraries"
50
69
 
@@ -90,7 +109,10 @@ def configure_vendored_libraries(vendor_dir)
90
109
  puts " #{all_vendor_srcs.length} vendored C files"
91
110
 
92
111
  add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
93
- $CPPFLAGS += " -DZSTD_DISABLE_ASM"
112
+ if DISABLE_ZSTD_ASM
113
+ $CPPFLAGS += " -DZSTD_DISABLE_ASM"
114
+ puts " ZSTD ASM Huffman decoder disabled (--disable-zstd-asm or MULTI_COMPRESS_DISABLE_ZSTD_ASM=1)"
115
+ end
94
116
 
95
117
  vpath_dirs = build_vpath_dirs(zstd_dir, lz4_dir, brotli_dir)
96
118
 
@@ -98,9 +120,14 @@ def configure_vendored_libraries(vendor_dir)
98
120
 
99
121
  compress_c_dir = find_compress_c_dir
100
122
 
101
- $srcs = ["multi_compress.c"] + all_vendor_srcs.map { |s| File.basename(s) }
123
+ c_srcs = all_vendor_srcs.reject { |s| s.end_with?(".S") }
124
+ asm_srcs = all_vendor_srcs.select { |s| s.end_with?(".S") }
125
+
126
+ $srcs = ["multi_compress.c"] + c_srcs.map { |s| File.basename(s) }
102
127
  $VPATH = [compress_c_dir] + vpath_dirs
103
128
 
129
+ $multi_compress_asm_srcs = asm_srcs
130
+
104
131
  $warnflags = ""
105
132
 
106
133
  vpath_dirs
@@ -109,6 +136,11 @@ end
109
136
  def collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
110
137
  zstd_srcs = ZSTD_SUBDIRS.flat_map { |d| Dir[File.join(zstd_dir, d, "*.c")] }
111
138
 
139
+ unless DISABLE_ZSTD_ASM
140
+ asm = File.join(zstd_dir, "lib", "decompress", "huf_decompress_amd64.S")
141
+ zstd_srcs << asm if File.exist?(asm) && zstd_asm_supported?
142
+ end
143
+
112
144
  lz4_srcs = LZ4_SOURCES.filter_map do |f|
113
145
  path = File.join(lz4_dir, "lib", f)
114
146
  path if File.exist?(path)
@@ -116,7 +148,20 @@ def collect_vendor_sources(zstd_dir, lz4_dir, brotli_dir)
116
148
 
117
149
  brotli_srcs = BROTLI_SUBDIRS.flat_map { |d| Dir[File.join(brotli_dir, d, "*.c")] }
118
150
 
119
- zstd_srcs + lz4_srcs + brotli_srcs
151
+ # Brotli 1.2.0 has both c/enc/static_init.c and c/dec/static_init.c.
152
+ # mkmf builds object files by basename, so compiling both as static_init.c
153
+ # makes one object shadow the other. Use unique wrapper translation units
154
+ # instead; otherwise the decoder static init can be skipped and crash at runtime.
155
+ brotli_static_init_paths = BROTLI_STATIC_INIT_SOURCES.map do |relative_path|
156
+ File.join(brotli_dir, relative_path)
157
+ end
158
+ brotli_srcs -= brotli_static_init_paths
159
+
160
+ brotli_static_init_wrappers = BROTLI_STATIC_INIT_WRAPPERS.map do |file|
161
+ File.join(__dir__, file)
162
+ end
163
+
164
+ zstd_srcs + lz4_srcs + brotli_srcs + brotli_static_init_wrappers
120
165
  end
121
166
 
122
167
  def add_include_dirs(zstd_dir, lz4_dir, brotli_dir)
@@ -157,6 +202,36 @@ def patch_makefile_vpath!(vpath_dirs)
157
202
  puts " Patched Makefile with #{vpath_dirs.length} VPATH entries"
158
203
  end
159
204
 
205
+ def patch_makefile_asm!(asm_srcs)
206
+ return if asm_srcs.nil? || asm_srcs.empty?
207
+
208
+ makefile = File.read("Makefile")
209
+ return if makefile.include?("# vendored asm")
210
+
211
+ asm_dirs = asm_srcs.map { |s| File.dirname(s) }.uniq
212
+ vpath_lines = asm_dirs.map { |d| "vpath %.S #{d}" }.join("\n")
213
+
214
+ asm_objs = asm_srcs.map { |s| File.basename(s, ".S") + ".o" }
215
+ obj_append = asm_objs.join(" ")
216
+
217
+ unless makefile.sub!(/^(OBJS\s*=\s*[^\n]+?)(\s*)$/) { "#{Regexp.last_match(1)} #{obj_append}#{Regexp.last_match(2)}" }
218
+ makefile << "\nOBJS = #{obj_append}\n"
219
+ end
220
+
221
+ pattern_rule = <<~MAKE
222
+ # vendored asm
223
+ #{vpath_lines}
224
+ %.o: %.S
225
+ \t$(ECHO) compiling $(<)
226
+ \t$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
227
+ MAKE
228
+
229
+ makefile << "\n#{pattern_rule}\n"
230
+
231
+ File.write("Makefile", makefile)
232
+ puts " Patched Makefile with #{asm_srcs.length} ASM source(s): #{asm_objs.join(", ")}"
233
+ end
234
+
160
235
  # --- Main ---
161
236
 
162
237
  VENDOR_DIR = find_vendor_dir
@@ -190,3 +265,4 @@ have_library("pthread") unless RUBY_PLATFORM.include?("darwin")
190
265
  create_makefile("multi_compress/multi_compress")
191
266
 
192
267
  patch_makefile_vpath!(vpath_dirs) if VENDORED && !USE_SYSTEM && vpath_dirs
268
+ patch_makefile_asm!($multi_compress_asm_srcs) if VENDORED && !USE_SYSTEM && $multi_compress_asm_srcs