lz4rip 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6cc17a63b94ba3943cc64ef1ded04a85994da14d1d305df9adebe10e78bfd798
4
+ data.tar.gz: 849f7c71658601730ff0f328963148644987798bfe06d7e3878f81eae3b5885b
5
+ SHA512:
6
+ metadata.gz: bf19aaf5f7f57cbaa50bd1c9674ddae4583d18665495344f19325b095a7298b97fb152625bcab41b6e96a37fa71d8e5bbe1c6c295fb27d2443012312b1d9bd05
7
+ data.tar.gz: bc6c28546e3ad483b6c97601610fc3625123567b1e70f48059626b0bf710fd6cb2df1b7096def34cf2c04c9142d509411a0fe3b5bc7e48c12a96edbad9e9229e
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [0.1.0] - 2026-06-20
6
+
7
+ - Initial release.
8
+ - `Lz4rip::FrameCodec`: frame-format LZ4 codec (Ractor-shareable).
9
+ - `Lz4rip::BlockCodec`: block-format LZ4 codec with reusable scratch table.
10
+ - `Lz4rip::Dictionary`: immutable value type for LZ4 dictionaries.
11
+ - `Lz4rip::DictTrainer`: COVER-based dictionary trainer.
12
+ - `Lz4rip.compress_bound`: maximum output size for a given input size.
data/Cargo.lock ADDED
@@ -0,0 +1,325 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.72.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "proc-macro2",
25
+ "quote",
26
+ "regex",
27
+ "rustc-hash",
28
+ "shlex",
29
+ "syn",
30
+ ]
31
+
32
+ [[package]]
33
+ name = "bitflags"
34
+ version = "2.13.0"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+ checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
37
+
38
+ [[package]]
39
+ name = "cexpr"
40
+ version = "0.6.0"
41
+ source = "registry+https://github.com/rust-lang/crates.io-index"
42
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
43
+ dependencies = [
44
+ "nom",
45
+ ]
46
+
47
+ [[package]]
48
+ name = "cfg-if"
49
+ version = "1.0.4"
50
+ source = "registry+https://github.com/rust-lang/crates.io-index"
51
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
52
+
53
+ [[package]]
54
+ name = "clang-sys"
55
+ version = "1.8.1"
56
+ source = "registry+https://github.com/rust-lang/crates.io-index"
57
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
58
+ dependencies = [
59
+ "glob",
60
+ "libc",
61
+ "libloading",
62
+ ]
63
+
64
+ [[package]]
65
+ name = "either"
66
+ version = "1.16.0"
67
+ source = "registry+https://github.com/rust-lang/crates.io-index"
68
+ checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
69
+
70
+ [[package]]
71
+ name = "glob"
72
+ version = "0.3.3"
73
+ source = "registry+https://github.com/rust-lang/crates.io-index"
74
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
75
+
76
+ [[package]]
77
+ name = "itertools"
78
+ version = "0.13.0"
79
+ source = "registry+https://github.com/rust-lang/crates.io-index"
80
+ checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
81
+ dependencies = [
82
+ "either",
83
+ ]
84
+
85
+ [[package]]
86
+ name = "lazy_static"
87
+ version = "1.5.0"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
90
+
91
+ [[package]]
92
+ name = "libc"
93
+ version = "0.2.186"
94
+ source = "registry+https://github.com/rust-lang/crates.io-index"
95
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
96
+
97
+ [[package]]
98
+ name = "libloading"
99
+ version = "0.8.9"
100
+ source = "registry+https://github.com/rust-lang/crates.io-index"
101
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
102
+ dependencies = [
103
+ "cfg-if",
104
+ "windows-link",
105
+ ]
106
+
107
+ [[package]]
108
+ name = "lz4rip"
109
+ version = "0.1.0"
110
+ dependencies = [
111
+ "lz4rip 0.8.5",
112
+ "magnus",
113
+ "rb-sys",
114
+ ]
115
+
116
+ [[package]]
117
+ name = "lz4rip"
118
+ version = "0.8.5"
119
+ source = "registry+https://github.com/rust-lang/crates.io-index"
120
+ checksum = "16200eb628c6a5a6e539901f08e5ff4adb9b9522d7201a2dfa36d2060c355b34"
121
+ dependencies = [
122
+ "lz4rip-core",
123
+ "lz4rip-decode",
124
+ "lz4rip-encode",
125
+ "twox-hash",
126
+ ]
127
+
128
+ [[package]]
129
+ name = "lz4rip-core"
130
+ version = "0.5.2"
131
+ source = "registry+https://github.com/rust-lang/crates.io-index"
132
+ checksum = "5bf2d77a9fc2acbfe6f4b6b0a3ef2d97f0c96bb384c4406225851a900f483788"
133
+
134
+ [[package]]
135
+ name = "lz4rip-decode"
136
+ version = "0.8.2"
137
+ source = "registry+https://github.com/rust-lang/crates.io-index"
138
+ checksum = "7c45bc51677ce22ebf4d9edabdb658fbd9e4556a93316d423a9e7a6a54108166"
139
+ dependencies = [
140
+ "lz4rip-core",
141
+ ]
142
+
143
+ [[package]]
144
+ name = "lz4rip-encode"
145
+ version = "0.8.3"
146
+ source = "registry+https://github.com/rust-lang/crates.io-index"
147
+ checksum = "0d0d63726468388972bc99c72dab3bb3fde78096969cd270d84e77c07b5df5a2"
148
+ dependencies = [
149
+ "lz4rip-core",
150
+ ]
151
+
152
+ [[package]]
153
+ name = "magnus"
154
+ version = "0.8.2"
155
+ source = "registry+https://github.com/rust-lang/crates.io-index"
156
+ checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
157
+ dependencies = [
158
+ "magnus-macros",
159
+ "rb-sys",
160
+ "rb-sys-env",
161
+ "seq-macro",
162
+ ]
163
+
164
+ [[package]]
165
+ name = "magnus-macros"
166
+ version = "0.8.0"
167
+ source = "registry+https://github.com/rust-lang/crates.io-index"
168
+ checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
169
+ dependencies = [
170
+ "proc-macro2",
171
+ "quote",
172
+ "syn",
173
+ ]
174
+
175
+ [[package]]
176
+ name = "memchr"
177
+ version = "2.8.2"
178
+ source = "registry+https://github.com/rust-lang/crates.io-index"
179
+ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
180
+
181
+ [[package]]
182
+ name = "minimal-lexical"
183
+ version = "0.2.1"
184
+ source = "registry+https://github.com/rust-lang/crates.io-index"
185
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
186
+
187
+ [[package]]
188
+ name = "nom"
189
+ version = "7.1.3"
190
+ source = "registry+https://github.com/rust-lang/crates.io-index"
191
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
192
+ dependencies = [
193
+ "memchr",
194
+ "minimal-lexical",
195
+ ]
196
+
197
+ [[package]]
198
+ name = "proc-macro2"
199
+ version = "1.0.106"
200
+ source = "registry+https://github.com/rust-lang/crates.io-index"
201
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
202
+ dependencies = [
203
+ "unicode-ident",
204
+ ]
205
+
206
+ [[package]]
207
+ name = "quote"
208
+ version = "1.0.45"
209
+ source = "registry+https://github.com/rust-lang/crates.io-index"
210
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
211
+ dependencies = [
212
+ "proc-macro2",
213
+ ]
214
+
215
+ [[package]]
216
+ name = "rb-sys"
217
+ version = "0.9.128"
218
+ source = "registry+https://github.com/rust-lang/crates.io-index"
219
+ checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
220
+ dependencies = [
221
+ "rb-sys-build",
222
+ ]
223
+
224
+ [[package]]
225
+ name = "rb-sys-build"
226
+ version = "0.9.128"
227
+ source = "registry+https://github.com/rust-lang/crates.io-index"
228
+ checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
229
+ dependencies = [
230
+ "bindgen",
231
+ "lazy_static",
232
+ "proc-macro2",
233
+ "quote",
234
+ "regex",
235
+ "shell-words",
236
+ "syn",
237
+ ]
238
+
239
+ [[package]]
240
+ name = "rb-sys-env"
241
+ version = "0.2.3"
242
+ source = "registry+https://github.com/rust-lang/crates.io-index"
243
+ checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
244
+
245
+ [[package]]
246
+ name = "regex"
247
+ version = "1.12.4"
248
+ source = "registry+https://github.com/rust-lang/crates.io-index"
249
+ checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
250
+ dependencies = [
251
+ "aho-corasick",
252
+ "memchr",
253
+ "regex-automata",
254
+ "regex-syntax",
255
+ ]
256
+
257
+ [[package]]
258
+ name = "regex-automata"
259
+ version = "0.4.14"
260
+ source = "registry+https://github.com/rust-lang/crates.io-index"
261
+ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
262
+ dependencies = [
263
+ "aho-corasick",
264
+ "memchr",
265
+ "regex-syntax",
266
+ ]
267
+
268
+ [[package]]
269
+ name = "regex-syntax"
270
+ version = "0.8.11"
271
+ source = "registry+https://github.com/rust-lang/crates.io-index"
272
+ checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
273
+
274
+ [[package]]
275
+ name = "rustc-hash"
276
+ version = "2.1.2"
277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
278
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
279
+
280
+ [[package]]
281
+ name = "seq-macro"
282
+ version = "0.3.6"
283
+ source = "registry+https://github.com/rust-lang/crates.io-index"
284
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
285
+
286
+ [[package]]
287
+ name = "shell-words"
288
+ version = "1.1.1"
289
+ source = "registry+https://github.com/rust-lang/crates.io-index"
290
+ checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
291
+
292
+ [[package]]
293
+ name = "shlex"
294
+ version = "1.3.0"
295
+ source = "registry+https://github.com/rust-lang/crates.io-index"
296
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
297
+
298
+ [[package]]
299
+ name = "syn"
300
+ version = "2.0.118"
301
+ source = "registry+https://github.com/rust-lang/crates.io-index"
302
+ checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
303
+ dependencies = [
304
+ "proc-macro2",
305
+ "quote",
306
+ "unicode-ident",
307
+ ]
308
+
309
+ [[package]]
310
+ name = "twox-hash"
311
+ version = "2.1.2"
312
+ source = "registry+https://github.com/rust-lang/crates.io-index"
313
+ checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
314
+
315
+ [[package]]
316
+ name = "unicode-ident"
317
+ version = "1.0.24"
318
+ source = "registry+https://github.com/rust-lang/crates.io-index"
319
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
320
+
321
+ [[package]]
322
+ name = "windows-link"
323
+ version = "0.2.1"
324
+ source = "registry+https://github.com/rust-lang/crates.io-index"
325
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
data/Cargo.toml ADDED
@@ -0,0 +1,9 @@
1
+ [workspace]
2
+ members = ["ext/lz4rip"]
3
+ resolver = "2"
4
+
5
+ [profile.release]
6
+ opt-level = 3
7
+ lto = true
8
+ codegen-units = 1
9
+ panic = "abort"
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Patrik Wenger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # lz4rip — Ractor-safe LZ4 for Ruby
2
+
3
+ [![CI](https://github.com/paddor/lz4rip-rb/actions/workflows/ci.yml/badge.svg)](https://github.com/paddor/lz4rip-rb/actions/workflows/ci.yml)
4
+ [![Gem Version](https://img.shields.io/gem/v/lz4rip?color=e9573f)](https://rubygems.org/gems/lz4rip)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Ruby](https://img.shields.io/badge/Ruby-%3E%3D%204.0-CC342D?logo=ruby&logoColor=white)](https://www.ruby-lang.org)
7
+
8
+ Ruby bindings for [lz4rip](https://crates.io/crates/lz4rip), a pure-Rust LZ4
9
+ implementation. Built with [magnus](https://github.com/matsadler/magnus) and
10
+ declared Ractor-safe so you can compress from any Ractor without a global lock.
11
+
12
+ ## Features
13
+
14
+ - **Block codec** with reusable compressor scratch table
15
+ - **Frame codec** for standard `.lz4` frames
16
+ - **Dictionary support** for both block and frame codecs
17
+ - **COVER-based dictionary trainer** (`DictTrainer`)
18
+ - **Ractor-safe**: `FrameCodec` is shareable across Ractors, `BlockCodec` is
19
+ per-Ractor (mutable scratch state)
20
+
21
+ ## Install
22
+
23
+ Requires Ruby >= 4.0 and a Rust toolchain (for building the native extension):
24
+
25
+ ```sh
26
+ gem install lz4rip
27
+ ```
28
+
29
+ Or in your Gemfile:
30
+
31
+ ```ruby
32
+ gem "lz4rip"
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### Frame codec (standard LZ4 frames)
38
+
39
+ ```ruby
40
+ require "lz4rip"
41
+
42
+ codec = Lz4rip::FrameCodec.new
43
+ compressed = codec.compress("hello world " * 1000)
44
+ original = codec.decompress(compressed)
45
+ ```
46
+
47
+ ### Block codec (raw LZ4 blocks)
48
+
49
+ ```ruby
50
+ codec = Lz4rip::BlockCodec.new
51
+ compressed = codec.compress("hello world " * 1000)
52
+ original = codec.decompress(compressed, decompressed_size: 12_000)
53
+ ```
54
+
55
+ Block decompression requires the original size up front. This is by design: LZ4
56
+ block format does not store it, so the caller must track it.
57
+
58
+ ### Dictionary compression
59
+
60
+ ```ruby
61
+ dict = Lz4rip::Dictionary.new(bytes: "common log prefix: ")
62
+ codec = Lz4rip::FrameCodec.new(dict: dict)
63
+
64
+ compressed = codec.compress("common log prefix: event=login user=alice")
65
+ original = codec.decompress(compressed)
66
+ ```
67
+
68
+ ### Dictionary training
69
+
70
+ ```ruby
71
+ trainer = Lz4rip::DictTrainer.new(2048)
72
+ messages.each { |msg| trainer.add_sample(msg) }
73
+ dict_bytes = trainer.train
74
+
75
+ dict = Lz4rip::Dictionary.new(bytes: dict_bytes)
76
+ codec = Lz4rip::BlockCodec.new(dict: dict)
77
+ ```
78
+
79
+ ### Ractor safety
80
+
81
+ ```ruby
82
+ codec = Lz4rip::FrameCodec.new
83
+
84
+ ractors = 4.times.map do |i|
85
+ Ractor.new(codec) do |c|
86
+ data = "ractor #{Ractor.current} payload " * 100
87
+ ct = c.compress(data)
88
+ raise "mismatch" unless c.decompress(ct) == data
89
+ :ok
90
+ end
91
+ end
92
+
93
+ ractors.each { |r| p r.value } # => :ok, :ok, :ok, :ok
94
+ ```
95
+
96
+ ## API
97
+
98
+ | Class / Module | Method | Description |
99
+ |---|---|---|
100
+ | `Lz4rip::FrameCodec` | `.new(dict: nil)` | Create a frame codec, optionally with a `Dictionary` or raw `String` dict |
101
+ | | `#compress(string)` | Compress to LZ4 frame |
102
+ | | `#decompress(string)` | Decompress an LZ4 frame |
103
+ | | `#has_dict?` | Whether a dictionary is loaded |
104
+ | | `#id` | Dictionary ID (nil without dict) |
105
+ | | `#size` | Dictionary size in bytes (0 without dict) |
106
+ | `Lz4rip::BlockCodec` | `.new(dict: nil)` | Create a block codec, optionally with a `Dictionary` or raw `String` dict |
107
+ | | `#compress(string)` | Compress to raw LZ4 block |
108
+ | | `#decompress(string, decompressed_size:)` | Decompress a raw LZ4 block |
109
+ | | `#has_dict?` | Whether a dictionary is loaded |
110
+ | | `#size` | Internal state size in bytes (0 without dict) |
111
+ | `Lz4rip::Dictionary` | `.new(bytes:, id: auto)` | Immutable dictionary value object |
112
+ | | `#bytes` | Frozen binary dict bytes |
113
+ | | `#id` | 32-bit dictionary ID |
114
+ | | `#size` | Dictionary size in bytes |
115
+ | `Lz4rip::DictTrainer` | `.new(max_dict_size)` | Create a trainer (capped at 65535) |
116
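+ | | `#add_sample(string)` | Feed a training sample (only the first `max_dict_size` bytes are used) |
117
+ | | `#train` | Consume the trainer, return dict bytes (raises `RuntimeError` if called again) |
118
+ | | `#sample_count` | Number of accepted samples (raises `RuntimeError` after `#train`) |
119
+ | | `#total_bytes` | Total bytes of accepted samples (raises `RuntimeError` after `#train`) |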
120
+ | | `#trained?` | Whether `#train` has been called |
121
+ | | `#max_dict_size` | Configured max dict size |
122
+ | `Lz4rip` | `.compress_bound(size)` | Max compressed output size for a given input size |
123
+ | | `.block_stream_size` | Internal compressor heap size |
124
+ | `Lz4rip::DecompressError` | | Raised on decompression failure (subclass of `StandardError`) |
125
+
126
+ ## License
127
+
128
+ [MIT](LICENSE)
data/ext/lz4rip/Cargo.toml ADDED
@@ -0,0 +1,16 @@
1
+ [package]
2
+ name = "lz4rip"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+
6
+ [lib]
7
+ name = "lz4rip"
8
+ crate-type = ["cdylib", "rlib"]
9
+
10
+ [dependencies]
11
+ lz4 = { version = "0.8", package = "lz4rip" }
12
+ magnus = "0.8"
13
+ rb-sys = "0.9"
14
+
15
+ [build-dependencies]
16
+ rb-sys = "0.9"
data/ext/lz4rip/extconf.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
+ create_rust_makefile("lz4rip/lz4rip") do |r|
7
+ r.profile = ENV.fetch("RB_SYS_CARGO_PROFILE", :release).to_sym
8
+ end
data/ext/lz4rip/src/lib.rs ADDED
@@ -0,0 +1,392 @@
1
+ use magnus::{
2
+ exception::ExceptionClass, function, method, prelude::*, r_string::RString, value::Opaque,
3
+ Error, Ruby,
4
+ };
5
+ use std::cell::RefCell;
6
+ use std::io::{Cursor, Read, Write};
7
+ use std::sync::OnceLock;
8
+
9
+ use lz4::block::{self, Compressor, Decompressor, DictTrainer};
10
+ use lz4::frame::{BlockMode, FrameDecoder, FrameEncoder, FrameInfo};
11
+
12
+ const COMPRESSOR_HEAP_SIZE: usize = 8192;
13
+
14
+ const LZ4_FRAME_MAGIC: [u8; 4] = [0x04, 0x22, 0x4d, 0x18];
15
+
16
+ static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
17
+
18
+ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
19
+ ruby.get_inner(
20
+ *DECOMPRESS_ERROR
21
+ .get()
22
+ .expect("DecompressError not initialized"),
23
+ )
24
+ }
25
+
26
+ // ---------- module functions ----------
27
+
28
+ fn lz4rip_compress_bound(_ruby: &Ruby, size: usize) -> usize {
29
+ block::get_maximum_output_size(size)
30
+ }
31
+
32
+ fn lz4rip_block_stream_size(_ruby: &Ruby) -> usize {
33
+ COMPRESSOR_HEAP_SIZE
34
+ }
35
+
36
+ // ---------- BlockCodec ----------
37
+
38
+ #[magnus::wrap(class = "Lz4rip::BlockCodec", free_immediately, size)]
39
+ struct BlockCodec {
40
+ compressor: Option<RefCell<Compressor>>,
41
+ decompressor: Option<Decompressor>,
42
+ dict_len: usize,
43
+ }
44
+
45
+ fn block_codec_new(_ruby: &Ruby, rb_dict: Option<RString>) -> Result<BlockCodec, Error> {
46
+ match rb_dict {
47
+ None => Ok(BlockCodec {
48
+ compressor: None,
49
+ decompressor: None,
50
+ dict_len: 0,
51
+ }),
52
+ Some(rb_dict) => {
53
+ let bytes: Vec<u8> = unsafe { rb_dict.as_slice().to_vec() };
54
+ Ok(BlockCodec {
55
+ compressor: Some(RefCell::new(Compressor::with_dict(&bytes))),
56
+ decompressor: Some(Decompressor::with_dict(&bytes)),
57
+ dict_len: bytes.len(),
58
+ })
59
+ }
60
+ }
61
+ }
62
+
63
+ fn block_codec_size(rb_self: &BlockCodec) -> usize {
64
+ if rb_self.compressor.is_some() {
65
+ COMPRESSOR_HEAP_SIZE + rb_self.dict_len
66
+ } else {
67
+ 0
68
+ }
69
+ }
70
+
71
+ fn block_codec_has_dict(rb_self: &BlockCodec) -> bool {
72
+ rb_self.compressor.is_some()
73
+ }
74
+
75
+ fn block_codec_compress(
76
+ ruby: &Ruby,
77
+ rb_self: &BlockCodec,
78
+ rb_input: RString,
79
+ ) -> Result<RString, Error> {
80
+ let input: &[u8] = unsafe { rb_input.as_slice() };
81
+
82
+ let out = match &rb_self.compressor {
83
+ None => block::compress(input),
84
+ Some(comp) => comp.borrow_mut().compress(input),
85
+ };
86
+
87
+ Ok(ruby.str_from_slice(&out))
88
+ }
89
+
90
+ fn block_codec_decompress(
91
+ ruby: &Ruby,
92
+ rb_self: &BlockCodec,
93
+ rb_input: RString,
94
+ decompressed_size: usize,
95
+ ) -> Result<RString, Error> {
96
+ let compressed: &[u8] = unsafe { rb_input.as_slice() };
97
+
98
+ let result = match &rb_self.decompressor {
99
+ None => block::decompress(compressed, decompressed_size),
100
+ Some(decomp) => decomp.decompress(compressed, decompressed_size),
101
+ };
102
+
103
+ match result {
104
+ Ok(data) => Ok(ruby.str_from_slice(&data)),
105
+ Err(e) => Err(Error::new(
106
+ decompress_error(ruby),
107
+ format!("lz4 block decode failed: {e}"),
108
+ )),
109
+ }
110
+ }
111
+
112
+ // ---------- FrameCodec ----------
113
+
114
+ #[magnus::wrap(class = "Lz4rip::FrameCodec", free_immediately, size)]
115
+ struct FrameCodec {
116
+ dict: Option<DictBound>,
117
+ }
118
+
119
+ struct DictBound {
120
+ bytes: Vec<u8>,
121
+ id: u32,
122
+ }
123
+
124
+ /// Builds a `FrameCodec`, optionally bound to a dictionary.
+ ///
+ /// When `rb_dict` is `Some`, its bytes are copied into an owned `Vec<u8>` and
+ /// stored together with `id`. When it is `None`, `id` is ignored and the codec
+ /// carries no dictionary.
+ ///
+ /// The caller's string is left untouched: the codec keeps its own copy of the
+ /// bytes, so freezing the argument would only be a surprising side effect
+ /// (and `block_codec_new` does not freeze its argument either).
+ fn frame_codec_initialize(
125
+ _ruby: &Ruby,
126
+ rb_dict: Option<RString>,
127
+ id: u32,
128
+ ) -> Result<FrameCodec, Error> {
129
+ let dict = rb_dict.map(|s| {
130
+ let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
131
+ // `bytes` is an owned copy made before any other Ruby call; `s` is not retained or modified.
132
+ DictBound { bytes, id }
133
+ });
134
+ Ok(FrameCodec { dict })
135
+ }
136
+
137
+ fn frame_codec_compress(
138
+ ruby: &Ruby,
139
+ rb_self: &FrameCodec,
140
+ rb_input: RString,
141
+ ) -> Result<RString, Error> {
142
+ let input: &[u8] = unsafe { rb_input.as_slice() };
143
+
144
+ let buf = Vec::new();
145
+ let mut enc = match &rb_self.dict {
146
+ None => {
147
+ let info = FrameInfo::new().block_mode(BlockMode::Linked);
148
+ FrameEncoder::with_frame_info(info, buf)
149
+ }
150
+ Some(d) => FrameEncoder::with_dictionary(buf, &d.bytes, d.id),
151
+ };
152
+
153
+ enc.write_all(input).map_err(|e| {
154
+ Error::new(
155
+ ruby.exception_runtime_error(),
156
+ format!("lz4 frame compress failed: {e}"),
157
+ )
158
+ })?;
159
+
160
+ let out = enc.finish().map_err(|e| {
161
+ Error::new(
162
+ ruby.exception_runtime_error(),
163
+ format!("lz4 frame compress failed: {e}"),
164
+ )
165
+ })?;
166
+
167
+ Ok(ruby.str_from_slice(&out))
168
+ }
169
+
170
+ fn frame_codec_decompress(
171
+ ruby: &Ruby,
172
+ rb_self: &FrameCodec,
173
+ rb_input: RString,
174
+ ) -> Result<RString, Error> {
175
+ let input: &[u8] = unsafe { rb_input.as_slice() };
176
+
177
+ if input.len() < 4 || input[..4] != LZ4_FRAME_MAGIC {
178
+ return Err(Error::new(
179
+ decompress_error(ruby),
180
+ "lz4 frame decode failed: bad magic (input is not an LZ4 frame)",
181
+ ));
182
+ }
183
+
184
+ let mut dec = match &rb_self.dict {
185
+ None => FrameDecoder::new(Cursor::new(input)),
186
+ Some(d) => FrameDecoder::with_dictionary(Cursor::new(input), &d.bytes, d.id),
187
+ };
188
+
189
+ let mut out = Vec::new();
190
+ dec.read_to_end(&mut out).map_err(|e| {
191
+ Error::new(
192
+ decompress_error(ruby),
193
+ format!("lz4 frame decode failed: {e}"),
194
+ )
195
+ })?;
196
+
197
+ Ok(ruby.str_from_slice(&out))
198
+ }
199
+
200
+ fn frame_codec_size(rb_self: &FrameCodec) -> usize {
201
+ rb_self.dict.as_ref().map_or(0, |d| d.bytes.len())
202
+ }
203
+
204
+ fn frame_codec_has_dict(rb_self: &FrameCodec) -> bool {
205
+ rb_self.dict.is_some()
206
+ }
207
+
208
+ fn frame_codec_id(rb_self: &FrameCodec) -> Option<u32> {
209
+ rb_self.dict.as_ref().map(|d| d.id)
210
+ }
211
+
212
+ // ---------- DictTrainer ----------
213
+
214
+ const LZ4_MAX_DISTANCE: usize = 65535;
215
+
216
+ #[magnus::wrap(class = "Lz4rip::DictTrainer", free_immediately, size)]
217
+ struct RbDictTrainer {
218
+ inner: RefCell<Option<DictTrainer>>,
219
+ max_dict_size: usize,
220
+ }
221
+
222
+ /// Creates a dictionary trainer whose size budget is clamped to `LZ4_MAX_DISTANCE`.
+ ///
+ /// `max_dict_size` is the size requested from Ruby. The clamped value is used
+ /// for both the `max_dict_size` field (reported by `#max_dict_size` and used by
+ /// `#add_sample` to truncate samples) and the underlying `DictTrainer`, so the
+ /// reported limit and the limit the trainer actually works with cannot diverge.
+ fn dict_trainer_new(_ruby: &Ruby, max_dict_size: usize) -> RbDictTrainer {
223
+ let capped = max_dict_size.min(LZ4_MAX_DISTANCE);
224
+ RbDictTrainer {
225
+ max_dict_size: capped,
226
+ inner: RefCell::new(Some(DictTrainer::new(capped))),
227
+ }
228
+ }
229
+
230
+ fn dict_trainer_add_sample(
231
+ ruby: &Ruby,
232
+ rb_self: &RbDictTrainer,
233
+ rb_data: RString,
234
+ ) -> Result<(), Error> {
235
+ let mut borrow = rb_self.inner.borrow_mut();
236
+ let trainer = borrow.as_mut().ok_or_else(|| {
237
+ Error::new(
238
+ ruby.exception_runtime_error(),
239
+ "DictTrainer already consumed by #train",
240
+ )
241
+ })?;
242
+ let data: &[u8] = unsafe { rb_data.as_slice() };
243
+ let sample = if data.len() > rb_self.max_dict_size {
244
+ &data[..rb_self.max_dict_size]
245
+ } else {
246
+ data
247
+ };
248
+ trainer.add_sample(sample);
249
+ Ok(())
250
+ }
251
+
252
+ fn dict_trainer_sample_count(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
253
+ let borrow = rb_self.inner.borrow();
254
+ borrow.as_ref().map(|t| t.sample_count()).ok_or_else(|| {
255
+ Error::new(
256
+ ruby.exception_runtime_error(),
257
+ "DictTrainer already consumed by #train",
258
+ )
259
+ })
260
+ }
261
+
262
+ fn dict_trainer_total_bytes(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
263
+ let borrow = rb_self.inner.borrow();
264
+ borrow.as_ref().map(|t| t.total_bytes()).ok_or_else(|| {
265
+ Error::new(
266
+ ruby.exception_runtime_error(),
267
+ "DictTrainer already consumed by #train",
268
+ )
269
+ })
270
+ }
271
+
272
+ fn dict_trainer_train(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<RString, Error> {
273
+ let trainer = rb_self.inner.borrow_mut().take().ok_or_else(|| {
274
+ Error::new(
275
+ ruby.exception_runtime_error(),
276
+ "DictTrainer already consumed by #train",
277
+ )
278
+ })?;
279
+ let dict = trainer.train();
280
+ Ok(ruby.str_from_slice(&dict))
281
+ }
282
+
283
+ fn dict_trainer_max_dict_size(rb_self: &RbDictTrainer) -> usize {
284
+ rb_self.max_dict_size
285
+ }
286
+
287
+ fn dict_trainer_trained(rb_self: &RbDictTrainer) -> bool {
288
+ rb_self.inner.borrow().is_none()
289
+ }
290
+
291
+ // ---------- module init ----------
292
+
293
+ #[magnus::init]
294
+ fn init(ruby: &Ruby) -> Result<(), Error> {
295
+ unsafe { rb_sys::rb_ext_ractor_safe(true) };
296
+
297
+ let module = ruby.define_module("Lz4rip")?;
298
+
299
+ let decompress_error_class =
300
+ module.define_error("DecompressError", ruby.exception_standard_error())?;
301
+ DECOMPRESS_ERROR
302
+ .set(Opaque::from(decompress_error_class))
303
+ .unwrap_or_else(|_| panic!("init called more than once"));
304
+
305
+ module.define_module_function("compress_bound", function!(lz4rip_compress_bound, 1))?;
306
+ module.define_module_function("block_stream_size", function!(lz4rip_block_stream_size, 0))?;
307
+
308
+ let codec_class = module.define_class("BlockCodec", ruby.class_object())?;
309
+ codec_class.define_singleton_method("_native_new", function!(block_codec_new, 1))?;
310
+ codec_class.define_method("size", method!(block_codec_size, 0))?;
311
+ codec_class.define_method("has_dict?", method!(block_codec_has_dict, 0))?;
312
+ codec_class.define_method("compress", method!(block_codec_compress, 1))?;
313
+ codec_class.define_method("_decompress", method!(block_codec_decompress, 2))?;
314
+
315
+ let trainer_class = module.define_class("DictTrainer", ruby.class_object())?;
316
+ trainer_class.define_singleton_method("_native_new", function!(dict_trainer_new, 1))?;
317
+ trainer_class.define_method("add_sample", method!(dict_trainer_add_sample, 1))?;
318
+ trainer_class.define_method("sample_count", method!(dict_trainer_sample_count, 0))?;
319
+ trainer_class.define_method("total_bytes", method!(dict_trainer_total_bytes, 0))?;
320
+ trainer_class.define_method("train", method!(dict_trainer_train, 0))?;
321
+ trainer_class.define_method("max_dict_size", method!(dict_trainer_max_dict_size, 0))?;
322
+ trainer_class.define_method("trained?", method!(dict_trainer_trained, 0))?;
323
+
324
+ let frame_codec_class = module.define_class("FrameCodec", ruby.class_object())?;
325
+ frame_codec_class
326
+ .define_singleton_method("_native_new", function!(frame_codec_initialize, 2))?;
327
+ frame_codec_class.define_method("compress", method!(frame_codec_compress, 1))?;
328
+ frame_codec_class.define_method("decompress", method!(frame_codec_decompress, 1))?;
329
+ frame_codec_class.define_method("size", method!(frame_codec_size, 0))?;
330
+ frame_codec_class.define_method("has_dict?", method!(frame_codec_has_dict, 0))?;
331
+ frame_codec_class.define_method("id", method!(frame_codec_id, 0))?;
332
+
333
+ Ok(())
334
+ }
335
+
336
+ #[cfg(test)]
337
+ mod tests {
338
+ use super::*;
339
+
340
+ #[test]
341
+ fn block_round_trip() {
342
+ let data = b"hello hello hello hello".to_vec();
343
+ let ct = block::compress(&data);
344
+ let pt = block::decompress(&ct, data.len()).unwrap();
345
+ assert_eq!(pt, data);
346
+ }
347
+
348
+ #[test]
349
+ fn block_dict_round_trip() {
350
+ let dict = b"common log prefix: ".to_vec();
351
+ let msg = b"common log prefix: event=login user=alice".to_vec();
352
+
353
+ let mut comp = Compressor::with_dict(&dict);
354
+ let ct_dict = comp.compress(&msg);
355
+ let decomp = Decompressor::with_dict(&dict);
356
+ let pt = decomp.decompress(&ct_dict, msg.len()).unwrap();
357
+ assert_eq!(pt, msg);
358
+
359
+ let ct_plain = block::compress(&msg);
360
+ assert!(
361
+ ct_dict.len() < ct_plain.len(),
362
+ "dict compression should beat no-dict on shared-prefix input"
363
+ );
364
+ }
365
+
366
+ #[test]
367
+ fn frame_round_trip() {
368
+ let data = b"the quick brown fox jumps over the lazy dog ".repeat(100);
369
+ let mut enc = FrameEncoder::new(Vec::new());
370
+ enc.write_all(&data).unwrap();
371
+ let ct = enc.finish().unwrap();
372
+ assert!(ct.len() < data.len());
373
+ assert_eq!(&ct[..4], &LZ4_FRAME_MAGIC);
374
+
375
+ let mut dec = FrameDecoder::new(Cursor::new(&ct));
376
+ let mut pt = Vec::new();
377
+ dec.read_to_end(&mut pt).unwrap();
378
+ assert_eq!(pt, data);
379
+ }
380
+
381
+ #[test]
382
+ fn frame_empty_round_trip() {
383
+ let mut enc = FrameEncoder::new(Vec::new());
384
+ enc.write_all(b"").unwrap();
385
+ let ct = enc.finish().unwrap();
386
+
387
+ let mut dec = FrameDecoder::new(Cursor::new(&ct));
388
+ let mut pt = Vec::new();
389
+ dec.read_to_end(&mut pt).unwrap();
390
+ assert!(pt.is_empty());
391
+ }
392
+ }
data/lib/lz4rip/block_codec.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
+ module Lz4rip
6
+ class BlockCodec
7
+ def self.new(dict: nil)
8
+ _native_new(Dictionary === dict ? dict.bytes : dict)
9
+ end
10
+
11
+
12
+ def decompress(bytes, decompressed_size:)
13
+ _decompress(bytes, decompressed_size)
14
+ end
15
+ end
16
+ end
data/lib/lz4rip/dict_trainer.rb ADDED
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lz4rip
4
+ class DictTrainer
5
+ def self.new(max_dict_size)
6
+ _native_new(max_dict_size)
7
+ end
8
+ end
9
+ end
data/lib/lz4rip/dictionary.rb ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module Lz4rip
6
+ Dictionary = Data.define(:bytes, :id) do
7
+ def initialize(bytes:, id: Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V"))
8
+ super(bytes: bytes.b.freeze, id: id)
9
+ end
10
+
11
+
12
+ def size
13
+ bytes.bytesize
14
+ end
15
+ end
16
+ end
data/lib/lz4rip/frame_codec.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
+ module Lz4rip
6
+ class FrameCodec
7
+ def self.new(dict: nil)
8
+ case dict
9
+ when nil
10
+ _native_new(nil, 0)
11
+ when Dictionary
12
+ _native_new(dict.bytes, dict.id)
13
+ when String
14
+ _native_new(dict, Dictionary.new(bytes: dict).id)
15
+ else
16
+ raise TypeError, "expected Lz4rip::Dictionary, String, or nil; got #{dict.class}"
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/lz4rip/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lz4rip
4
+ VERSION = "0.1.0"
5
+ end
data/lib/lz4rip.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lz4rip/lz4rip" # Rust extension
4
+ require_relative "lz4rip/version"
5
+ require_relative "lz4rip/dictionary"
6
+ require_relative "lz4rip/block_codec"
7
+ require_relative "lz4rip/frame_codec"
8
+ require_relative "lz4rip/dict_trainer"
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lz4rip
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Patrik Wenger
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rb_sys
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.9'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.9'
26
+ description: Ruby bindings (via Rust/magnus) for lz4rip, a pure-Rust LZ4 implementation.
27
+ Block-format and frame-format compress/decompress with optional dictionary support
28
+ and COVER-based dictionary training. Ractor-safe.
29
+ email:
30
+ - paddor@gmail.com
31
+ executables: []
32
+ extensions:
33
+ - ext/lz4rip/extconf.rb
34
+ extra_rdoc_files: []
35
+ files:
36
+ - CHANGELOG.md
37
+ - Cargo.lock
38
+ - Cargo.toml
39
+ - LICENSE
40
+ - README.md
41
+ - ext/lz4rip/Cargo.toml
42
+ - ext/lz4rip/extconf.rb
43
+ - ext/lz4rip/src/lib.rs
44
+ - lib/lz4rip.rb
45
+ - lib/lz4rip/block_codec.rb
46
+ - lib/lz4rip/dict_trainer.rb
47
+ - lib/lz4rip/dictionary.rb
48
+ - lib/lz4rip/frame_codec.rb
49
+ - lib/lz4rip/version.rb
50
+ homepage: https://github.com/paddor/lz4rip-rb
51
+ licenses:
52
+ - MIT
53
+ metadata:
54
+ homepage_uri: https://github.com/paddor/lz4rip-rb
55
+ source_code_uri: https://github.com/paddor/lz4rip-rb
56
+ changelog_uri: https://github.com/paddor/lz4rip-rb/blob/main/CHANGELOG.md
57
+ rubygems_mfa_required: 'true'
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: 4.0.0
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubygems_version: 4.0.10
73
+ specification_version: 4
74
+ summary: Ractor-safe LZ4 compression for Ruby
75
+ test_files: []