zrip 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cec06279fae44f8a9e27237f7eda38cddd28f192a05612cb48d8ce97195ccd96
4
+ data.tar.gz: 5829099982f42ef6adca1ada816eb49d1ad61c5b87fc81a5567a28d96d37e381
5
+ SHA512:
6
+ metadata.gz: 59a7636a293430724a1bbca39f4bab97ed5e160162383b8ff48e44dc754518e5b58172041adaf3841026881523bab6937ce20c72a876852d9f1df7b236e243c4
7
+ data.tar.gz: 4f52bc2bbb0c9e65b46be617963639ebb6deb3fca57ae630387aa59116b5403000e08fb798bf95fb48b00f9ffcbec93e85da737897ff514cd2304126f4aa0751
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [0.1.0] - 2026-06-20
6
+
7
+ - Initial release.
8
+ - `Zrip::FrameCodec`: frame-format Zstandard codec (Ractor-shareable).
9
+ - `Zrip::BlockCodec`: frame-format Zstandard codec, per-Ractor (no lock overhead).
10
+ - `Zrip::Dictionary`: immutable value type for Zstandard dictionaries.
11
+ - `Zrip::DictTrainer`: FastCOVER-based dictionary trainer.
12
+ - `Zrip::FrameCodec.get_frame_content_size`: reads FCS from frame header.
data/Cargo.lock ADDED
@@ -0,0 +1,318 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.72.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "proc-macro2",
25
+ "quote",
26
+ "regex",
27
+ "rustc-hash",
28
+ "shlex",
29
+ "syn",
30
+ ]
31
+
32
+ [[package]]
33
+ name = "bitflags"
34
+ version = "2.13.0"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+ checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
37
+
38
+ [[package]]
39
+ name = "cexpr"
40
+ version = "0.6.0"
41
+ source = "registry+https://github.com/rust-lang/crates.io-index"
42
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
43
+ dependencies = [
44
+ "nom",
45
+ ]
46
+
47
+ [[package]]
48
+ name = "cfg-if"
49
+ version = "1.0.4"
50
+ source = "registry+https://github.com/rust-lang/crates.io-index"
51
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
52
+
53
+ [[package]]
54
+ name = "clang-sys"
55
+ version = "1.8.1"
56
+ source = "registry+https://github.com/rust-lang/crates.io-index"
57
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
58
+ dependencies = [
59
+ "glob",
60
+ "libc",
61
+ "libloading",
62
+ ]
63
+
64
+ [[package]]
65
+ name = "either"
66
+ version = "1.16.0"
67
+ source = "registry+https://github.com/rust-lang/crates.io-index"
68
+ checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
69
+
70
+ [[package]]
71
+ name = "glob"
72
+ version = "0.3.3"
73
+ source = "registry+https://github.com/rust-lang/crates.io-index"
74
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
75
+
76
+ [[package]]
77
+ name = "itertools"
78
+ version = "0.13.0"
79
+ source = "registry+https://github.com/rust-lang/crates.io-index"
80
+ checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
81
+ dependencies = [
82
+ "either",
83
+ ]
84
+
85
+ [[package]]
86
+ name = "lazy_static"
87
+ version = "1.5.0"
88
+ source = "registry+https://github.com/rust-lang/crates.io-index"
89
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
90
+
91
+ [[package]]
92
+ name = "libc"
93
+ version = "0.2.186"
94
+ source = "registry+https://github.com/rust-lang/crates.io-index"
95
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
96
+
97
+ [[package]]
98
+ name = "libloading"
99
+ version = "0.8.9"
100
+ source = "registry+https://github.com/rust-lang/crates.io-index"
101
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
102
+ dependencies = [
103
+ "cfg-if",
104
+ "windows-link",
105
+ ]
106
+
107
+ [[package]]
108
+ name = "magnus"
109
+ version = "0.8.2"
110
+ source = "registry+https://github.com/rust-lang/crates.io-index"
111
+ checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
112
+ dependencies = [
113
+ "magnus-macros",
114
+ "rb-sys",
115
+ "rb-sys-env",
116
+ "seq-macro",
117
+ ]
118
+
119
+ [[package]]
120
+ name = "magnus-macros"
121
+ version = "0.8.0"
122
+ source = "registry+https://github.com/rust-lang/crates.io-index"
123
+ checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
124
+ dependencies = [
125
+ "proc-macro2",
126
+ "quote",
127
+ "syn",
128
+ ]
129
+
130
+ [[package]]
131
+ name = "memchr"
132
+ version = "2.8.2"
133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
134
+ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
135
+
136
+ [[package]]
137
+ name = "minimal-lexical"
138
+ version = "0.2.1"
139
+ source = "registry+https://github.com/rust-lang/crates.io-index"
140
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
141
+
142
+ [[package]]
143
+ name = "nom"
144
+ version = "7.1.3"
145
+ source = "registry+https://github.com/rust-lang/crates.io-index"
146
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
147
+ dependencies = [
148
+ "memchr",
149
+ "minimal-lexical",
150
+ ]
151
+
152
+ [[package]]
153
+ name = "proc-macro2"
154
+ version = "1.0.106"
155
+ source = "registry+https://github.com/rust-lang/crates.io-index"
156
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
157
+ dependencies = [
158
+ "unicode-ident",
159
+ ]
160
+
161
+ [[package]]
162
+ name = "quote"
163
+ version = "1.0.45"
164
+ source = "registry+https://github.com/rust-lang/crates.io-index"
165
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
166
+ dependencies = [
167
+ "proc-macro2",
168
+ ]
169
+
170
+ [[package]]
171
+ name = "rb-sys"
172
+ version = "0.9.128"
173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
174
+ checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
175
+ dependencies = [
176
+ "rb-sys-build",
177
+ ]
178
+
179
+ [[package]]
180
+ name = "rb-sys-build"
181
+ version = "0.9.128"
182
+ source = "registry+https://github.com/rust-lang/crates.io-index"
183
+ checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
184
+ dependencies = [
185
+ "bindgen",
186
+ "lazy_static",
187
+ "proc-macro2",
188
+ "quote",
189
+ "regex",
190
+ "shell-words",
191
+ "syn",
192
+ ]
193
+
194
+ [[package]]
195
+ name = "rb-sys-env"
196
+ version = "0.2.3"
197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
198
+ checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
199
+
200
+ [[package]]
201
+ name = "regex"
202
+ version = "1.12.4"
203
+ source = "registry+https://github.com/rust-lang/crates.io-index"
204
+ checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
205
+ dependencies = [
206
+ "aho-corasick",
207
+ "memchr",
208
+ "regex-automata",
209
+ "regex-syntax",
210
+ ]
211
+
212
+ [[package]]
213
+ name = "regex-automata"
214
+ version = "0.4.14"
215
+ source = "registry+https://github.com/rust-lang/crates.io-index"
216
+ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
217
+ dependencies = [
218
+ "aho-corasick",
219
+ "memchr",
220
+ "regex-syntax",
221
+ ]
222
+
223
+ [[package]]
224
+ name = "regex-syntax"
225
+ version = "0.8.11"
226
+ source = "registry+https://github.com/rust-lang/crates.io-index"
227
+ checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
228
+
229
+ [[package]]
230
+ name = "rustc-hash"
231
+ version = "2.1.2"
232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
233
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
234
+
235
+ [[package]]
236
+ name = "seq-macro"
237
+ version = "0.3.6"
238
+ source = "registry+https://github.com/rust-lang/crates.io-index"
239
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
240
+
241
+ [[package]]
242
+ name = "shell-words"
243
+ version = "1.1.1"
244
+ source = "registry+https://github.com/rust-lang/crates.io-index"
245
+ checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
246
+
247
+ [[package]]
248
+ name = "shlex"
249
+ version = "1.3.0"
250
+ source = "registry+https://github.com/rust-lang/crates.io-index"
251
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
252
+
253
+ [[package]]
254
+ name = "syn"
255
+ version = "2.0.118"
256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
257
+ checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
258
+ dependencies = [
259
+ "proc-macro2",
260
+ "quote",
261
+ "unicode-ident",
262
+ ]
263
+
264
+ [[package]]
265
+ name = "unicode-ident"
266
+ version = "1.0.24"
267
+ source = "registry+https://github.com/rust-lang/crates.io-index"
268
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
269
+
270
+ [[package]]
271
+ name = "windows-link"
272
+ version = "0.2.1"
273
+ source = "registry+https://github.com/rust-lang/crates.io-index"
274
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
275
+
276
+ [[package]]
277
+ name = "zrip"
278
+ version = "0.1.0"
279
+ dependencies = [
280
+ "magnus",
281
+ "rb-sys",
282
+ "zrip 0.3.4",
283
+ ]
284
+
285
+ [[package]]
286
+ name = "zrip"
287
+ version = "0.3.4"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "86ea9c671343ec4c85c7e64e1e81ba37653c5ba190fa4861fc094fa21fe8a7ce"
290
+ dependencies = [
291
+ "zrip-core",
292
+ "zrip-decode",
293
+ "zrip-encode",
294
+ ]
295
+
296
+ [[package]]
297
+ name = "zrip-core"
298
+ version = "0.3.3"
299
+ source = "registry+https://github.com/rust-lang/crates.io-index"
300
+ checksum = "d085b7cbd7698827ed6ce19331819146e70e6612ed2e2dc26b0b14a1c2dab2d0"
301
+
302
+ [[package]]
303
+ name = "zrip-decode"
304
+ version = "0.3.3"
305
+ source = "registry+https://github.com/rust-lang/crates.io-index"
306
+ checksum = "dc4ad88afeb185aaf5209a86efafae5ce320d3e9a6e80d76b45a27773dab5ece"
307
+ dependencies = [
308
+ "zrip-core",
309
+ ]
310
+
311
+ [[package]]
312
+ name = "zrip-encode"
313
+ version = "0.3.3"
314
+ source = "registry+https://github.com/rust-lang/crates.io-index"
315
+ checksum = "9bbf6a9da066f17b5e76d189c7e028dc61dcd806ab394b3999b6ee3c2da7bf79"
316
+ dependencies = [
317
+ "zrip-core",
318
+ ]
data/Cargo.toml ADDED
@@ -0,0 +1,9 @@
1
+ [workspace]
2
+ members = ["ext/zrip"]
3
+ resolver = "2"
4
+
5
+ [profile.release]
6
+ opt-level = 3
7
+ lto = true
8
+ codegen-units = 1
9
+ panic = "abort"
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Patrik Wenger
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,150 @@
1
+ # zrip — Ractor-safe Zstandard for Ruby
2
+
3
+ [![CI](https://github.com/paddor/zrip-rb/actions/workflows/ci.yml/badge.svg)](https://github.com/paddor/zrip-rb/actions/workflows/ci.yml)
4
+ [![Gem Version](https://img.shields.io/gem/v/zrip?color=e9573f)](https://rubygems.org/gems/zrip)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Ruby](https://img.shields.io/badge/Ruby-%3E%3D%204.0-CC342D?logo=ruby&logoColor=white)](https://www.ruby-lang.org)
7
+
8
+ Ruby bindings for [zrip](https://crates.io/crates/zrip), a pure-Rust Zstandard
9
+ implementation. Built with [magnus](https://github.com/matsadler/magnus) and
10
+ declared Ractor-safe so you can compress from any Ractor without a global lock.
11
+
12
+ ## Features
13
+
14
+ - **Frame codec** for standard Zstd frames (Ractor-shareable)
15
+ - **Block codec** with per-Ractor context (no lock overhead)
16
+ - **Dictionary support** for both frame and block codecs
17
+ - **FastCOVER-based dictionary trainer** (`DictTrainer`)
18
+ - **Configurable compression levels** (default: 1)
19
+ - **Bounded decompression** with `max_output_size:` and frame content size checks
20
+ - **Ractor-safe**: `FrameCodec` is shareable across Ractors, `BlockCodec` is
21
+ per-Ractor (mutable context state)
22
+
23
+ ## Install
24
+
25
+ Requires Ruby >= 4.0 and a Rust toolchain (for building the native extension):
26
+
27
+ ```sh
28
+ gem install zrip
29
+ ```
30
+
31
+ Or in your Gemfile:
32
+
33
+ ```ruby
34
+ gem "zrip"
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ ### Frame codec (standard Zstd frames)
40
+
41
+ ```ruby
42
+ require "zrip"
43
+
44
+ codec = Zrip::FrameCodec.new
45
+ compressed = codec.compress("hello world " * 1000)
46
+ original = codec.decompress(compressed)
47
+ ```
48
+
49
+ ### Block codec
50
+
51
+ ```ruby
52
+ codec = Zrip::BlockCodec.new
53
+ compressed = codec.compress("hello world " * 1000)
54
+ original = codec.decompress(compressed)
55
+ ```
56
+
57
+ ### Compression levels
58
+
59
+ ```ruby
60
+ fast = Zrip::FrameCodec.new(level: -3) # negative = faster
61
+ strong = Zrip::FrameCodec.new(level: 19) # higher = smaller output
62
+ ```
63
+
64
+ ### Bounded decompression
65
+
66
+ ```ruby
67
+ codec = Zrip::FrameCodec.new
68
+
69
+ # Limit output size to 1 MiB
70
+ codec.decompress(compressed, max_output_size: 1_048_576)
71
+
72
+ # Read frame content size from header (without decompressing)
73
+ Zrip::FrameCodec.get_frame_content_size(compressed) #=> 12000
74
+ ```
75
+
76
+ ### Dictionary compression
77
+
78
+ ```ruby
79
+ dict = Zrip::Dictionary.new(bytes: trained_dict_bytes)
80
+ codec = Zrip::FrameCodec.new(dict: dict)
81
+
82
+ compressed = codec.compress("common log prefix: event=login user=alice")
83
+ original = codec.decompress(compressed)
84
+ ```
85
+
86
+ ### Dictionary training
87
+
88
+ ```ruby
89
+ trainer = Zrip::DictTrainer.new(8192)
90
+ messages.each { |msg| trainer.add_sample(msg) }
91
+ dict_bytes = trainer.train
92
+
93
+ dict = Zrip::Dictionary.new(bytes: dict_bytes)
94
+ codec = Zrip::FrameCodec.new(dict: dict)
95
+ ```
96
+
97
+ ### Ractor safety
98
+
99
+ ```ruby
100
+ codec = Zrip::FrameCodec.new
101
+
102
+ ractors = 4.times.map do |i|
103
+ Ractor.new(codec) do |c|
104
+ data = "ractor #{Ractor.current} payload " * 100
105
+ ct = c.compress(data)
106
+ raise "mismatch" unless c.decompress(ct) == data
107
+ :ok
108
+ end
109
+ end
110
+
111
+ ractors.each { |r| p r.value } # => :ok, :ok, :ok, :ok
112
+ ```
113
+
114
+ ## API
115
+
116
+ | Class / Module | Method | Description |
117
+ |---|---|---|
118
+ | `Zrip::FrameCodec` | `.new(dict: nil, level: 1)` | Create a frame codec, optionally with a `Dictionary`, raw `String` dict, or compression level |
119
+ | | `.get_frame_content_size(string)` | Read Frame_Content_Size from a Zstd frame header |
120
+ | | `#compress(string)` | Compress to Zstd frame |
121
+ | | `#decompress(string, max_output_size: nil)` | Decompress a Zstd frame, optionally bounded |
122
+ | | `#has_dict?` | Whether a dictionary is loaded |
123
+ | | `#id` | Dictionary ID (nil without dict) |
124
+ | | `#size` | Dictionary size in bytes (0 without dict) |
125
+ | | `#level` | Compression level |
126
+ | `Zrip::BlockCodec` | `.new(dict: nil, level: 1)` | Create a block codec, optionally with a dict |
127
+ | | `#compress(string)` | Compress to Zstd block |
128
+ | | `#decompress(string, max_output_size: nil)` | Decompress a Zstd block, optionally bounded |
129
+ | | `#has_dict?` | Whether a dictionary is loaded |
130
+ | | `#size` | Dictionary size in bytes (0 without dict) |
131
+ | | `#level` | Compression level |
132
+ | `Zrip::Dictionary` | `.new(bytes:, id: nil)` | Immutable dictionary value object (`Data.define`) |
133
+ | | `#bytes` | Frozen binary dict bytes |
134
+ | | `#id` | 32-bit dictionary ID (auto-detected from ZDICT header or SHA-256) |
135
+ | | `#size` | Dictionary size in bytes |
136
+ | `Zrip::DictTrainer` | `.new(max_dict_size)` | Create a trainer |
137
+ | | `#add_sample(string)` | Feed a training sample (samples shorter than 4 bytes are silently skipped) |
138
+ | | `#train` | Consume the trainer and return the dict bytes (an empty string if fewer than 2 samples were accepted) |
139
+ | | `#sample_count` | Number of accepted samples |
140
+ | | `#total_bytes` | Total bytes of accepted samples |
141
+ | | `#trained?` | Whether `#train` has been called |
142
+ | | `#max_dict_size` | Configured max dict size |
143
+ | `Zrip::DecompressError` | | Raised on decompression failure |
144
+ | `Zrip::CompressError` | | Raised on compression failure |
145
+ | `Zrip::MissingContentSizeError` | | Raised when Frame_Content_Size is absent (subclass of `DecompressError`) |
146
+ | `Zrip::OutputSizeLimitError` | | Raised when declared content size exceeds limit (subclass of `DecompressError`) |
147
+
148
+ ## License
149
+
150
+ [MIT](LICENSE)
@@ -0,0 +1,16 @@
1
+ [package]
2
+ name = "zrip"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+
6
+ [lib]
7
+ name = "zrip"
8
+ crate-type = ["cdylib", "rlib"]
9
+
10
+ [dependencies]
11
+ zstd = { version = "0.3", package = "zrip", features = ["frame", "dict_builder"] }
12
+ magnus = "0.8"
13
+ rb-sys = "0.9"
14
+
15
+ [build-dependencies]
16
+ rb-sys = "0.9"
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
# Generate the Makefile that builds the Rust extension as "zrip/zrip".
# The Cargo profile defaults to :release and can be overridden through the
# RB_SYS_CARGO_PROFILE environment variable.
create_rust_makefile("zrip/zrip") do |builder|
  profile_name = ENV.fetch("RB_SYS_CARGO_PROFILE", :release)
  builder.profile = profile_name.to_sym
end
@@ -0,0 +1,576 @@
1
+ use magnus::{
2
+ exception::ExceptionClass, function, method, prelude::*, r_string::RString, value::Opaque,
3
+ Error, Ruby,
4
+ };
5
+ use std::cell::RefCell;
6
+ use std::sync::{Mutex, OnceLock};
7
+
8
+ use zstd::dict::fastcover::FastCoverParams;
9
+ use zstd::{CompressContext, DecompressContext, Dictionary};
10
+
11
+ static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
12
+ static COMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
13
+ static MISSING_CONTENT_SIZE_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
14
+ static OUTPUT_SIZE_LIMIT_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
15
+
16
+ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
17
+ ruby.get_inner(
18
+ *DECOMPRESS_ERROR
19
+ .get()
20
+ .expect("DecompressError not initialized"),
21
+ )
22
+ }
23
+
24
+ fn compress_error(ruby: &Ruby) -> ExceptionClass {
25
+ ruby.get_inner(*COMPRESS_ERROR.get().expect("CompressError not initialized"))
26
+ }
27
+
28
+ fn missing_content_size_error(ruby: &Ruby) -> ExceptionClass {
29
+ ruby.get_inner(
30
+ *MISSING_CONTENT_SIZE_ERROR
31
+ .get()
32
+ .expect("MissingContentSizeError not initialized"),
33
+ )
34
+ }
35
+
36
+ fn output_size_limit_error(ruby: &Ruby) -> ExceptionClass {
37
+ ruby.get_inner(
38
+ *OUTPUT_SIZE_LIMIT_ERROR
39
+ .get()
40
+ .expect("OutputSizeLimitError not initialized"),
41
+ )
42
+ }
43
+
44
+ // ---------- frame header parsing ----------
45
+
46
/// The four bytes every input must start with to be treated as a Zstd frame.
const ZSTD_FRAME_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];

/// Failure modes shared by frame-header parsing and bounded decompression.
#[derive(Debug)]
enum BoundedError {
    /// Input is too short for the header fields being read, or does not start
    /// with `ZSTD_FRAME_MAGIC`.
    BadMagic,
    /// The header carries no Frame_Content_Size although a bound was requested.
    MissingContentSize,
    /// The declared content size is larger than the applicable limit.
    OutputSizeLimit { declared: u64, limit: u64 },
    /// The decoder itself reported an error (message already formatted).
    DecoderFailed(String),
}

/// Reads the Frame_Content_Size field from the header at the start of `input`.
///
/// Returns `Ok(None)` when the frame header descriptor says the field is
/// absent (FCS flag 0 without the single-segment bit), `Ok(Some(size))` when
/// it is present, and `Err(BoundedError::BadMagic)` when the magic number is
/// wrong or the input ends before the field does.
fn parse_frame_content_size(input: &[u8]) -> Result<Option<u64>, BoundedError> {
    // Magic number plus the one-byte frame header descriptor.
    let fixed_len = ZSTD_FRAME_MAGIC.len() + 1;
    if input.len() < fixed_len || !input.starts_with(&ZSTD_FRAME_MAGIC) {
        return Err(BoundedError::BadMagic);
    }

    let descriptor = input[ZSTD_FRAME_MAGIC.len()];
    let single_segment = descriptor & 0x20 != 0;

    // Width of the content-size field, selected by the top two descriptor bits.
    let fcs_width = match (descriptor >> 6, single_segment) {
        (0, false) => return Ok(None),
        (0, true) => 1usize,
        (1, _) => 2,
        (2, _) => 4,
        _ => 8,
    };

    // Optional fields that sit between the descriptor and the content size.
    let window_descriptor_len = if single_segment { 0usize } else { 1 };
    let dict_id_len = match descriptor & 0b11 {
        0 => 0usize,
        1 => 1,
        2 => 2,
        _ => 4,
    };

    let start = fixed_len + window_descriptor_len + dict_id_len;
    let field = match input.get(start..start + fcs_width) {
        Some(bytes) => bytes,
        None => return Err(BoundedError::BadMagic),
    };

    // Little-endian decode that works for every field width.
    let raw = field
        .iter()
        .rev()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));

    // The two-byte encoding stores the size minus 256.
    let value = if fcs_width == 2 { raw + 256 } else { raw };

    Ok(Some(value))
}
100
+
101
+ fn decompress_bounded(
102
+ compressed: &[u8],
103
+ max_output: usize,
104
+ dctx: &mut DecompressContext,
105
+ ) -> Result<Vec<u8>, BoundedError> {
106
+ if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
107
+ return Err(BoundedError::BadMagic);
108
+ }
109
+
110
+ let upper = match parse_frame_content_size(compressed)? {
111
+ Some(n) => {
112
+ if max_output != 0 && n > max_output as u64 {
113
+ return Err(BoundedError::OutputSizeLimit {
114
+ declared: n,
115
+ limit: max_output as u64,
116
+ });
117
+ }
118
+ if n > u64::from(u32::MAX) {
119
+ return Err(BoundedError::OutputSizeLimit {
120
+ declared: n,
121
+ limit: u64::from(u32::MAX),
122
+ });
123
+ }
124
+ n as usize
125
+ }
126
+ None => {
127
+ if max_output != 0 {
128
+ return Err(BoundedError::MissingContentSize);
129
+ }
130
+ 1024 * 1024
131
+ }
132
+ };
133
+
134
+ let result = dctx
135
+ .decompress_with_limit(compressed, upper)
136
+ .map_err(|e| BoundedError::DecoderFailed(format!("{e}")))?;
137
+
138
+ Ok(result.into_owned())
139
+ }
140
+
141
+ fn raise_bounded(ruby: &Ruby, err: BoundedError, prefix: &str) -> Error {
142
+ match err {
143
+ BoundedError::BadMagic => Error::new(
144
+ decompress_error(ruby),
145
+ format!("{prefix}: bad magic (input is not a Zstd frame)"),
146
+ ),
147
+ BoundedError::MissingContentSize => Error::new(
148
+ missing_content_size_error(ruby),
149
+ format!("{prefix}: Frame_Content_Size absent from frame header"),
150
+ ),
151
+ BoundedError::OutputSizeLimit { declared, limit } => Error::new(
152
+ output_size_limit_error(ruby),
153
+ format!("{prefix}: declared content size {declared} exceeds limit {limit}"),
154
+ ),
155
+ BoundedError::DecoderFailed(msg) => {
156
+ Error::new(decompress_error(ruby), format!("{prefix}: {msg}"))
157
+ }
158
+ }
159
+ }
160
+
161
+ // ---------- dict helper ----------
162
+
163
+ fn load_dict(ruby: &Ruby, bytes: &[u8]) -> Result<Dictionary, Error> {
164
+ Dictionary::from_bytes(bytes).map_err(|_| {
165
+ Error::new(
166
+ ruby.exception_runtime_error(),
167
+ "dictionary must be in ZDICT format (use DictTrainer to train one)",
168
+ )
169
+ })
170
+ }
171
+
172
+ // ---------- FrameCodec ----------
173
+
174
+ #[magnus::wrap(class = "Zrip::FrameCodec", free_immediately, size)]
175
+ struct FrameCodec {
176
+ dict_len: usize,
177
+ dict_id: Option<u32>,
178
+ level: i32,
179
+ cctx: Mutex<CompressContext>,
180
+ dctx: Mutex<DecompressContext>,
181
+ }
182
+
183
+ unsafe impl Send for FrameCodec {}
184
+ unsafe impl Sync for FrameCodec {}
185
+
186
+ fn frame_codec_new(
187
+ ruby: &Ruby,
188
+ rb_dict: Option<RString>,
189
+ id: u32,
190
+ level: i32,
191
+ ) -> Result<FrameCodec, Error> {
192
+ let (dict_len, dict_id, cctx, dctx) = match rb_dict {
193
+ None => {
194
+ let cctx = CompressContext::new(level).map_err(|e| {
195
+ Error::new(
196
+ compress_error(ruby),
197
+ format!("CompressContext::new failed: {e}"),
198
+ )
199
+ })?;
200
+ (0, None, cctx, DecompressContext::new())
201
+ }
202
+ Some(s) => {
203
+ let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
204
+ s.freeze();
205
+ let dict = load_dict(ruby, &bytes)?;
206
+ let dl = bytes.len();
207
+ let cctx = CompressContext::with_dict(level, dict.clone()).map_err(|e| {
208
+ Error::new(
209
+ compress_error(ruby),
210
+ format!("CompressContext::with_dict failed: {e}"),
211
+ )
212
+ })?;
213
+ let dctx = DecompressContext::with_dict(dict);
214
+ (dl, Some(id), cctx, dctx)
215
+ }
216
+ };
217
+
218
+ Ok(FrameCodec {
219
+ dict_len,
220
+ dict_id,
221
+ level,
222
+ cctx: Mutex::new(cctx),
223
+ dctx: Mutex::new(dctx),
224
+ })
225
+ }
226
+
227
+ fn frame_codec_compress(
228
+ ruby: &Ruby,
229
+ rb_self: &FrameCodec,
230
+ rb_input: RString,
231
+ ) -> Result<RString, Error> {
232
+ let input: &[u8] = unsafe { rb_input.as_slice() };
233
+ let mut cctx = rb_self.cctx.lock().expect("FrameCodec CCtx mutex poisoned");
234
+ let out = cctx
235
+ .compress(input)
236
+ .map_err(|e| Error::new(compress_error(ruby), format!("zstd compress failed: {e}")))?;
237
+ Ok(ruby.str_from_slice(&out))
238
+ }
239
+
240
+ fn frame_codec_decompress(
241
+ ruby: &Ruby,
242
+ rb_self: &FrameCodec,
243
+ rb_input: RString,
244
+ max_output: usize,
245
+ ) -> Result<RString, Error> {
246
+ let compressed: &[u8] = unsafe { rb_input.as_slice() };
247
+ let mut dctx = rb_self.dctx.lock().expect("FrameCodec DCtx mutex poisoned");
248
+ let out = decompress_bounded(compressed, max_output, &mut dctx)
249
+ .map_err(|e| raise_bounded(ruby, e, "zstd frame decode failed"))?;
250
+ Ok(ruby.str_from_slice(&out))
251
+ }
252
+
253
+ fn frame_codec_size(rb_self: &FrameCodec) -> usize {
254
+ rb_self.dict_len
255
+ }
256
+
257
+ fn frame_codec_has_dict(rb_self: &FrameCodec) -> bool {
258
+ rb_self.dict_id.is_some()
259
+ }
260
+
261
+ fn frame_codec_id(rb_self: &FrameCodec) -> Option<u32> {
262
+ rb_self.dict_id
263
+ }
264
+
265
+ fn frame_codec_level(rb_self: &FrameCodec) -> i32 {
266
+ rb_self.level
267
+ }
268
+
269
+ fn frame_codec_get_frame_content_size(
270
+ ruby: &Ruby,
271
+ rb_input: RString,
272
+ ) -> Result<Option<u64>, Error> {
273
+ let bytes: &[u8] = unsafe { rb_input.as_slice() };
274
+ match parse_frame_content_size(bytes) {
275
+ Ok(v) => Ok(v),
276
+ Err(BoundedError::BadMagic) => Err(Error::new(
277
+ decompress_error(ruby),
278
+ "zstd frame header parse failed: bad magic (input is not a Zstd frame)",
279
+ )),
280
+ Err(e) => Err(raise_bounded(ruby, e, "zstd frame header parse failed")),
281
+ }
282
+ }
283
+
284
+ // ---------- BlockCodec ----------
285
+
286
+ #[magnus::wrap(class = "Zrip::BlockCodec", free_immediately, size)]
287
+ struct BlockCodec {
288
+ dict_len: usize,
289
+ dict_id: Option<u32>,
290
+ level: i32,
291
+ cctx: RefCell<CompressContext>,
292
+ dctx: RefCell<DecompressContext>,
293
+ }
294
+
295
+ fn block_codec_new(
296
+ ruby: &Ruby,
297
+ rb_dict: Option<RString>,
298
+ id: u32,
299
+ level: i32,
300
+ ) -> Result<BlockCodec, Error> {
301
+ let (dict_len, dict_id, cctx, dctx) = match rb_dict {
302
+ None => {
303
+ let cctx = CompressContext::new(level).map_err(|e| {
304
+ Error::new(
305
+ compress_error(ruby),
306
+ format!("CompressContext::new failed: {e}"),
307
+ )
308
+ })?;
309
+ (0, None, cctx, DecompressContext::new())
310
+ }
311
+ Some(s) => {
312
+ let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
313
+ let dict = load_dict(ruby, &bytes)?;
314
+ let dl = bytes.len();
315
+ let cctx = CompressContext::with_dict(level, dict.clone()).map_err(|e| {
316
+ Error::new(
317
+ compress_error(ruby),
318
+ format!("CompressContext::with_dict failed: {e}"),
319
+ )
320
+ })?;
321
+ let dctx = DecompressContext::with_dict(dict);
322
+ (dl, Some(id), cctx, dctx)
323
+ }
324
+ };
325
+
326
+ Ok(BlockCodec {
327
+ dict_len,
328
+ dict_id,
329
+ level,
330
+ cctx: RefCell::new(cctx),
331
+ dctx: RefCell::new(dctx),
332
+ })
333
+ }
334
+
335
+ fn block_codec_compress(
336
+ ruby: &Ruby,
337
+ rb_self: &BlockCodec,
338
+ rb_input: RString,
339
+ ) -> Result<RString, Error> {
340
+ let input: &[u8] = unsafe { rb_input.as_slice() };
341
+ let mut cctx = rb_self.cctx.borrow_mut();
342
+ let out = cctx
343
+ .compress(input)
344
+ .map_err(|e| Error::new(compress_error(ruby), format!("zstd compress failed: {e}")))?;
345
+ Ok(ruby.str_from_slice(&out))
346
+ }
347
+
348
+ fn block_codec_decompress(
349
+ ruby: &Ruby,
350
+ rb_self: &BlockCodec,
351
+ rb_input: RString,
352
+ max_output: usize,
353
+ ) -> Result<RString, Error> {
354
+ let compressed: &[u8] = unsafe { rb_input.as_slice() };
355
+ let mut dctx = rb_self.dctx.borrow_mut();
356
+ let out = decompress_bounded(compressed, max_output, &mut dctx)
357
+ .map_err(|e| raise_bounded(ruby, e, "zstd block decode failed"))?;
358
+ Ok(ruby.str_from_slice(&out))
359
+ }
360
+
361
+ fn block_codec_size(rb_self: &BlockCodec) -> usize {
362
+ rb_self.dict_len
363
+ }
364
+
365
+ fn block_codec_has_dict(rb_self: &BlockCodec) -> bool {
366
+ rb_self.dict_id.is_some()
367
+ }
368
+
369
+ fn block_codec_level(rb_self: &BlockCodec) -> i32 {
370
+ rb_self.level
371
+ }
372
+
373
+ // ---------- DictTrainer ----------
374
+
375
+ #[magnus::wrap(class = "Zrip::DictTrainer", free_immediately, size)]
376
+ struct RbDictTrainer {
377
+ inner: RefCell<Option<TrainerState>>,
378
+ max_dict_size: usize,
379
+ }
380
+
381
+ struct TrainerState {
382
+ samples: Vec<Vec<u8>>,
383
+ total_bytes: usize,
384
+ }
385
+
386
+ fn dict_trainer_new(_ruby: &Ruby, max_dict_size: usize) -> RbDictTrainer {
387
+ RbDictTrainer {
388
+ max_dict_size,
389
+ inner: RefCell::new(Some(TrainerState {
390
+ samples: Vec::new(),
391
+ total_bytes: 0,
392
+ })),
393
+ }
394
+ }
395
+
396
+ fn dict_trainer_add_sample(
397
+ ruby: &Ruby,
398
+ rb_self: &RbDictTrainer,
399
+ rb_data: RString,
400
+ ) -> Result<(), Error> {
401
+ let mut borrow = rb_self.inner.borrow_mut();
402
+ let state = borrow.as_mut().ok_or_else(|| {
403
+ Error::new(
404
+ ruby.exception_runtime_error(),
405
+ "DictTrainer already consumed by #train",
406
+ )
407
+ })?;
408
+ let data: Vec<u8> = unsafe { rb_data.as_slice().to_vec() };
409
+ if data.len() < 4 {
410
+ return Ok(());
411
+ }
412
+ state.total_bytes += data.len();
413
+ state.samples.push(data);
414
+ Ok(())
415
+ }
416
+
417
+ fn dict_trainer_sample_count(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
418
+ let borrow = rb_self.inner.borrow();
419
+ borrow.as_ref().map(|s| s.samples.len()).ok_or_else(|| {
420
+ Error::new(
421
+ ruby.exception_runtime_error(),
422
+ "DictTrainer already consumed by #train",
423
+ )
424
+ })
425
+ }
426
+
427
+ fn dict_trainer_total_bytes(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
428
+ let borrow = rb_self.inner.borrow();
429
+ borrow.as_ref().map(|s| s.total_bytes).ok_or_else(|| {
430
+ Error::new(
431
+ ruby.exception_runtime_error(),
432
+ "DictTrainer already consumed by #train",
433
+ )
434
+ })
435
+ }
436
+
437
+ fn dict_trainer_train(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<RString, Error> {
438
+ let state = rb_self.inner.borrow_mut().take().ok_or_else(|| {
439
+ Error::new(
440
+ ruby.exception_runtime_error(),
441
+ "DictTrainer already consumed by #train",
442
+ )
443
+ })?;
444
+
445
+ if state.samples.len() < 2 {
446
+ return Ok(ruby.str_from_slice(b""));
447
+ }
448
+
449
+ let refs: Vec<&[u8]> = state.samples.iter().map(|s| s.as_slice()).collect();
450
+
451
+ let content = zstd::dict::fastcover::select_segments(
452
+ &refs,
453
+ rb_self.max_dict_size,
454
+ &FastCoverParams::default(),
455
+ );
456
+ let dict_bytes =
457
+ zstd::dict::finalize::finalize_dictionary(&content, &refs, rb_self.max_dict_size);
458
+
459
+ Ok(ruby.str_from_slice(&dict_bytes))
460
+ }
461
+
462
+ fn dict_trainer_max_dict_size(rb_self: &RbDictTrainer) -> usize {
463
+ rb_self.max_dict_size
464
+ }
465
+
466
+ fn dict_trainer_trained(rb_self: &RbDictTrainer) -> bool {
467
+ rb_self.inner.borrow().is_none()
468
+ }
469
+
470
+ // ---------- module init ----------
471
+
472
+ #[magnus::init]
473
+ fn init(ruby: &Ruby) -> Result<(), Error> {
474
+ unsafe { rb_sys::rb_ext_ractor_safe(true) };
475
+
476
+ let module = ruby.define_module("Zrip")?;
477
+
478
+ let decompress_error_class =
479
+ module.define_error("DecompressError", ruby.exception_standard_error())?;
480
+ DECOMPRESS_ERROR
481
+ .set(Opaque::from(decompress_error_class))
482
+ .unwrap_or_else(|_| panic!("init called more than once"));
483
+
484
+ let compress_error_class =
485
+ module.define_error("CompressError", ruby.exception_standard_error())?;
486
+ COMPRESS_ERROR
487
+ .set(Opaque::from(compress_error_class))
488
+ .unwrap_or_else(|_| panic!("init called more than once"));
489
+
490
+ let missing_content_size_error_class =
491
+ module.define_error("MissingContentSizeError", decompress_error_class)?;
492
+ MISSING_CONTENT_SIZE_ERROR
493
+ .set(Opaque::from(missing_content_size_error_class))
494
+ .unwrap_or_else(|_| panic!("init called more than once"));
495
+
496
+ let output_size_limit_error_class =
497
+ module.define_error("OutputSizeLimitError", decompress_error_class)?;
498
+ OUTPUT_SIZE_LIMIT_ERROR
499
+ .set(Opaque::from(output_size_limit_error_class))
500
+ .unwrap_or_else(|_| panic!("init called more than once"));
501
+
502
+ // FrameCodec
503
+ let frame_codec_class = module.define_class("FrameCodec", ruby.class_object())?;
504
+ frame_codec_class.define_singleton_method("_native_new", function!(frame_codec_new, 3))?;
505
+ frame_codec_class.define_singleton_method(
506
+ "get_frame_content_size",
507
+ function!(frame_codec_get_frame_content_size, 1),
508
+ )?;
509
+ frame_codec_class.define_method("compress", method!(frame_codec_compress, 1))?;
510
+ frame_codec_class.define_method("_native_decompress", method!(frame_codec_decompress, 2))?;
511
+ frame_codec_class.define_method("size", method!(frame_codec_size, 0))?;
512
+ frame_codec_class.define_method("has_dict?", method!(frame_codec_has_dict, 0))?;
513
+ frame_codec_class.define_method("id", method!(frame_codec_id, 0))?;
514
+ frame_codec_class.define_method("level", method!(frame_codec_level, 0))?;
515
+
516
+ // BlockCodec
517
+ let block_codec_class = module.define_class("BlockCodec", ruby.class_object())?;
518
+ block_codec_class.define_singleton_method("_native_new", function!(block_codec_new, 3))?;
519
+ block_codec_class.define_method("compress", method!(block_codec_compress, 1))?;
520
+ block_codec_class.define_method("_native_decompress", method!(block_codec_decompress, 2))?;
521
+ block_codec_class.define_method("size", method!(block_codec_size, 0))?;
522
+ block_codec_class.define_method("has_dict?", method!(block_codec_has_dict, 0))?;
523
+ block_codec_class.define_method("level", method!(block_codec_level, 0))?;
524
+
525
+ // DictTrainer
526
+ let trainer_class = module.define_class("DictTrainer", ruby.class_object())?;
527
+ trainer_class.define_singleton_method("_native_new", function!(dict_trainer_new, 1))?;
528
+ trainer_class.define_method("add_sample", method!(dict_trainer_add_sample, 1))?;
529
+ trainer_class.define_method("sample_count", method!(dict_trainer_sample_count, 0))?;
530
+ trainer_class.define_method("total_bytes", method!(dict_trainer_total_bytes, 0))?;
531
+ trainer_class.define_method("train", method!(dict_trainer_train, 0))?;
532
+ trainer_class.define_method("max_dict_size", method!(dict_trainer_max_dict_size, 0))?;
533
+ trainer_class.define_method("trained?", method!(dict_trainer_trained, 0))?;
534
+
535
+ Ok(())
536
+ }
537
+
538
#[cfg(test)]
mod tests {
    use super::*;

    /// One-shot compress/decompress restores the input, shrinks repetitive
    /// data and emits the frame magic as the first four bytes.
    #[test]
    fn round_trip() {
        let input = b"the quick brown fox jumps over the lazy dog ".repeat(100);

        let packed = zstd::compress(&input, 1).unwrap();
        assert!(packed.len() < input.len());
        assert_eq!(&packed[..4], &ZSTD_FRAME_MAGIC);

        let unpacked = zstd::decompress(&packed).unwrap();
        assert_eq!(unpacked, input);
    }

    /// Empty input survives a compress/decompress cycle.
    #[test]
    fn empty_round_trip() {
        let packed = zstd::compress(b"", 1).unwrap();
        let unpacked = zstd::decompress(&packed).unwrap();
        assert!(unpacked.is_empty());
    }

    /// The reusable compress/decompress contexts round-trip a short message.
    #[test]
    fn context_round_trip() {
        let message = b"hello world hello world hello world";
        let mut compressor = CompressContext::new(1).unwrap();
        let mut decompressor = DecompressContext::new();

        let ciphertext = compressor.compress(message).unwrap();
        let plaintext = decompressor.decompress(&ciphertext).unwrap();
        assert_eq!(&*plaintext, message);
    }

    /// The frame content size read from the header equals the input length.
    #[test]
    fn parse_fcs() {
        let input = b"test data for fcs parsing".repeat(10);
        let packed = zstd::compress(&input, 1).unwrap();

        let content_size = parse_frame_content_size(&packed).unwrap();
        assert_eq!(content_size, Some(input.len() as u64));
    }
}
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
module Zrip
  # Ruby-side front end of the native block codec: normalizes the keyword
  # arguments and hands them to the `_native_*` methods of the extension.
  class BlockCodec
    # Builds a codec.
    #
    # @param dict [Zrip::Dictionary, String, nil] dictionary to use; a String
    #   is wrapped in a Dictionary first, nil means no dictionary
    # @param level [Integer, #to_int, String] compression level, coerced
    #   with Kernel#Integer
    # @raise [TypeError] if +dict+ is of any other type
    def self.new(dict: nil, level: DEFAULT_LEVEL)
      # Resolve the dictionary first so an unsupported +dict+ is reported
      # before +level+ is coerced.
      dict_bytes, dict_id =
        case dict
        when nil
          [nil, 0]
        when Dictionary
          [dict.bytes, dict.id]
        when String
          wrapped = Dictionary.new(bytes: dict)
          [wrapped.bytes, wrapped.id]
        else
          raise TypeError, "expected Zrip::Dictionary, String, or nil; got #{dict.class}"
        end

      _native_new(dict_bytes, dict_id, Integer(level))
    end

    # Decompresses +bytes+.
    #
    # @param bytes [String] compressed input
    # @param max_output_size [Integer, nil] output size limit handed to the
    #   native method; nil (or false) is passed on as 0
    def decompress(bytes, max_output_size: nil)
      limit = max_output_size || 0
      _native_decompress(bytes, Integer(limit))
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Zrip
  # Ruby-side front end of the native dictionary trainer.
  class DictTrainer
    class << self
      # Builds a trainer by forwarding +max_dict_size+ unchanged to the
      # native constructor.
      #
      # @param max_dict_size [Integer] size value handed to the extension
      def new(max_dict_size)
        _native_new(max_dict_size)
      end
    end
  end
end
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
module Zrip
  Dictionary = Data.define(:bytes, :id)

  # Immutable value object pairing dictionary bytes with a dictionary ID.
  #
  # When no ID is given it is determined from the bytes:
  # * if the bytes start with the dictionary magic and hold a complete
  #   8-byte header, the ID is the little-endian 32-bit integer stored in
  #   bytes 4..7;
  # * otherwise an ID in USER_DICT_ID_MIN..USER_DICT_ID_MAX is derived from
  #   the SHA-256 digest of the bytes, so equal content gives an equal ID.
  class Dictionary
    ZDICT_MAGIC = "\x37\xA4\x30\xEC".b.freeze
    # Magic (4 bytes) followed by the dictionary ID (4 bytes).
    ZDICT_HEADER_SIZE = 8
    USER_DICT_ID_MIN = 32_768
    USER_DICT_ID_MAX = (2**31) - 1
    USER_DICT_ID_SIZE = USER_DICT_ID_MAX - USER_DICT_ID_MIN + 1

    # @param bytes [String] dictionary content; stored as a frozen binary copy
    # @param id [Integer, nil] explicit dictionary ID; determined from
    #   +bytes+ when nil
    def initialize(bytes:, id: nil)
      b = bytes.b
      # Only trust the header when all 8 bytes are present. A string that is
      # just the magic (or the magic plus a partial ID) would otherwise make
      # unpack1 return nil and leave the dictionary without an ID.
      has_header = b.bytesize >= ZDICT_HEADER_SIZE && b.byteslice(0, 4) == ZDICT_MAGIC
      id ||= if has_header
               b.byteslice(4, 4).unpack1("V")
             else
               raw = Digest::SHA256.digest(b).byteslice(0, 4).unpack1("V")
               USER_DICT_ID_MIN + (raw % USER_DICT_ID_SIZE)
             end
      super(bytes: b.freeze, id: id)
    end

    # @return [Integer] number of bytes in the dictionary
    def size
      bytes.bytesize
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
module Zrip
  # Ruby-side front end of the native frame codec: normalizes the keyword
  # arguments and hands them to the `_native_*` methods of the extension.
  class FrameCodec
    # Builds a codec.
    #
    # @param dict [Zrip::Dictionary, String, nil] dictionary to use; a String
    #   is wrapped in a Dictionary first, nil means no dictionary
    # @param level [Integer, #to_int, String] compression level, coerced
    #   with Kernel#Integer
    # @raise [TypeError] if +dict+ is of any other type
    def self.new(dict: nil, level: DEFAULT_LEVEL)
      # Resolve the dictionary first so an unsupported +dict+ is reported
      # before +level+ is coerced.
      dict_bytes, dict_id =
        case dict
        when nil
          [nil, 0]
        when Dictionary
          [dict.bytes, dict.id]
        when String
          wrapped = Dictionary.new(bytes: dict)
          [wrapped.bytes, wrapped.id]
        else
          raise TypeError, "expected Zrip::Dictionary, String, or nil; got #{dict.class}"
        end

      _native_new(dict_bytes, dict_id, Integer(level))
    end

    # Decompresses +bytes+.
    #
    # @param bytes [String] compressed input
    # @param max_output_size [Integer, nil] output size limit handed to the
    #   native method; nil (or false) is passed on as 0
    def decompress(bytes, max_output_size: nil)
      limit = max_output_size || 0
      _native_decompress(bytes, Integer(limit))
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Zrip
  # Version of the gem, as published in the gem specification.
  VERSION = "0.1.0"
end
data/lib/zrip.rb ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "zrip/zrip" # Rust extension
4
+ require_relative "zrip/version"
5
+
6
module Zrip
  # Compression level the codec constructors use when no +level:+ is given.
  DEFAULT_LEVEL = 1
end
9
+
10
+ require_relative "zrip/dictionary"
11
+ require_relative "zrip/block_codec"
12
+ require_relative "zrip/frame_codec"
13
+ require_relative "zrip/dict_trainer"
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: zrip
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Patrik Wenger
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rb_sys
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.9'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.9'
26
+ description: Ruby bindings (via Rust/magnus) for zrip, a pure-Rust Zstandard implementation.
27
+ Frame-format and block-format compress/decompress with optional dictionary support,
28
+ configurable compression levels, and FastCOVER-based dictionary training. Ractor-safe.
29
+ email:
30
+ - paddor@gmail.com
31
+ executables: []
32
+ extensions:
33
+ - ext/zrip/extconf.rb
34
+ extra_rdoc_files: []
35
+ files:
36
+ - CHANGELOG.md
37
+ - Cargo.lock
38
+ - Cargo.toml
39
+ - LICENSE
40
+ - README.md
41
+ - ext/zrip/Cargo.toml
42
+ - ext/zrip/extconf.rb
43
+ - ext/zrip/src/lib.rs
44
+ - lib/zrip.rb
45
+ - lib/zrip/block_codec.rb
46
+ - lib/zrip/dict_trainer.rb
47
+ - lib/zrip/dictionary.rb
48
+ - lib/zrip/frame_codec.rb
49
+ - lib/zrip/version.rb
50
+ homepage: https://github.com/paddor/zrip-rb
51
+ licenses:
52
+ - MIT
53
+ metadata:
54
+ homepage_uri: https://github.com/paddor/zrip-rb
55
+ source_code_uri: https://github.com/paddor/zrip-rb
56
+ changelog_uri: https://github.com/paddor/zrip-rb/blob/main/CHANGELOG.md
57
+ rubygems_mfa_required: 'true'
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: 4.0.0
66
+ required_rubygems_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ requirements: []
72
+ rubygems_version: 4.0.10
73
+ specification_version: 4
74
+ summary: Ractor-safe Zstandard bindings for Ruby (pure-Rust zrip backend)
75
+ test_files: []