lz4rip 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +325 -0
- data/Cargo.toml +9 -0
- data/LICENSE +21 -0
- data/README.md +128 -0
- data/ext/lz4rip/Cargo.toml +16 -0
- data/ext/lz4rip/extconf.rb +8 -0
- data/ext/lz4rip/src/lib.rs +392 -0
- data/lib/lz4rip/block_codec.rb +16 -0
- data/lib/lz4rip/dict_trainer.rb +9 -0
- data/lib/lz4rip/dictionary.rb +16 -0
- data/lib/lz4rip/frame_codec.rb +20 -0
- data/lib/lz4rip/version.rb +5 -0
- data/lib/lz4rip.rb +8 -0
- metadata +75 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6cc17a63b94ba3943cc64ef1ded04a85994da14d1d305df9adebe10e78bfd798
|
|
4
|
+
data.tar.gz: 849f7c71658601730ff0f328963148644987798bfe06d7e3878f81eae3b5885b
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bf19aaf5f7f57cbaa50bd1c9674ddae4583d18665495344f19325b095a7298b97fb152625bcab41b6e96a37fa71d8e5bbe1c6c295fb27d2443012312b1d9bd05
|
|
7
|
+
data.tar.gz: bc6c28546e3ad483b6c97601610fc3625123567b1e70f48059626b0bf710fd6cb2df1b7096def34cf2c04c9142d509411a0fe3b5bc7e48c12a96edbad9e9229e
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
## [0.1.0] - 2026-06-20
|
|
6
|
+
|
|
7
|
+
- Initial release.
|
|
8
|
+
- `Lz4rip::FrameCodec`: frame-format LZ4 codec (Ractor-shareable).
|
|
9
|
+
- `Lz4rip::BlockCodec`: block-format LZ4 codec with reusable scratch table.
|
|
10
|
+
- `Lz4rip::Dictionary`: immutable value type for LZ4 dictionaries.
|
|
11
|
+
- `Lz4rip::DictTrainer`: COVER-based dictionary trainer.
|
|
12
|
+
- `Lz4rip.compress_bound`: maximum output size for a given input size.
|
data/Cargo.lock
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "aho-corasick"
|
|
7
|
+
version = "1.1.4"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"memchr",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "bindgen"
|
|
16
|
+
version = "0.72.1"
|
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
+
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
19
|
+
dependencies = [
|
|
20
|
+
"bitflags",
|
|
21
|
+
"cexpr",
|
|
22
|
+
"clang-sys",
|
|
23
|
+
"itertools",
|
|
24
|
+
"proc-macro2",
|
|
25
|
+
"quote",
|
|
26
|
+
"regex",
|
|
27
|
+
"rustc-hash",
|
|
28
|
+
"shlex",
|
|
29
|
+
"syn",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[[package]]
|
|
33
|
+
name = "bitflags"
|
|
34
|
+
version = "2.13.0"
|
|
35
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
+
checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
|
|
37
|
+
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "cexpr"
|
|
40
|
+
version = "0.6.0"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"nom",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[[package]]
|
|
48
|
+
name = "cfg-if"
|
|
49
|
+
version = "1.0.4"
|
|
50
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
51
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
52
|
+
|
|
53
|
+
[[package]]
|
|
54
|
+
name = "clang-sys"
|
|
55
|
+
version = "1.8.1"
|
|
56
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
57
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
58
|
+
dependencies = [
|
|
59
|
+
"glob",
|
|
60
|
+
"libc",
|
|
61
|
+
"libloading",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
[[package]]
|
|
65
|
+
name = "either"
|
|
66
|
+
version = "1.16.0"
|
|
67
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
68
|
+
checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
|
|
69
|
+
|
|
70
|
+
[[package]]
|
|
71
|
+
name = "glob"
|
|
72
|
+
version = "0.3.3"
|
|
73
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
74
|
+
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
|
75
|
+
|
|
76
|
+
[[package]]
|
|
77
|
+
name = "itertools"
|
|
78
|
+
version = "0.13.0"
|
|
79
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
80
|
+
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
81
|
+
dependencies = [
|
|
82
|
+
"either",
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
[[package]]
|
|
86
|
+
name = "lazy_static"
|
|
87
|
+
version = "1.5.0"
|
|
88
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
89
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
90
|
+
|
|
91
|
+
[[package]]
|
|
92
|
+
name = "libc"
|
|
93
|
+
version = "0.2.186"
|
|
94
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
95
|
+
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
|
96
|
+
|
|
97
|
+
[[package]]
|
|
98
|
+
name = "libloading"
|
|
99
|
+
version = "0.8.9"
|
|
100
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
101
|
+
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
102
|
+
dependencies = [
|
|
103
|
+
"cfg-if",
|
|
104
|
+
"windows-link",
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
[[package]]
|
|
108
|
+
name = "lz4rip"
|
|
109
|
+
version = "0.1.0"
|
|
110
|
+
dependencies = [
|
|
111
|
+
"lz4rip 0.8.5",
|
|
112
|
+
"magnus",
|
|
113
|
+
"rb-sys",
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
[[package]]
|
|
117
|
+
name = "lz4rip"
|
|
118
|
+
version = "0.8.5"
|
|
119
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
120
|
+
checksum = "16200eb628c6a5a6e539901f08e5ff4adb9b9522d7201a2dfa36d2060c355b34"
|
|
121
|
+
dependencies = [
|
|
122
|
+
"lz4rip-core",
|
|
123
|
+
"lz4rip-decode",
|
|
124
|
+
"lz4rip-encode",
|
|
125
|
+
"twox-hash",
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
[[package]]
|
|
129
|
+
name = "lz4rip-core"
|
|
130
|
+
version = "0.5.2"
|
|
131
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
132
|
+
checksum = "5bf2d77a9fc2acbfe6f4b6b0a3ef2d97f0c96bb384c4406225851a900f483788"
|
|
133
|
+
|
|
134
|
+
[[package]]
|
|
135
|
+
name = "lz4rip-decode"
|
|
136
|
+
version = "0.8.2"
|
|
137
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
138
|
+
checksum = "7c45bc51677ce22ebf4d9edabdb658fbd9e4556a93316d423a9e7a6a54108166"
|
|
139
|
+
dependencies = [
|
|
140
|
+
"lz4rip-core",
|
|
141
|
+
]
|
|
142
|
+
|
|
143
|
+
[[package]]
|
|
144
|
+
name = "lz4rip-encode"
|
|
145
|
+
version = "0.8.3"
|
|
146
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
147
|
+
checksum = "0d0d63726468388972bc99c72dab3bb3fde78096969cd270d84e77c07b5df5a2"
|
|
148
|
+
dependencies = [
|
|
149
|
+
"lz4rip-core",
|
|
150
|
+
]
|
|
151
|
+
|
|
152
|
+
[[package]]
|
|
153
|
+
name = "magnus"
|
|
154
|
+
version = "0.8.2"
|
|
155
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
156
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
157
|
+
dependencies = [
|
|
158
|
+
"magnus-macros",
|
|
159
|
+
"rb-sys",
|
|
160
|
+
"rb-sys-env",
|
|
161
|
+
"seq-macro",
|
|
162
|
+
]
|
|
163
|
+
|
|
164
|
+
[[package]]
|
|
165
|
+
name = "magnus-macros"
|
|
166
|
+
version = "0.8.0"
|
|
167
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
168
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
|
169
|
+
dependencies = [
|
|
170
|
+
"proc-macro2",
|
|
171
|
+
"quote",
|
|
172
|
+
"syn",
|
|
173
|
+
]
|
|
174
|
+
|
|
175
|
+
[[package]]
|
|
176
|
+
name = "memchr"
|
|
177
|
+
version = "2.8.2"
|
|
178
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
179
|
+
checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
|
|
180
|
+
|
|
181
|
+
[[package]]
|
|
182
|
+
name = "minimal-lexical"
|
|
183
|
+
version = "0.2.1"
|
|
184
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
185
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
186
|
+
|
|
187
|
+
[[package]]
|
|
188
|
+
name = "nom"
|
|
189
|
+
version = "7.1.3"
|
|
190
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
191
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
|
192
|
+
dependencies = [
|
|
193
|
+
"memchr",
|
|
194
|
+
"minimal-lexical",
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
[[package]]
|
|
198
|
+
name = "proc-macro2"
|
|
199
|
+
version = "1.0.106"
|
|
200
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
201
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
202
|
+
dependencies = [
|
|
203
|
+
"unicode-ident",
|
|
204
|
+
]
|
|
205
|
+
|
|
206
|
+
[[package]]
|
|
207
|
+
name = "quote"
|
|
208
|
+
version = "1.0.45"
|
|
209
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
210
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
211
|
+
dependencies = [
|
|
212
|
+
"proc-macro2",
|
|
213
|
+
]
|
|
214
|
+
|
|
215
|
+
[[package]]
|
|
216
|
+
name = "rb-sys"
|
|
217
|
+
version = "0.9.128"
|
|
218
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
219
|
+
checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
|
|
220
|
+
dependencies = [
|
|
221
|
+
"rb-sys-build",
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
[[package]]
|
|
225
|
+
name = "rb-sys-build"
|
|
226
|
+
version = "0.9.128"
|
|
227
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
228
|
+
checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
|
|
229
|
+
dependencies = [
|
|
230
|
+
"bindgen",
|
|
231
|
+
"lazy_static",
|
|
232
|
+
"proc-macro2",
|
|
233
|
+
"quote",
|
|
234
|
+
"regex",
|
|
235
|
+
"shell-words",
|
|
236
|
+
"syn",
|
|
237
|
+
]
|
|
238
|
+
|
|
239
|
+
[[package]]
|
|
240
|
+
name = "rb-sys-env"
|
|
241
|
+
version = "0.2.3"
|
|
242
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
243
|
+
checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
|
|
244
|
+
|
|
245
|
+
[[package]]
|
|
246
|
+
name = "regex"
|
|
247
|
+
version = "1.12.4"
|
|
248
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
249
|
+
checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
|
|
250
|
+
dependencies = [
|
|
251
|
+
"aho-corasick",
|
|
252
|
+
"memchr",
|
|
253
|
+
"regex-automata",
|
|
254
|
+
"regex-syntax",
|
|
255
|
+
]
|
|
256
|
+
|
|
257
|
+
[[package]]
|
|
258
|
+
name = "regex-automata"
|
|
259
|
+
version = "0.4.14"
|
|
260
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
261
|
+
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
262
|
+
dependencies = [
|
|
263
|
+
"aho-corasick",
|
|
264
|
+
"memchr",
|
|
265
|
+
"regex-syntax",
|
|
266
|
+
]
|
|
267
|
+
|
|
268
|
+
[[package]]
|
|
269
|
+
name = "regex-syntax"
|
|
270
|
+
version = "0.8.11"
|
|
271
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
272
|
+
checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
|
|
273
|
+
|
|
274
|
+
[[package]]
|
|
275
|
+
name = "rustc-hash"
|
|
276
|
+
version = "2.1.2"
|
|
277
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
278
|
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
|
279
|
+
|
|
280
|
+
[[package]]
|
|
281
|
+
name = "seq-macro"
|
|
282
|
+
version = "0.3.6"
|
|
283
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
284
|
+
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
|
285
|
+
|
|
286
|
+
[[package]]
|
|
287
|
+
name = "shell-words"
|
|
288
|
+
version = "1.1.1"
|
|
289
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
290
|
+
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
|
|
291
|
+
|
|
292
|
+
[[package]]
|
|
293
|
+
name = "shlex"
|
|
294
|
+
version = "1.3.0"
|
|
295
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
296
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
297
|
+
|
|
298
|
+
[[package]]
|
|
299
|
+
name = "syn"
|
|
300
|
+
version = "2.0.118"
|
|
301
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
302
|
+
checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
|
|
303
|
+
dependencies = [
|
|
304
|
+
"proc-macro2",
|
|
305
|
+
"quote",
|
|
306
|
+
"unicode-ident",
|
|
307
|
+
]
|
|
308
|
+
|
|
309
|
+
[[package]]
|
|
310
|
+
name = "twox-hash"
|
|
311
|
+
version = "2.1.2"
|
|
312
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
313
|
+
checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
|
|
314
|
+
|
|
315
|
+
[[package]]
|
|
316
|
+
name = "unicode-ident"
|
|
317
|
+
version = "1.0.24"
|
|
318
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
319
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
320
|
+
|
|
321
|
+
[[package]]
|
|
322
|
+
name = "windows-link"
|
|
323
|
+
version = "0.2.1"
|
|
324
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
325
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
data/Cargo.toml
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Patrik Wenger
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# lz4rip — Ractor-safe LZ4 for Ruby
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/paddor/lz4rip-rb/actions/workflows/ci.yml)
|
|
4
|
+
[Gem Version](https://rubygems.org/gems/lz4rip)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Ruby](https://www.ruby-lang.org)
|
|
7
|
+
|
|
8
|
+
Ruby bindings for [lz4rip](https://crates.io/crates/lz4rip), a pure-Rust LZ4
|
|
9
|
+
implementation. Built with [magnus](https://github.com/matsadler/magnus) and
|
|
10
|
+
declared Ractor-safe so you can compress from any Ractor without a global lock.
|
|
11
|
+
|
|
12
|
+
## Features
|
|
13
|
+
|
|
14
|
+
- **Block codec** with reusable compressor scratch table
|
|
15
|
+
- **Frame codec** for standard `.lz4` frames
|
|
16
|
+
- **Dictionary support** for both block and frame codecs
|
|
17
|
+
- **COVER-based dictionary trainer** (`DictTrainer`)
|
|
18
|
+
- **Ractor-safe**: `FrameCodec` is shareable across Ractors, `BlockCodec` is
|
|
19
|
+
per-Ractor (mutable scratch state)
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
Requires Ruby >= 4.0 and a Rust toolchain (for building the native extension):
|
|
24
|
+
|
|
25
|
+
```sh
|
|
26
|
+
gem install lz4rip
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Or in your Gemfile:
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
gem "lz4rip"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
### Frame codec (standard LZ4 frames)
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
require "lz4rip"
|
|
41
|
+
|
|
42
|
+
codec = Lz4rip::FrameCodec.new
|
|
43
|
+
compressed = codec.compress("hello world " * 1000)
|
|
44
|
+
original = codec.decompress(compressed)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Block codec (raw LZ4 blocks)
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
codec = Lz4rip::BlockCodec.new
|
|
51
|
+
compressed = codec.compress("hello world " * 1000)
|
|
52
|
+
original = codec.decompress(compressed, decompressed_size: 12_000)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Block decompression requires the original size up front. This is by design: LZ4
|
|
56
|
+
block format does not store it, so the caller must track it.
|
|
57
|
+
|
|
58
|
+
### Dictionary compression
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
dict = Lz4rip::Dictionary.new(bytes: "common log prefix: ")
|
|
62
|
+
codec = Lz4rip::FrameCodec.new(dict: dict)
|
|
63
|
+
|
|
64
|
+
compressed = codec.compress("common log prefix: event=login user=alice")
|
|
65
|
+
original = codec.decompress(compressed)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Dictionary training
|
|
69
|
+
|
|
70
|
+
```ruby
|
|
71
|
+
trainer = Lz4rip::DictTrainer.new(2048)
|
|
72
|
+
messages.each { |msg| trainer.add_sample(msg) }
|
|
73
|
+
dict_bytes = trainer.train
|
|
74
|
+
|
|
75
|
+
dict = Lz4rip::Dictionary.new(bytes: dict_bytes)
|
|
76
|
+
codec = Lz4rip::BlockCodec.new(dict: dict)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Ractor safety
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
codec = Lz4rip::FrameCodec.new
|
|
83
|
+
|
|
84
|
+
ractors = 4.times.map do |i|
|
|
85
|
+
Ractor.new(codec) do |c|
|
|
86
|
+
data = "ractor #{Ractor.current} payload " * 100
|
|
87
|
+
ct = c.compress(data)
|
|
88
|
+
raise "mismatch" unless c.decompress(ct) == data
|
|
89
|
+
:ok
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
ractors.each { |r| p r.value } # => :ok, :ok, :ok, :ok
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## API
|
|
97
|
+
|
|
98
|
+
| Class / Module | Method | Description |
|
|
99
|
+
|---|---|---|
|
|
100
|
+
| `Lz4rip::FrameCodec` | `.new(dict: nil)` | Create a frame codec, optionally with a `Dictionary` or raw `String` dict |
|
|
101
|
+
| | `#compress(string)` | Compress to LZ4 frame |
|
|
102
|
+
| | `#decompress(string)` | Decompress an LZ4 frame |
|
|
103
|
+
| | `#has_dict?` | Whether a dictionary is loaded |
|
|
104
|
+
| | `#id` | Dictionary ID (nil without dict) |
|
|
105
|
+
| | `#size` | Dictionary size in bytes (0 without dict) |
|
|
106
|
+
| `Lz4rip::BlockCodec` | `.new(dict: nil)` | Create a block codec, optionally with a dict `String` |
|
|
107
|
+
| | `#compress(string)` | Compress to raw LZ4 block |
|
|
108
|
+
| | `#decompress(string, decompressed_size:)` | Decompress a raw LZ4 block |
|
|
109
|
+
| | `#has_dict?` | Whether a dictionary is loaded |
|
|
110
|
+
| | `#size` | Internal state size in bytes: compressor heap plus dictionary (0 without dict) |
|
|
111
|
+
| `Lz4rip::Dictionary` | `.new(bytes:, id: auto)` | Immutable dictionary value object |
|
|
112
|
+
| | `#bytes` | Frozen binary dict bytes |
|
|
113
|
+
| | `#id` | 32-bit dictionary ID |
|
|
114
|
+
| | `#size` | Dictionary size in bytes |
|
|
115
|
+
| `Lz4rip::DictTrainer` | `.new(max_dict_size)` | Create a trainer (capped at 65535) |
|
|
116
|
+
| | `#add_sample(string)` | Feed a training sample |
|
|
117
|
+
| | `#train` | Consume the trainer, return dict bytes |
|
|
118
|
+
| | `#sample_count` | Number of accepted samples |
|
|
119
|
+
| | `#total_bytes` | Total bytes of accepted samples |
|
|
120
|
+
| | `#trained?` | Whether `#train` has been called |
|
|
121
|
+
| | `#max_dict_size` | Configured max dict size |
|
|
122
|
+
| `Lz4rip` | `.compress_bound(size)` | Max compressed output size for a given input size |
|
|
123
|
+
| | `.block_stream_size` | Internal compressor heap size |
|
|
124
|
+
| `Lz4rip::DecompressError` | | Raised on decompression failure (subclass of `StandardError`) |
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
[MIT](LICENSE)
|
data/ext/lz4rip/Cargo.toml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "lz4rip"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
|
|
6
|
+
[lib]
|
|
7
|
+
name = "lz4rip"
|
|
8
|
+
crate-type = ["cdylib", "rlib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
lz4 = { version = "0.8", package = "lz4rip" }
|
|
12
|
+
magnus = "0.8"
|
|
13
|
+
rb-sys = "0.9"
|
|
14
|
+
|
|
15
|
+
[build-dependencies]
|
|
16
|
+
rb-sys = "0.9"
|
data/ext/lz4rip/src/lib.rs
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
use magnus::{
|
|
2
|
+
exception::ExceptionClass, function, method, prelude::*, r_string::RString, value::Opaque,
|
|
3
|
+
Error, Ruby,
|
|
4
|
+
};
|
|
5
|
+
use std::cell::RefCell;
|
|
6
|
+
use std::io::{Cursor, Read, Write};
|
|
7
|
+
use std::sync::OnceLock;
|
|
8
|
+
|
|
9
|
+
use lz4::block::{self, Compressor, Decompressor, DictTrainer};
|
|
10
|
+
use lz4::frame::{BlockMode, FrameDecoder, FrameEncoder, FrameInfo};
|
|
11
|
+
|
|
12
|
+
/// Byte count returned by `Lz4rip.block_stream_size` and added to the
/// dictionary length by `BlockCodec#size`.
// NOTE(review): presumably mirrors the lz4rip crate's compressor scratch-table
// footprint — confirm against the crate before relying on the exact value.
const COMPRESSOR_HEAP_SIZE: usize = 8192;
|
|
13
|
+
|
|
14
|
+
/// First four bytes every input must start with before `frame_codec_decompress`
/// hands it to the decoder (the LZ4 frame magic number 0x184D2204, stored
/// little-endian).
const LZ4_FRAME_MAGIC: [u8; 4] = [0x04, 0x22, 0x4d, 0x18];
|
|
15
|
+
|
|
16
|
+
/// Handle to the `Lz4rip::DecompressError` exception class. Written once by
/// `init` and read by `decompress_error`.
static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
17
|
+
|
|
18
|
+
fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
19
|
+
ruby.get_inner(
|
|
20
|
+
*DECOMPRESS_ERROR
|
|
21
|
+
.get()
|
|
22
|
+
.expect("DecompressError not initialized"),
|
|
23
|
+
)
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// ---------- module functions ----------
|
|
27
|
+
|
|
28
|
+
fn lz4rip_compress_bound(_ruby: &Ruby, size: usize) -> usize {
|
|
29
|
+
block::get_maximum_output_size(size)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
fn lz4rip_block_stream_size(_ruby: &Ruby) -> usize {
|
|
33
|
+
COMPRESSOR_HEAP_SIZE
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// ---------- BlockCodec ----------
|
|
37
|
+
|
|
38
|
+
#[magnus::wrap(class = "Lz4rip::BlockCodec", free_immediately, size)]
|
|
39
|
+
struct BlockCodec {
|
|
40
|
+
compressor: Option<RefCell<Compressor>>,
|
|
41
|
+
decompressor: Option<Decompressor>,
|
|
42
|
+
dict_len: usize,
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
fn block_codec_new(_ruby: &Ruby, rb_dict: Option<RString>) -> Result<BlockCodec, Error> {
|
|
46
|
+
match rb_dict {
|
|
47
|
+
None => Ok(BlockCodec {
|
|
48
|
+
compressor: None,
|
|
49
|
+
decompressor: None,
|
|
50
|
+
dict_len: 0,
|
|
51
|
+
}),
|
|
52
|
+
Some(rb_dict) => {
|
|
53
|
+
let bytes: Vec<u8> = unsafe { rb_dict.as_slice().to_vec() };
|
|
54
|
+
Ok(BlockCodec {
|
|
55
|
+
compressor: Some(RefCell::new(Compressor::with_dict(&bytes))),
|
|
56
|
+
decompressor: Some(Decompressor::with_dict(&bytes)),
|
|
57
|
+
dict_len: bytes.len(),
|
|
58
|
+
})
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
fn block_codec_size(rb_self: &BlockCodec) -> usize {
|
|
64
|
+
if rb_self.compressor.is_some() {
|
|
65
|
+
COMPRESSOR_HEAP_SIZE + rb_self.dict_len
|
|
66
|
+
} else {
|
|
67
|
+
0
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
fn block_codec_has_dict(rb_self: &BlockCodec) -> bool {
|
|
72
|
+
rb_self.compressor.is_some()
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
fn block_codec_compress(
|
|
76
|
+
ruby: &Ruby,
|
|
77
|
+
rb_self: &BlockCodec,
|
|
78
|
+
rb_input: RString,
|
|
79
|
+
) -> Result<RString, Error> {
|
|
80
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
81
|
+
|
|
82
|
+
let out = match &rb_self.compressor {
|
|
83
|
+
None => block::compress(input),
|
|
84
|
+
Some(comp) => comp.borrow_mut().compress(input),
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
Ok(ruby.str_from_slice(&out))
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
fn block_codec_decompress(
|
|
91
|
+
ruby: &Ruby,
|
|
92
|
+
rb_self: &BlockCodec,
|
|
93
|
+
rb_input: RString,
|
|
94
|
+
decompressed_size: usize,
|
|
95
|
+
) -> Result<RString, Error> {
|
|
96
|
+
let compressed: &[u8] = unsafe { rb_input.as_slice() };
|
|
97
|
+
|
|
98
|
+
let result = match &rb_self.decompressor {
|
|
99
|
+
None => block::decompress(compressed, decompressed_size),
|
|
100
|
+
Some(decomp) => decomp.decompress(compressed, decompressed_size),
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
match result {
|
|
104
|
+
Ok(data) => Ok(ruby.str_from_slice(&data)),
|
|
105
|
+
Err(e) => Err(Error::new(
|
|
106
|
+
decompress_error(ruby),
|
|
107
|
+
format!("lz4 block decode failed: {e}"),
|
|
108
|
+
)),
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// ---------- FrameCodec ----------
|
|
113
|
+
|
|
114
|
+
#[magnus::wrap(class = "Lz4rip::FrameCodec", free_immediately, size)]
|
|
115
|
+
struct FrameCodec {
|
|
116
|
+
dict: Option<DictBound>,
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/// A dictionary paired with the id that is passed alongside it to the frame
/// encoder and decoder.
struct DictBound {
    /// Owned copy of the dictionary contents.
    bytes: Vec<u8>,
    /// Dictionary id handed to `with_dictionary`.
    id: u32,
}
|
|
123
|
+
|
|
124
|
+
fn frame_codec_initialize(
|
|
125
|
+
_ruby: &Ruby,
|
|
126
|
+
rb_dict: Option<RString>,
|
|
127
|
+
id: u32,
|
|
128
|
+
) -> Result<FrameCodec, Error> {
|
|
129
|
+
let dict = rb_dict.map(|s| {
|
|
130
|
+
let bytes: Vec<u8> = unsafe { s.as_slice().to_vec() };
|
|
131
|
+
s.freeze();
|
|
132
|
+
DictBound { bytes, id }
|
|
133
|
+
});
|
|
134
|
+
Ok(FrameCodec { dict })
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
fn frame_codec_compress(
|
|
138
|
+
ruby: &Ruby,
|
|
139
|
+
rb_self: &FrameCodec,
|
|
140
|
+
rb_input: RString,
|
|
141
|
+
) -> Result<RString, Error> {
|
|
142
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
143
|
+
|
|
144
|
+
let buf = Vec::new();
|
|
145
|
+
let mut enc = match &rb_self.dict {
|
|
146
|
+
None => {
|
|
147
|
+
let info = FrameInfo::new().block_mode(BlockMode::Linked);
|
|
148
|
+
FrameEncoder::with_frame_info(info, buf)
|
|
149
|
+
}
|
|
150
|
+
Some(d) => FrameEncoder::with_dictionary(buf, &d.bytes, d.id),
|
|
151
|
+
};
|
|
152
|
+
|
|
153
|
+
enc.write_all(input).map_err(|e| {
|
|
154
|
+
Error::new(
|
|
155
|
+
ruby.exception_runtime_error(),
|
|
156
|
+
format!("lz4 frame compress failed: {e}"),
|
|
157
|
+
)
|
|
158
|
+
})?;
|
|
159
|
+
|
|
160
|
+
let out = enc.finish().map_err(|e| {
|
|
161
|
+
Error::new(
|
|
162
|
+
ruby.exception_runtime_error(),
|
|
163
|
+
format!("lz4 frame compress failed: {e}"),
|
|
164
|
+
)
|
|
165
|
+
})?;
|
|
166
|
+
|
|
167
|
+
Ok(ruby.str_from_slice(&out))
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
fn frame_codec_decompress(
|
|
171
|
+
ruby: &Ruby,
|
|
172
|
+
rb_self: &FrameCodec,
|
|
173
|
+
rb_input: RString,
|
|
174
|
+
) -> Result<RString, Error> {
|
|
175
|
+
let input: &[u8] = unsafe { rb_input.as_slice() };
|
|
176
|
+
|
|
177
|
+
if input.len() < 4 || input[..4] != LZ4_FRAME_MAGIC {
|
|
178
|
+
return Err(Error::new(
|
|
179
|
+
decompress_error(ruby),
|
|
180
|
+
"lz4 frame decode failed: bad magic (input is not an LZ4 frame)",
|
|
181
|
+
));
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
let mut dec = match &rb_self.dict {
|
|
185
|
+
None => FrameDecoder::new(Cursor::new(input)),
|
|
186
|
+
Some(d) => FrameDecoder::with_dictionary(Cursor::new(input), &d.bytes, d.id),
|
|
187
|
+
};
|
|
188
|
+
|
|
189
|
+
let mut out = Vec::new();
|
|
190
|
+
dec.read_to_end(&mut out).map_err(|e| {
|
|
191
|
+
Error::new(
|
|
192
|
+
decompress_error(ruby),
|
|
193
|
+
format!("lz4 frame decode failed: {e}"),
|
|
194
|
+
)
|
|
195
|
+
})?;
|
|
196
|
+
|
|
197
|
+
Ok(ruby.str_from_slice(&out))
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
fn frame_codec_size(rb_self: &FrameCodec) -> usize {
|
|
201
|
+
rb_self.dict.as_ref().map_or(0, |d| d.bytes.len())
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
fn frame_codec_has_dict(rb_self: &FrameCodec) -> bool {
|
|
205
|
+
rb_self.dict.is_some()
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
fn frame_codec_id(rb_self: &FrameCodec) -> Option<u32> {
|
|
209
|
+
rb_self.dict.as_ref().map(|d| d.id)
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// ---------- DictTrainer ----------
|
|
213
|
+
|
|
214
|
+
/// Upper bound applied to the requested dictionary size in `dict_trainer_new`.
// NOTE(review): 65535 matches the largest match offset an LZ4 block can
// encode (2-byte offset) — confirm that is the intended rationale.
const LZ4_MAX_DISTANCE: usize = 65535;
|
|
215
|
+
|
|
216
|
+
#[magnus::wrap(class = "Lz4rip::DictTrainer", free_immediately, size)]
|
|
217
|
+
struct RbDictTrainer {
|
|
218
|
+
inner: RefCell<Option<DictTrainer>>,
|
|
219
|
+
max_dict_size: usize,
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
fn dict_trainer_new(_ruby: &Ruby, max_dict_size: usize) -> RbDictTrainer {
|
|
223
|
+
let capped = max_dict_size.min(LZ4_MAX_DISTANCE);
|
|
224
|
+
RbDictTrainer {
|
|
225
|
+
max_dict_size: capped,
|
|
226
|
+
inner: RefCell::new(Some(DictTrainer::new(max_dict_size))),
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
fn dict_trainer_add_sample(
|
|
231
|
+
ruby: &Ruby,
|
|
232
|
+
rb_self: &RbDictTrainer,
|
|
233
|
+
rb_data: RString,
|
|
234
|
+
) -> Result<(), Error> {
|
|
235
|
+
let mut borrow = rb_self.inner.borrow_mut();
|
|
236
|
+
let trainer = borrow.as_mut().ok_or_else(|| {
|
|
237
|
+
Error::new(
|
|
238
|
+
ruby.exception_runtime_error(),
|
|
239
|
+
"DictTrainer already consumed by #train",
|
|
240
|
+
)
|
|
241
|
+
})?;
|
|
242
|
+
let data: &[u8] = unsafe { rb_data.as_slice() };
|
|
243
|
+
let sample = if data.len() > rb_self.max_dict_size {
|
|
244
|
+
&data[..rb_self.max_dict_size]
|
|
245
|
+
} else {
|
|
246
|
+
data
|
|
247
|
+
};
|
|
248
|
+
trainer.add_sample(sample);
|
|
249
|
+
Ok(())
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
fn dict_trainer_sample_count(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
|
|
253
|
+
let borrow = rb_self.inner.borrow();
|
|
254
|
+
borrow.as_ref().map(|t| t.sample_count()).ok_or_else(|| {
|
|
255
|
+
Error::new(
|
|
256
|
+
ruby.exception_runtime_error(),
|
|
257
|
+
"DictTrainer already consumed by #train",
|
|
258
|
+
)
|
|
259
|
+
})
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
fn dict_trainer_total_bytes(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<usize, Error> {
|
|
263
|
+
let borrow = rb_self.inner.borrow();
|
|
264
|
+
borrow.as_ref().map(|t| t.total_bytes()).ok_or_else(|| {
|
|
265
|
+
Error::new(
|
|
266
|
+
ruby.exception_runtime_error(),
|
|
267
|
+
"DictTrainer already consumed by #train",
|
|
268
|
+
)
|
|
269
|
+
})
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
fn dict_trainer_train(ruby: &Ruby, rb_self: &RbDictTrainer) -> Result<RString, Error> {
|
|
273
|
+
let trainer = rb_self.inner.borrow_mut().take().ok_or_else(|| {
|
|
274
|
+
Error::new(
|
|
275
|
+
ruby.exception_runtime_error(),
|
|
276
|
+
"DictTrainer already consumed by #train",
|
|
277
|
+
)
|
|
278
|
+
})?;
|
|
279
|
+
let dict = trainer.train();
|
|
280
|
+
Ok(ruby.str_from_slice(&dict))
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
fn dict_trainer_max_dict_size(rb_self: &RbDictTrainer) -> usize {
|
|
284
|
+
rb_self.max_dict_size
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
fn dict_trainer_trained(rb_self: &RbDictTrainer) -> bool {
|
|
288
|
+
rb_self.inner.borrow().is_none()
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
// ---------- module init ----------
|
|
292
|
+
|
|
293
|
+
#[magnus::init]
|
|
294
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
295
|
+
unsafe { rb_sys::rb_ext_ractor_safe(true) };
|
|
296
|
+
|
|
297
|
+
let module = ruby.define_module("Lz4rip")?;
|
|
298
|
+
|
|
299
|
+
let decompress_error_class =
|
|
300
|
+
module.define_error("DecompressError", ruby.exception_standard_error())?;
|
|
301
|
+
DECOMPRESS_ERROR
|
|
302
|
+
.set(Opaque::from(decompress_error_class))
|
|
303
|
+
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
304
|
+
|
|
305
|
+
module.define_module_function("compress_bound", function!(lz4rip_compress_bound, 1))?;
|
|
306
|
+
module.define_module_function("block_stream_size", function!(lz4rip_block_stream_size, 0))?;
|
|
307
|
+
|
|
308
|
+
let codec_class = module.define_class("BlockCodec", ruby.class_object())?;
|
|
309
|
+
codec_class.define_singleton_method("_native_new", function!(block_codec_new, 1))?;
|
|
310
|
+
codec_class.define_method("size", method!(block_codec_size, 0))?;
|
|
311
|
+
codec_class.define_method("has_dict?", method!(block_codec_has_dict, 0))?;
|
|
312
|
+
codec_class.define_method("compress", method!(block_codec_compress, 1))?;
|
|
313
|
+
codec_class.define_method("_decompress", method!(block_codec_decompress, 2))?;
|
|
314
|
+
|
|
315
|
+
let trainer_class = module.define_class("DictTrainer", ruby.class_object())?;
|
|
316
|
+
trainer_class.define_singleton_method("_native_new", function!(dict_trainer_new, 1))?;
|
|
317
|
+
trainer_class.define_method("add_sample", method!(dict_trainer_add_sample, 1))?;
|
|
318
|
+
trainer_class.define_method("sample_count", method!(dict_trainer_sample_count, 0))?;
|
|
319
|
+
trainer_class.define_method("total_bytes", method!(dict_trainer_total_bytes, 0))?;
|
|
320
|
+
trainer_class.define_method("train", method!(dict_trainer_train, 0))?;
|
|
321
|
+
trainer_class.define_method("max_dict_size", method!(dict_trainer_max_dict_size, 0))?;
|
|
322
|
+
trainer_class.define_method("trained?", method!(dict_trainer_trained, 0))?;
|
|
323
|
+
|
|
324
|
+
let frame_codec_class = module.define_class("FrameCodec", ruby.class_object())?;
|
|
325
|
+
frame_codec_class
|
|
326
|
+
.define_singleton_method("_native_new", function!(frame_codec_initialize, 2))?;
|
|
327
|
+
frame_codec_class.define_method("compress", method!(frame_codec_compress, 1))?;
|
|
328
|
+
frame_codec_class.define_method("decompress", method!(frame_codec_decompress, 1))?;
|
|
329
|
+
frame_codec_class.define_method("size", method!(frame_codec_size, 0))?;
|
|
330
|
+
frame_codec_class.define_method("has_dict?", method!(frame_codec_has_dict, 0))?;
|
|
331
|
+
frame_codec_class.define_method("id", method!(frame_codec_id, 0))?;
|
|
332
|
+
|
|
333
|
+
Ok(())
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Block compression followed by decompression returns the input.
    #[test]
    fn block_round_trip() {
        let original = b"hello hello hello hello".to_vec();

        let compressed = block::compress(&original);
        let restored = block::decompress(&compressed, original.len()).unwrap();

        assert_eq!(restored, original);
    }

    /// Dictionary-based block compression round-trips and produces a smaller
    /// output than plain compression when the input shares the dictionary's
    /// prefix.
    #[test]
    fn block_dict_round_trip() {
        let dictionary = b"common log prefix: ".to_vec();
        let message = b"common log prefix: event=login user=alice".to_vec();

        let mut compressor = Compressor::with_dict(&dictionary);
        let with_dict = compressor.compress(&message);

        let decompressor = Decompressor::with_dict(&dictionary);
        let restored = decompressor.decompress(&with_dict, message.len()).unwrap();
        assert_eq!(restored, message);

        let without_dict = block::compress(&message);
        assert!(
            with_dict.len() < without_dict.len(),
            "dict compression should beat no-dict on shared-prefix input"
        );
    }

    /// A repetitive payload shrinks when framed, starts with the LZ4 frame
    /// magic, and decodes back to the original bytes.
    #[test]
    fn frame_round_trip() {
        let original = b"the quick brown fox jumps over the lazy dog ".repeat(100);

        let mut encoder = FrameEncoder::new(Vec::new());
        encoder.write_all(&original).unwrap();
        let framed = encoder.finish().unwrap();

        assert!(framed.len() < original.len());
        assert_eq!(&framed[..4], &LZ4_FRAME_MAGIC);

        let mut decoder = FrameDecoder::new(Cursor::new(&framed));
        let mut restored = Vec::new();
        decoder.read_to_end(&mut restored).unwrap();

        assert_eq!(restored, original);
    }

    /// An empty payload still yields a frame that decodes to nothing.
    #[test]
    fn frame_empty_round_trip() {
        let mut encoder = FrameEncoder::new(Vec::new());
        encoder.write_all(b"").unwrap();
        let framed = encoder.finish().unwrap();

        let mut decoder = FrameDecoder::new(Cursor::new(&framed));
        let mut restored = Vec::new();
        decoder.read_to_end(&mut restored).unwrap();

        assert!(restored.is_empty());
    }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "dictionary"
|
|
4
|
+
|
|
5
|
+
module Lz4rip
  # Ruby-side wrapper around the native block codec methods.
  class BlockCodec
    # Builds a codec through the native constructor.
    #
    # @param dict [Lz4rip::Dictionary, Object, nil] a Dictionary is unwrapped
    #   to its +bytes+; any other value (including nil) is handed to the
    #   native constructor unchanged.
    def self.new(dict: nil)
      raw_dict =
        case dict
        when Dictionary then dict.bytes
        else dict
        end

      _native_new(raw_dict)
    end

    # Keyword-argument front end for the native +_decompress+ method.
    #
    # @param bytes the compressed input, forwarded as-is
    # @param decompressed_size the size argument, forwarded as-is
    # @return whatever the native +_decompress+ returns
    def decompress(bytes, decompressed_size:)
      _decompress(bytes, decompressed_size)
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module Lz4rip
  # Value object holding dictionary bytes and an integer id.
  Dictionary = Data.define(:bytes, :id) do
    # @param bytes [String] dictionary content; stored as a frozen binary copy
    # @param id [Integer] identifier; when omitted it is the first four bytes
    #   of SHA-256(bytes) read as a little-endian unsigned 32-bit integer
    #   ("V" decodes exactly four bytes from the start of the digest)
    def initialize(bytes:, id: Digest::SHA256.digest(bytes).unpack1("V"))
      binary = bytes.b
      binary.freeze
      super(bytes: binary, id: id)
    end

    # @return [Integer] dictionary length in bytes
    def size
      bytes.bytesize
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "dictionary"
|
|
4
|
+
|
|
5
|
+
module Lz4rip
  # Ruby-side constructor for the native frame codec.
  class FrameCodec
    # Builds a codec through the native constructor, which takes the
    # dictionary bytes and a dictionary id.
    #
    # @param dict [Lz4rip::Dictionary, String, nil]
    #   nil        -> no dictionary, id 0
    #   Dictionary -> its bytes and id
    #   String     -> the string itself, with the id a Dictionary built from
    #                 it would get
    # @raise [TypeError] for any other argument type
    def self.new(dict: nil)
      return _native_new(nil, 0) if NilClass === dict
      return _native_new(dict.bytes, dict.id) if Dictionary === dict
      return _native_new(dict, Dictionary.new(bytes: dict).id) if String === dict

      raise TypeError, "expected Lz4rip::Dictionary, String, or nil; got #{dict.class}"
    end
  end
end
|
data/lib/lz4rip.rb
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lz4rip/lz4rip" # Rust extension
|
|
4
|
+
require_relative "lz4rip/version"
|
|
5
|
+
require_relative "lz4rip/dictionary"
|
|
6
|
+
require_relative "lz4rip/block_codec"
|
|
7
|
+
require_relative "lz4rip/frame_codec"
|
|
8
|
+
require_relative "lz4rip/dict_trainer"
|
metadata
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lz4rip
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Patrik Wenger
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rb_sys
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.9'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0.9'
|
|
26
|
+
description: Ruby bindings (via Rust/magnus) for lz4rip, a pure-Rust LZ4 implementation.
|
|
27
|
+
Block-format and frame-format compress/decompress with optional dictionary support
|
|
28
|
+
and COVER-based dictionary training. Ractor-safe.
|
|
29
|
+
email:
|
|
30
|
+
- paddor@gmail.com
|
|
31
|
+
executables: []
|
|
32
|
+
extensions:
|
|
33
|
+
- ext/lz4rip/extconf.rb
|
|
34
|
+
extra_rdoc_files: []
|
|
35
|
+
files:
|
|
36
|
+
- CHANGELOG.md
|
|
37
|
+
- Cargo.lock
|
|
38
|
+
- Cargo.toml
|
|
39
|
+
- LICENSE
|
|
40
|
+
- README.md
|
|
41
|
+
- ext/lz4rip/Cargo.toml
|
|
42
|
+
- ext/lz4rip/extconf.rb
|
|
43
|
+
- ext/lz4rip/src/lib.rs
|
|
44
|
+
- lib/lz4rip.rb
|
|
45
|
+
- lib/lz4rip/block_codec.rb
|
|
46
|
+
- lib/lz4rip/dict_trainer.rb
|
|
47
|
+
- lib/lz4rip/dictionary.rb
|
|
48
|
+
- lib/lz4rip/frame_codec.rb
|
|
49
|
+
- lib/lz4rip/version.rb
|
|
50
|
+
homepage: https://github.com/paddor/lz4rip-rb
|
|
51
|
+
licenses:
|
|
52
|
+
- MIT
|
|
53
|
+
metadata:
|
|
54
|
+
homepage_uri: https://github.com/paddor/lz4rip-rb
|
|
55
|
+
source_code_uri: https://github.com/paddor/lz4rip-rb
|
|
56
|
+
changelog_uri: https://github.com/paddor/lz4rip-rb/blob/main/CHANGELOG.md
|
|
57
|
+
rubygems_mfa_required: 'true'
|
|
58
|
+
rdoc_options: []
|
|
59
|
+
require_paths:
|
|
60
|
+
- lib
|
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
62
|
+
requirements:
|
|
63
|
+
- - ">="
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: 4.0.0
|
|
66
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
|
+
requirements:
|
|
68
|
+
- - ">="
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: '0'
|
|
71
|
+
requirements: []
|
|
72
|
+
rubygems_version: 4.0.10
|
|
73
|
+
specification_version: 4
|
|
74
|
+
summary: Ractor-safe LZ4 compression for Ruby
|
|
75
|
+
test_files: []
|