rzstd 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Cargo.lock +441 -0
- data/Cargo.toml +9 -0
- data/LICENSE +21 -0
- data/README.md +62 -0
- data/ext/rzstd/Cargo.toml +24 -0
- data/ext/rzstd/extconf.rb +8 -0
- data/ext/rzstd/src/lib.rs +377 -0
- data/lib/rzstd/version.rb +5 -0
- data/lib/rzstd.rb +50 -0
- data/tmp/x86_64-linux/stage/Cargo.toml +9 -0
- data/tmp/x86_64-linux/stage/ext/rzstd/Cargo.toml +24 -0
- metadata +117 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 831259e481d6ea30d50a765ed67b4e574a2e71ba2bdfaba53ec7e529d07373a7
|
|
4
|
+
data.tar.gz: 158331395ece67a5f87ca0c6c42d7e8cc6f92fcc0dfd4334aa3830a663bce47b
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 60dafb7dde062a01c77db4d04b2aa337e29b831bcdc1d69eae5c7b89861e85f1c9f55326e9163bb2d66d913a55a5288e9de9a65e148462f5f2d7c2f32e442a3d
|
|
7
|
+
data.tar.gz: bef641270ddcc3b275579741a43143174f39e9ab6fba5c8fd899edb98a6ff8ddd0ba707ffdff62f5f0004b598ec778c85dcfaf346c3af6e8a22d625b25a51ada
|
data/Cargo.lock
ADDED
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "aho-corasick"
|
|
7
|
+
version = "1.1.4"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"memchr",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[[package]]
|
|
15
|
+
name = "bindgen"
|
|
16
|
+
version = "0.72.1"
|
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
18
|
+
checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
|
|
19
|
+
dependencies = [
|
|
20
|
+
"bitflags",
|
|
21
|
+
"cexpr",
|
|
22
|
+
"clang-sys",
|
|
23
|
+
"itertools",
|
|
24
|
+
"proc-macro2",
|
|
25
|
+
"quote",
|
|
26
|
+
"regex",
|
|
27
|
+
"rustc-hash",
|
|
28
|
+
"shlex",
|
|
29
|
+
"syn",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[[package]]
|
|
33
|
+
name = "bitflags"
|
|
34
|
+
version = "2.11.0"
|
|
35
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
36
|
+
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
|
|
37
|
+
|
|
38
|
+
[[package]]
|
|
39
|
+
name = "block-buffer"
|
|
40
|
+
version = "0.10.4"
|
|
41
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
42
|
+
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
|
43
|
+
dependencies = [
|
|
44
|
+
"generic-array",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
[[package]]
|
|
48
|
+
name = "cc"
|
|
49
|
+
version = "1.2.60"
|
|
50
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
51
|
+
checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20"
|
|
52
|
+
dependencies = [
|
|
53
|
+
"find-msvc-tools",
|
|
54
|
+
"jobserver",
|
|
55
|
+
"libc",
|
|
56
|
+
"shlex",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
[[package]]
|
|
60
|
+
name = "cexpr"
|
|
61
|
+
version = "0.6.0"
|
|
62
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
63
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
|
64
|
+
dependencies = [
|
|
65
|
+
"nom",
|
|
66
|
+
]
|
|
67
|
+
|
|
68
|
+
[[package]]
|
|
69
|
+
name = "cfg-if"
|
|
70
|
+
version = "1.0.4"
|
|
71
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
72
|
+
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
|
73
|
+
|
|
74
|
+
[[package]]
|
|
75
|
+
name = "clang-sys"
|
|
76
|
+
version = "1.8.1"
|
|
77
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
78
|
+
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
|
|
79
|
+
dependencies = [
|
|
80
|
+
"glob",
|
|
81
|
+
"libc",
|
|
82
|
+
"libloading",
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
[[package]]
|
|
86
|
+
name = "cpufeatures"
|
|
87
|
+
version = "0.2.17"
|
|
88
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
89
|
+
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
|
|
90
|
+
dependencies = [
|
|
91
|
+
"libc",
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
[[package]]
|
|
95
|
+
name = "crypto-common"
|
|
96
|
+
version = "0.1.7"
|
|
97
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
98
|
+
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
|
|
99
|
+
dependencies = [
|
|
100
|
+
"generic-array",
|
|
101
|
+
"typenum",
|
|
102
|
+
]
|
|
103
|
+
|
|
104
|
+
[[package]]
|
|
105
|
+
name = "digest"
|
|
106
|
+
version = "0.10.7"
|
|
107
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
108
|
+
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
|
109
|
+
dependencies = [
|
|
110
|
+
"block-buffer",
|
|
111
|
+
"crypto-common",
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
[[package]]
|
|
115
|
+
name = "either"
|
|
116
|
+
version = "1.15.0"
|
|
117
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
118
|
+
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
|
119
|
+
|
|
120
|
+
[[package]]
|
|
121
|
+
name = "find-msvc-tools"
|
|
122
|
+
version = "0.1.9"
|
|
123
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
124
|
+
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
|
|
125
|
+
|
|
126
|
+
[[package]]
|
|
127
|
+
name = "generic-array"
|
|
128
|
+
version = "0.14.7"
|
|
129
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
130
|
+
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
|
131
|
+
dependencies = [
|
|
132
|
+
"typenum",
|
|
133
|
+
"version_check",
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
[[package]]
|
|
137
|
+
name = "getrandom"
|
|
138
|
+
version = "0.3.4"
|
|
139
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
140
|
+
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
|
141
|
+
dependencies = [
|
|
142
|
+
"cfg-if",
|
|
143
|
+
"libc",
|
|
144
|
+
"r-efi",
|
|
145
|
+
"wasip2",
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
[[package]]
|
|
149
|
+
name = "glob"
|
|
150
|
+
version = "0.3.3"
|
|
151
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
152
|
+
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
|
|
153
|
+
|
|
154
|
+
[[package]]
|
|
155
|
+
name = "itertools"
|
|
156
|
+
version = "0.13.0"
|
|
157
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
|
+
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
|
159
|
+
dependencies = [
|
|
160
|
+
"either",
|
|
161
|
+
]
|
|
162
|
+
|
|
163
|
+
[[package]]
|
|
164
|
+
name = "jobserver"
|
|
165
|
+
version = "0.1.34"
|
|
166
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
167
|
+
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
|
|
168
|
+
dependencies = [
|
|
169
|
+
"getrandom",
|
|
170
|
+
"libc",
|
|
171
|
+
]
|
|
172
|
+
|
|
173
|
+
[[package]]
|
|
174
|
+
name = "lazy_static"
|
|
175
|
+
version = "1.5.0"
|
|
176
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
177
|
+
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
178
|
+
|
|
179
|
+
[[package]]
|
|
180
|
+
name = "libc"
|
|
181
|
+
version = "0.2.184"
|
|
182
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
183
|
+
checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
|
|
184
|
+
|
|
185
|
+
[[package]]
|
|
186
|
+
name = "libloading"
|
|
187
|
+
version = "0.8.9"
|
|
188
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
189
|
+
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
|
190
|
+
dependencies = [
|
|
191
|
+
"cfg-if",
|
|
192
|
+
"windows-link",
|
|
193
|
+
]
|
|
194
|
+
|
|
195
|
+
[[package]]
|
|
196
|
+
name = "magnus"
|
|
197
|
+
version = "0.8.2"
|
|
198
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
199
|
+
checksum = "3b36a5b126bbe97eb0d02d07acfeb327036c6319fd816139a49824a83b7f9012"
|
|
200
|
+
dependencies = [
|
|
201
|
+
"magnus-macros",
|
|
202
|
+
"rb-sys",
|
|
203
|
+
"rb-sys-env",
|
|
204
|
+
"seq-macro",
|
|
205
|
+
]
|
|
206
|
+
|
|
207
|
+
[[package]]
|
|
208
|
+
name = "magnus-macros"
|
|
209
|
+
version = "0.8.0"
|
|
210
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
211
|
+
checksum = "47607461fd8e1513cb4f2076c197d8092d921a1ea75bd08af97398f593751892"
|
|
212
|
+
dependencies = [
|
|
213
|
+
"proc-macro2",
|
|
214
|
+
"quote",
|
|
215
|
+
"syn",
|
|
216
|
+
]
|
|
217
|
+
|
|
218
|
+
[[package]]
|
|
219
|
+
name = "memchr"
|
|
220
|
+
version = "2.8.0"
|
|
221
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
222
|
+
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
|
|
223
|
+
|
|
224
|
+
[[package]]
|
|
225
|
+
name = "minimal-lexical"
|
|
226
|
+
version = "0.2.1"
|
|
227
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
228
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
|
229
|
+
|
|
230
|
+
[[package]]
|
|
231
|
+
name = "nom"
|
|
232
|
+
version = "7.1.3"
|
|
233
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
234
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
|
235
|
+
dependencies = [
|
|
236
|
+
"memchr",
|
|
237
|
+
"minimal-lexical",
|
|
238
|
+
]
|
|
239
|
+
|
|
240
|
+
[[package]]
|
|
241
|
+
name = "pkg-config"
|
|
242
|
+
version = "0.3.33"
|
|
243
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
244
|
+
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
|
|
245
|
+
|
|
246
|
+
[[package]]
|
|
247
|
+
name = "proc-macro2"
|
|
248
|
+
version = "1.0.106"
|
|
249
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
250
|
+
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
|
251
|
+
dependencies = [
|
|
252
|
+
"unicode-ident",
|
|
253
|
+
]
|
|
254
|
+
|
|
255
|
+
[[package]]
|
|
256
|
+
name = "quote"
|
|
257
|
+
version = "1.0.45"
|
|
258
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
259
|
+
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
|
260
|
+
dependencies = [
|
|
261
|
+
"proc-macro2",
|
|
262
|
+
]
|
|
263
|
+
|
|
264
|
+
[[package]]
|
|
265
|
+
name = "r-efi"
|
|
266
|
+
version = "5.3.0"
|
|
267
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
268
|
+
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
|
|
269
|
+
|
|
270
|
+
[[package]]
|
|
271
|
+
name = "rb-sys"
|
|
272
|
+
version = "0.9.126"
|
|
273
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
274
|
+
checksum = "284799e73e899fe946fd77c7211b83bff61a1356e039ade7a2516a779e3212d0"
|
|
275
|
+
dependencies = [
|
|
276
|
+
"rb-sys-build",
|
|
277
|
+
]
|
|
278
|
+
|
|
279
|
+
[[package]]
|
|
280
|
+
name = "rb-sys-build"
|
|
281
|
+
version = "0.9.126"
|
|
282
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
283
|
+
checksum = "855fc1ad8943d12c89ef12f9147f1cc531f5bf19fb744112fdd317bb6ee7b5c5"
|
|
284
|
+
dependencies = [
|
|
285
|
+
"bindgen",
|
|
286
|
+
"lazy_static",
|
|
287
|
+
"proc-macro2",
|
|
288
|
+
"quote",
|
|
289
|
+
"regex",
|
|
290
|
+
"shell-words",
|
|
291
|
+
"syn",
|
|
292
|
+
]
|
|
293
|
+
|
|
294
|
+
[[package]]
|
|
295
|
+
name = "rb-sys-env"
|
|
296
|
+
version = "0.2.3"
|
|
297
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
298
|
+
checksum = "cca7ad6a7e21e72151d56fe2495a259b5670e204c3adac41ee7ef676ea08117a"
|
|
299
|
+
|
|
300
|
+
[[package]]
|
|
301
|
+
name = "regex"
|
|
302
|
+
version = "1.12.3"
|
|
303
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
304
|
+
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
|
|
305
|
+
dependencies = [
|
|
306
|
+
"aho-corasick",
|
|
307
|
+
"memchr",
|
|
308
|
+
"regex-automata",
|
|
309
|
+
"regex-syntax",
|
|
310
|
+
]
|
|
311
|
+
|
|
312
|
+
[[package]]
|
|
313
|
+
name = "regex-automata"
|
|
314
|
+
version = "0.4.14"
|
|
315
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
316
|
+
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
|
|
317
|
+
dependencies = [
|
|
318
|
+
"aho-corasick",
|
|
319
|
+
"memchr",
|
|
320
|
+
"regex-syntax",
|
|
321
|
+
]
|
|
322
|
+
|
|
323
|
+
[[package]]
|
|
324
|
+
name = "regex-syntax"
|
|
325
|
+
version = "0.8.10"
|
|
326
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
327
|
+
checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
|
|
328
|
+
|
|
329
|
+
[[package]]
|
|
330
|
+
name = "rustc-hash"
|
|
331
|
+
version = "2.1.2"
|
|
332
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
333
|
+
checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
|
|
334
|
+
|
|
335
|
+
[[package]]
|
|
336
|
+
name = "rzstd"
|
|
337
|
+
version = "0.1.0"
|
|
338
|
+
dependencies = [
|
|
339
|
+
"magnus",
|
|
340
|
+
"rb-sys",
|
|
341
|
+
"sha2",
|
|
342
|
+
"zstd-safe",
|
|
343
|
+
]
|
|
344
|
+
|
|
345
|
+
[[package]]
|
|
346
|
+
name = "seq-macro"
|
|
347
|
+
version = "0.3.6"
|
|
348
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
349
|
+
checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
|
|
350
|
+
|
|
351
|
+
[[package]]
|
|
352
|
+
name = "sha2"
|
|
353
|
+
version = "0.10.9"
|
|
354
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
355
|
+
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
|
356
|
+
dependencies = [
|
|
357
|
+
"cfg-if",
|
|
358
|
+
"cpufeatures",
|
|
359
|
+
"digest",
|
|
360
|
+
]
|
|
361
|
+
|
|
362
|
+
[[package]]
|
|
363
|
+
name = "shell-words"
|
|
364
|
+
version = "1.1.1"
|
|
365
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
366
|
+
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
|
|
367
|
+
|
|
368
|
+
[[package]]
|
|
369
|
+
name = "shlex"
|
|
370
|
+
version = "1.3.0"
|
|
371
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
372
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
|
373
|
+
|
|
374
|
+
[[package]]
|
|
375
|
+
name = "syn"
|
|
376
|
+
version = "2.0.117"
|
|
377
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
378
|
+
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
|
379
|
+
dependencies = [
|
|
380
|
+
"proc-macro2",
|
|
381
|
+
"quote",
|
|
382
|
+
"unicode-ident",
|
|
383
|
+
]
|
|
384
|
+
|
|
385
|
+
[[package]]
|
|
386
|
+
name = "typenum"
|
|
387
|
+
version = "1.19.0"
|
|
388
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
389
|
+
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
|
|
390
|
+
|
|
391
|
+
[[package]]
|
|
392
|
+
name = "unicode-ident"
|
|
393
|
+
version = "1.0.24"
|
|
394
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
395
|
+
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
396
|
+
|
|
397
|
+
[[package]]
|
|
398
|
+
name = "version_check"
|
|
399
|
+
version = "0.9.5"
|
|
400
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
401
|
+
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
|
|
402
|
+
|
|
403
|
+
[[package]]
|
|
404
|
+
name = "wasip2"
|
|
405
|
+
version = "1.0.2+wasi-0.2.9"
|
|
406
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
407
|
+
checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
|
|
408
|
+
dependencies = [
|
|
409
|
+
"wit-bindgen",
|
|
410
|
+
]
|
|
411
|
+
|
|
412
|
+
[[package]]
|
|
413
|
+
name = "windows-link"
|
|
414
|
+
version = "0.2.1"
|
|
415
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
416
|
+
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
|
417
|
+
|
|
418
|
+
[[package]]
|
|
419
|
+
name = "wit-bindgen"
|
|
420
|
+
version = "0.51.0"
|
|
421
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
422
|
+
checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
|
|
423
|
+
|
|
424
|
+
[[package]]
|
|
425
|
+
name = "zstd-safe"
|
|
426
|
+
version = "7.2.4"
|
|
427
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
428
|
+
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
|
429
|
+
dependencies = [
|
|
430
|
+
"zstd-sys",
|
|
431
|
+
]
|
|
432
|
+
|
|
433
|
+
[[package]]
|
|
434
|
+
name = "zstd-sys"
|
|
435
|
+
version = "2.0.16+zstd.1.5.7"
|
|
436
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
437
|
+
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
|
|
438
|
+
dependencies = [
|
|
439
|
+
"cc",
|
|
440
|
+
"pkg-config",
|
|
441
|
+
]
|
data/Cargo.toml
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Patrik Wenger
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# rzstd
|
|
2
|
+
|
|
3
|
+
[](https://rubygems.org/gems/rzstd)
|
|
4
|
+
[](LICENSE)
|
|
5
|
+
[](https://www.ruby-lang.org)
|
|
6
|
+
[](https://www.rust-lang.org)
|
|
7
|
+
|
|
8
|
+
Ractor-safe Zstandard bindings for Ruby with persistent contexts.
|
|
9
|
+
|
|
10
|
+
`rzstd` provides Zstd frame compress/decompress at module level and a
|
|
11
|
+
stateful `Dictionary` class for dict-bound compression. Internally it
|
|
12
|
+
holds onto `ZSTD_CCtx` / `ZSTD_DCtx` state across calls instead of
|
|
13
|
+
allocating fresh ~256 KB contexts every time, which is what makes it
|
|
14
|
+
viable for small-message workloads where the upstream `zstd-ruby` gem
|
|
15
|
+
loses to LZ4 purely on context-allocation overhead.
|
|
16
|
+
|
|
17
|
+
API mirrors [`rlz4`](https://github.com/paddor/rlz4) 0.2.x:
|
|
18
|
+
|
|
19
|
+
```ruby
|
|
20
|
+
require "rzstd"
|
|
21
|
+
|
|
22
|
+
# Module-level frame compression
|
|
23
|
+
ct = RZstd.compress("the quick brown fox", level: 3) # level: kwarg, default 3
|
|
24
|
+
RZstd.decompress(ct) # => "the quick brown fox"
|
|
25
|
+
|
|
26
|
+
# Negative levels enable Zstd's fast strategy (trades ratio for speed).
|
|
27
|
+
# Supported range: -131072..22. Typical useful range: -7..19.
|
|
28
|
+
RZstd.compress(payload, level: -3) # fast strategy, low ratio
|
|
29
|
+
RZstd.compress(payload, level: 19) # high ratio, slow
|
|
30
|
+
|
|
31
|
+
# Dict-bound compression
|
|
32
|
+
dict = RZstd::Dictionary.new(File.binread("schema.dict"), level: -3)
|
|
33
|
+
dict.id # => u32 from the first 4 bytes of sha256(dict), little-endian
|
|
34
|
+
dict.size # => byte length
|
|
35
|
+
dict.compress("payload that shares the schema")
|
|
36
|
+
dict.decompress(ct)
|
|
37
|
+
|
|
38
|
+
# Dictionary training from sample payloads (wraps ZDICT_trainFromBuffer).
|
|
39
|
+
# Gather representative messages, then train a dictionary once and reuse
|
|
40
|
+
# it on both peers. Small-message workloads benefit the most.
|
|
41
|
+
samples = 1000.times.map { generate_sample_message }
|
|
42
|
+
dict_bytes = RZstd::Dictionary.train(samples, capacity: 64 * 1024)
|
|
43
|
+
dict = RZstd::Dictionary.new(dict_bytes)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
`Dictionary#id` is the first 4 bytes of `sha256(dict_bytes)`, interpreted
|
|
47
|
+
little-endian. It is intended for **out-of-band** peer negotiation
|
|
48
|
+
(e.g. via a `dict:sha256:<hex>` profile string in your application
|
|
49
|
+
protocol). Raw-content Zstd dictionaries always carry a frame `dictID`
|
|
50
|
+
of 0 by spec, so this id is not embedded in the on-wire frame itself.
|
|
51
|
+
Wrong-dict decoding is caught by the content checksum the encoder
|
|
52
|
+
enables — a peer using the wrong dictionary raises
|
|
53
|
+
`RZstd::DecompressError` instead of returning corrupt bytes.
|
|
54
|
+
|
|
55
|
+
## Ractor safety
|
|
56
|
+
|
|
57
|
+
The extension is marked Ractor-safe. `Dictionary` instances are
|
|
58
|
+
shareable. Module-level `RZstd.compress` / `RZstd.decompress` use a
|
|
59
|
+
single global `CCtx` / `DCtx` behind a `Mutex`, which serializes
|
|
60
|
+
calls across Ractors — if you need parallel throughput, give each
|
|
61
|
+
Ractor its own `Dictionary` (each one owns its own per-instance
|
|
62
|
+
contexts).
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "rzstd"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
|
|
6
|
+
[lib]
|
|
7
|
+
name = "rzstd"
|
|
8
|
+
crate-type = ["cdylib", "rlib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
# Backend: gyscos/zstd-rs (libzstd via zstd-sys). Chosen because it exposes
|
|
12
|
+
# persistent ZSTD_CCtx / ZSTD_DCtx contexts via `zstd-safe`, which is the
|
|
13
|
+
# whole reason this gem exists — we need to reuse encoder/decoder state
|
|
14
|
+
# across calls to compete fairly with rlz4 on small messages.
|
|
15
|
+
#
|
|
16
|
+
# The pure-Rust killingspark/zstd-rs option is the eventual goal; tracked
|
|
17
|
+
# as a follow-up once it grows a persistent-context API.
|
|
18
|
+
zstd-safe = { version = "7", default-features = false, features = ["std", "zdict_builder"] }
|
|
19
|
+
magnus = "0.8"
|
|
20
|
+
rb-sys = "0.9"
|
|
21
|
+
sha2 = { version = "0.10", default-features = false }
|
|
22
|
+
|
|
23
|
+
[build-dependencies]
|
|
24
|
+
rb-sys = "0.9"
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
use magnus::{
|
|
2
|
+
exception::ExceptionClass, function, method, prelude::*, r_string::RString, value::Opaque,
|
|
3
|
+
Error, Ruby,
|
|
4
|
+
};
|
|
5
|
+
use std::sync::{Mutex, OnceLock};
|
|
6
|
+
|
|
7
|
+
use zstd_safe::{CCtx, CParameter, DCtx};
|
|
8
|
+
|
|
9
|
+
const ZSTD_FRAME_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
|
|
10
|
+
|
|
11
|
+
static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
12
|
+
|
|
13
|
+
fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
14
|
+
ruby.get_inner(
|
|
15
|
+
*DECOMPRESS_ERROR
|
|
16
|
+
.get()
|
|
17
|
+
.expect("DecompressError not initialized"),
|
|
18
|
+
)
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// ---------- module-level: persistent CCtx / DCtx for the no-dict path ----------
|
|
22
|
+
//
|
|
23
|
+
// The whole reason this gem exists is that the upstream `zstd-ruby` gem
|
|
24
|
+
// allocates a fresh ZSTD_CCtx / ZSTD_DCtx (~256 KB of internal state)
|
|
25
|
+
// on every compress/decompress call. For small messages that overhead
|
|
26
|
+
// dominates and biases LZ4-vs-Zstd benchmarks. We hold one CCtx and one
|
|
27
|
+
// DCtx behind a Mutex for the lifetime of the process. Multiple Ractors
|
|
28
|
+
// share these contexts and serialize on the Mutex; if you need parallel
|
|
29
|
+
// throughput, give each Ractor its own `RZstd::Dictionary` (each one
|
|
30
|
+
// owns its own per-instance CCtx/DCtx — see below).
|
|
31
|
+
|
|
32
|
+
static GLOBAL_CCTX: OnceLock<Mutex<CCtx<'static>>> = OnceLock::new();
|
|
33
|
+
static GLOBAL_DCTX: OnceLock<Mutex<DCtx<'static>>> = OnceLock::new();
|
|
34
|
+
|
|
35
|
+
fn global_cctx() -> &'static Mutex<CCtx<'static>> {
|
|
36
|
+
GLOBAL_CCTX.get_or_init(|| Mutex::new(CCtx::create()))
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
fn global_dctx() -> &'static Mutex<DCtx<'static>> {
|
|
40
|
+
GLOBAL_DCTX.get_or_init(|| Mutex::new(DCtx::create()))
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
fn rzstd_compress(ruby: &Ruby, rb_input: RString, level: i32) -> Result<RString, Error> {
|
|
44
|
+
// SAFETY: copy borrowed bytes into an owned Vec before any Ruby allocation.
|
|
45
|
+
let input: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
46
|
+
let upper = zstd_safe::compress_bound(input.len());
|
|
47
|
+
let mut out = vec![0u8; upper];
|
|
48
|
+
|
|
49
|
+
let mut cctx = global_cctx().lock().expect("global CCtx mutex poisoned");
|
|
50
|
+
cctx.set_parameter(CParameter::CompressionLevel(level))
|
|
51
|
+
.map_err(|code| {
|
|
52
|
+
Error::new(
|
|
53
|
+
ruby.exception_runtime_error(),
|
|
54
|
+
format!("zstd set_parameter failed: {code}"),
|
|
55
|
+
)
|
|
56
|
+
})?;
|
|
57
|
+
let n = cctx.compress2(&mut out, &input).map_err(|code| {
|
|
58
|
+
Error::new(
|
|
59
|
+
ruby.exception_runtime_error(),
|
|
60
|
+
format!("zstd compress failed: {code}"),
|
|
61
|
+
)
|
|
62
|
+
})?;
|
|
63
|
+
out.truncate(n);
|
|
64
|
+
Ok(ruby.str_from_slice(&out))
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
fn rzstd_decompress(ruby: &Ruby, rb_input: RString) -> Result<RString, Error> {
|
|
68
|
+
// SAFETY: copy borrowed bytes before any Ruby allocation.
|
|
69
|
+
let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
70
|
+
|
|
71
|
+
// Reject anything that isn't a well-formed zstd frame up front. zstd
|
|
72
|
+
// permissively returns 0 for some malformed inputs and we'd rather not
|
|
73
|
+
// mask "sender forgot --compress" mistakes in callers.
|
|
74
|
+
if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
|
|
75
|
+
return Err(Error::new(
|
|
76
|
+
decompress_error(ruby),
|
|
77
|
+
"zstd frame decode failed: bad magic (input is not a Zstd frame)",
|
|
78
|
+
));
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Frames produced by `compress2` always carry frame_content_size, so
|
|
82
|
+
// we can preallocate exactly. For frames without it (third-party
|
|
83
|
+
// producers) we fall back to a 1 MiB ceiling and grow as needed.
|
|
84
|
+
let upper = match zstd_safe::get_frame_content_size(&compressed) {
|
|
85
|
+
Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
|
|
86
|
+
_ => 1024 * 1024,
|
|
87
|
+
};
|
|
88
|
+
let mut out = vec![0u8; upper];
|
|
89
|
+
let mut dctx = global_dctx().lock().expect("global DCtx mutex poisoned");
|
|
90
|
+
let n = dctx.decompress(&mut out, &compressed).map_err(|code| {
|
|
91
|
+
Error::new(
|
|
92
|
+
decompress_error(ruby),
|
|
93
|
+
format!("zstd frame decode failed: {code}"),
|
|
94
|
+
)
|
|
95
|
+
})?;
|
|
96
|
+
out.truncate(n);
|
|
97
|
+
Ok(ruby.str_from_slice(&out))
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// ---------- Dictionary: persistent dict-bound CCtx / DCtx ----------
//
// `Dict_ID` is supplied by the caller (the Ruby wrapper in `lib/rzstd.rb`
// derives it from the first 4 bytes of `sha256(dict_bytes)`, interpreted
// little-endian), exactly like rlz4 0.2.x.
// NOTE(review): this comment used to say that doing the digest in Ruby
// keeps a hash crate out of the Rust extension's dependency tree, but
// Cargo.toml declares `sha2` — confirm which of the two is stale.
//
// Each `Dictionary` instance owns:
//   - the dict bytes (leaked to `&'static [u8]` so the struct needs no
//     self-referential lifetime gymnastics — `Dictionary` instances are
//     typically per-workload and long-lived)
//   - a CCtx with the dictionary loaded, behind a Mutex
//   - a DCtx with the dictionary loaded, behind a Mutex
// NOTE(review): the contexts are set up with `load_dictionary`; the zstd
// manual says ZSTD_CCtx_loadDictionary / ZSTD_DCtx_loadDictionary copy the
// dictionary internally, so the earlier "loaded by reference" wording may
// not hold — confirm before relying on the leaked slice for the contexts.
//
// Multiple Ractors with separate Dictionary instances scale freely.
// Multiple Ractors sharing one Dictionary serialize on its mutexes.
#[magnus::wrap(class = "RZstd::Dictionary", free_immediately, size)]
struct Dictionary {
    // Dictionary bytes, leaked to `'static` by `dict_initialize`.
    bytes: &'static [u8],
    // Caller-supplied dictionary id, stored exactly as passed in.
    id: u32,
    // Compression context configured in `dict_initialize` (level, content
    // size flag, checksum flag, dictionary).
    cctx: Mutex<CCtx<'static>>,
    // Decompression context with the same dictionary loaded.
    dctx: Mutex<DCtx<'static>>,
}

// Safety: Dictionary's interior state lives behind Mutexes. The leaked
// `bytes` slice is `'static`. CCtx/DCtx from zstd-safe own a unique
// ZSTD_*Ctx pointer with no shared state, so wrapping them in Mutex
// makes the whole struct Send + Sync.
unsafe impl Send for Dictionary {}
unsafe impl Sync for Dictionary {}
|
|
130
|
+
|
|
131
|
+
fn dict_initialize(ruby: &Ruby, rb_dict: RString, id: u32, level: i32) -> Result<Dictionary, Error> {
|
|
132
|
+
// SAFETY: copy bytes into an owned Vec before any Ruby allocation.
|
|
133
|
+
let owned: Vec<u8> = unsafe { rb_dict.as_slice().to_vec() };
|
|
134
|
+
rb_dict.freeze();
|
|
135
|
+
|
|
136
|
+
// Leak the dict bytes so the CCtx/DCtx can hold a `'static` reference
|
|
137
|
+
// without self-referential lifetimes. Dictionaries are long-lived
|
|
138
|
+
// (typically one per workload) so this is fine in practice.
|
|
139
|
+
let bytes: &'static [u8] = Box::leak(owned.into_boxed_slice());
|
|
140
|
+
|
|
141
|
+
let mut cctx = CCtx::create();
|
|
142
|
+
cctx.set_parameter(CParameter::CompressionLevel(level))
|
|
143
|
+
.map_err(|code| {
|
|
144
|
+
Error::new(
|
|
145
|
+
ruby.exception_runtime_error(),
|
|
146
|
+
format!("zstd CCtx set_parameter failed: {code}"),
|
|
147
|
+
)
|
|
148
|
+
})?;
|
|
149
|
+
// Enable the content checksum so decoding with the wrong dictionary
|
|
150
|
+
// (or wrong bytes generally) fails fast at the trailing XXH64 check
|
|
151
|
+
// instead of silently returning garbage. Raw-content dictionaries
|
|
152
|
+
// always carry a frame `dictID` of 0 by spec, so the checksum is the
|
|
153
|
+
// only on-wire signal we have to detect dict mismatch.
|
|
154
|
+
cctx.set_parameter(CParameter::ContentSizeFlag(true))
|
|
155
|
+
.map_err(|code| {
|
|
156
|
+
Error::new(
|
|
157
|
+
ruby.exception_runtime_error(),
|
|
158
|
+
format!("zstd CCtx set_parameter ContentSizeFlag failed: {code}"),
|
|
159
|
+
)
|
|
160
|
+
})?;
|
|
161
|
+
cctx.set_parameter(CParameter::ChecksumFlag(true))
|
|
162
|
+
.map_err(|code| {
|
|
163
|
+
Error::new(
|
|
164
|
+
ruby.exception_runtime_error(),
|
|
165
|
+
format!("zstd CCtx set_parameter ChecksumFlag failed: {code}"),
|
|
166
|
+
)
|
|
167
|
+
})?;
|
|
168
|
+
cctx.load_dictionary(bytes).map_err(|code| {
|
|
169
|
+
Error::new(
|
|
170
|
+
ruby.exception_runtime_error(),
|
|
171
|
+
format!("zstd CCtx load_dictionary failed: {code}"),
|
|
172
|
+
)
|
|
173
|
+
})?;
|
|
174
|
+
|
|
175
|
+
let mut dctx = DCtx::create();
|
|
176
|
+
dctx.load_dictionary(bytes).map_err(|code| {
|
|
177
|
+
Error::new(
|
|
178
|
+
ruby.exception_runtime_error(),
|
|
179
|
+
format!("zstd DCtx load_dictionary failed: {code}"),
|
|
180
|
+
)
|
|
181
|
+
})?;
|
|
182
|
+
|
|
183
|
+
Ok(Dictionary {
|
|
184
|
+
bytes,
|
|
185
|
+
id,
|
|
186
|
+
cctx: Mutex::new(cctx),
|
|
187
|
+
dctx: Mutex::new(dctx),
|
|
188
|
+
})
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
fn dict_compress(ruby: &Ruby, rb_self: &Dictionary, rb_input: RString) -> Result<RString, Error> {
|
|
192
|
+
let input: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
193
|
+
let upper = zstd_safe::compress_bound(input.len());
|
|
194
|
+
let mut out = vec![0u8; upper];
|
|
195
|
+
|
|
196
|
+
let mut cctx = rb_self.cctx.lock().expect("Dictionary CCtx mutex poisoned");
|
|
197
|
+
let n = cctx.compress2(&mut out, &input).map_err(|code| {
|
|
198
|
+
Error::new(
|
|
199
|
+
ruby.exception_runtime_error(),
|
|
200
|
+
format!("zstd dict compress failed: {code}"),
|
|
201
|
+
)
|
|
202
|
+
})?;
|
|
203
|
+
out.truncate(n);
|
|
204
|
+
Ok(ruby.str_from_slice(&out))
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
fn dict_decompress(
|
|
208
|
+
ruby: &Ruby,
|
|
209
|
+
rb_self: &Dictionary,
|
|
210
|
+
rb_input: RString,
|
|
211
|
+
) -> Result<RString, Error> {
|
|
212
|
+
let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
213
|
+
if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
|
|
214
|
+
return Err(Error::new(
|
|
215
|
+
decompress_error(ruby),
|
|
216
|
+
"zstd dict frame decode failed: bad magic (input is not a Zstd frame)",
|
|
217
|
+
));
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// Note: raw-content zstd dictionaries always produce a frame `dictID`
|
|
221
|
+
// of 0, so we cannot use `get_dict_id_from_frame` for negotiation.
|
|
222
|
+
// The Dict_ID exposed by `Dictionary#id` is for out-of-band peer
|
|
223
|
+
// agreement (e.g. via the `dict:sha256:<hex>` profile string in the
|
|
224
|
+
// application protocol). On-wire mismatch is caught by the content
|
|
225
|
+
// checksum that the encoder enables — wrong dict bytes will produce
|
|
226
|
+
// a checksum failure here.
|
|
227
|
+
|
|
228
|
+
let upper = match zstd_safe::get_frame_content_size(&compressed) {
|
|
229
|
+
Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
|
|
230
|
+
_ => 1024 * 1024,
|
|
231
|
+
};
|
|
232
|
+
let mut out = vec![0u8; upper];
|
|
233
|
+
|
|
234
|
+
let mut dctx = rb_self.dctx.lock().expect("Dictionary DCtx mutex poisoned");
|
|
235
|
+
let n = dctx.decompress(&mut out, &compressed).map_err(|code| {
|
|
236
|
+
Error::new(
|
|
237
|
+
decompress_error(ruby),
|
|
238
|
+
format!("zstd dict frame decode failed: {code}"),
|
|
239
|
+
)
|
|
240
|
+
})?;
|
|
241
|
+
out.truncate(n);
|
|
242
|
+
Ok(ruby.str_from_slice(&out))
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// ---------- dictionary training ----------
|
|
246
|
+
//
|
|
247
|
+
// Wraps `ZDICT_trainFromBuffer`. Caller passes a flat buffer of all
|
|
248
|
+
// samples concatenated, plus a sizes array describing how to split
|
|
249
|
+
// it back into per-sample slices. Returns the trained dictionary
|
|
250
|
+
// bytes (truncated to actual dictionary size).
|
|
251
|
+
//
|
|
252
|
+
// `dict_capacity` is the upper bound on the produced dictionary size;
|
|
253
|
+
// the C function may return less. ZDICT recommends ~100 KiB and at
|
|
254
|
+
// least ~10 samples to converge — under-sized inputs raise an error.
|
|
255
|
+
fn rzstd_train(
|
|
256
|
+
ruby: &Ruby,
|
|
257
|
+
rb_samples: RString,
|
|
258
|
+
rb_sample_sizes: magnus::RArray,
|
|
259
|
+
dict_capacity: usize,
|
|
260
|
+
) -> Result<RString, Error> {
|
|
261
|
+
let samples: Vec<u8> = unsafe { rb_samples.as_slice().to_vec() };
|
|
262
|
+
|
|
263
|
+
let mut sizes: Vec<usize> = Vec::with_capacity(rb_sample_sizes.len());
|
|
264
|
+
for v in rb_sample_sizes.into_iter() {
|
|
265
|
+
let n: usize = magnus::TryConvert::try_convert(v)?;
|
|
266
|
+
sizes.push(n);
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
if sizes.iter().sum::<usize>() != samples.len() {
|
|
270
|
+
return Err(Error::new(
|
|
271
|
+
ruby.exception_arg_error(),
|
|
272
|
+
"sample sizes do not sum to samples buffer length",
|
|
273
|
+
));
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
let mut dict_buf: Vec<u8> = vec![0u8; dict_capacity];
|
|
277
|
+
let written = zstd_safe::train_from_buffer(&mut dict_buf, &samples, &sizes).map_err(|code| {
|
|
278
|
+
Error::new(
|
|
279
|
+
ruby.exception_runtime_error(),
|
|
280
|
+
format!("zstd train_from_buffer failed: {code}"),
|
|
281
|
+
)
|
|
282
|
+
})?;
|
|
283
|
+
dict_buf.truncate(written);
|
|
284
|
+
Ok(ruby.str_from_slice(&dict_buf))
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
fn dict_size(rb_self: &Dictionary) -> usize {
|
|
289
|
+
rb_self.bytes.len()
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
fn dict_id(rb_self: &Dictionary) -> u32 {
|
|
293
|
+
rb_self.id
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// ---------- module init ----------
|
|
297
|
+
|
|
298
|
+
#[magnus::init]
|
|
299
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
300
|
+
// Mark this extension as Ractor-safe. Globals are OnceLock<Mutex<...>>;
|
|
301
|
+
// Dictionary state is Mutex-protected and Send+Sync; the leaked dict
|
|
302
|
+
// bytes are 'static.
|
|
303
|
+
unsafe { rb_sys::rb_ext_ractor_safe(true) };
|
|
304
|
+
|
|
305
|
+
let module = ruby.define_module("RZstd")?;
|
|
306
|
+
|
|
307
|
+
let decompress_error_class =
|
|
308
|
+
module.define_error("DecompressError", ruby.exception_standard_error())?;
|
|
309
|
+
DECOMPRESS_ERROR
|
|
310
|
+
.set(Opaque::from(decompress_error_class))
|
|
311
|
+
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
312
|
+
|
|
313
|
+
// Bound as `_native_compress(bytes, level)`. Ruby's `RZstd.compress`
|
|
314
|
+
// wraps this with a `level:` kwarg default — see `lib/rzstd.rb`.
|
|
315
|
+
module.define_module_function("_native_compress", function!(rzstd_compress, 2))?;
|
|
316
|
+
module.define_module_function("decompress", function!(rzstd_decompress, 1))?;
|
|
317
|
+
|
|
318
|
+
let dict_class = module.define_class("Dictionary", ruby.class_object())?;
|
|
319
|
+
// Bound as `_native_new(bytes, id, level)`. Ruby's `RZstd::Dictionary.new(bytes)`
|
|
320
|
+
// computes the id and forwards — see `lib/rzstd.rb`.
|
|
321
|
+
dict_class.define_singleton_method("_native_new", function!(dict_initialize, 3))?;
|
|
322
|
+
dict_class.define_singleton_method("_native_train", function!(rzstd_train, 3))?;
|
|
323
|
+
dict_class.define_method("compress", method!(dict_compress, 1))?;
|
|
324
|
+
dict_class.define_method("decompress", method!(dict_decompress, 1))?;
|
|
325
|
+
dict_class.define_method("size", method!(dict_size, 0))?;
|
|
326
|
+
dict_class.define_method("id", method!(dict_id, 0))?;
|
|
327
|
+
|
|
328
|
+
Ok(())
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Compresses `src` with `cctx` into a worst-case-sized buffer and
    /// returns only the bytes that were written.
    fn compress_all(cctx: &mut CCtx<'_>, src: &[u8]) -> Vec<u8> {
        let mut buf = vec![0u8; zstd_safe::compress_bound(src.len())];
        let written = cctx.compress2(&mut buf, src).unwrap();
        buf.truncate(written);
        buf
    }

    /// Decompresses `frame` with `dctx` into a buffer of `expected_len`
    /// bytes and returns only the bytes that were produced.
    fn decompress_all(dctx: &mut DCtx<'_>, frame: &[u8], expected_len: usize) -> Vec<u8> {
        let mut buf = vec![0u8; expected_len];
        let produced = dctx.decompress(&mut buf, frame).unwrap();
        buf.truncate(produced);
        buf
    }

    /// Plain (dictionary-less) compress/decompress round trip.
    #[test]
    fn frame_round_trip() {
        let plain = b"the quick brown fox jumps over the lazy dog ".repeat(100);

        let mut encoder = CCtx::create();
        encoder
            .set_parameter(CParameter::CompressionLevel(3))
            .unwrap();
        let frame = compress_all(&mut encoder, &plain);

        // Repetitive input must shrink, and the output must be a zstd frame.
        assert!(frame.len() < plain.len());
        assert_eq!(&frame[..4], &ZSTD_FRAME_MAGIC);

        let mut decoder = DCtx::create();
        let restored = decompress_all(&mut decoder, &frame, plain.len());
        assert_eq!(restored, plain);
    }

    /// Round trip with the same dictionary loaded on both sides.
    #[test]
    fn dict_round_trip() {
        let dict: &'static [u8] =
            Box::leak(b"JSON schema version 1 field ".repeat(8).into_boxed_slice());
        let message = b"JSON schema version 1 field name=hello value=world".to_vec();

        let mut encoder = CCtx::create();
        encoder
            .set_parameter(CParameter::CompressionLevel(3))
            .unwrap();
        encoder.load_dictionary(dict).unwrap();
        let frame = compress_all(&mut encoder, &message);
        assert_eq!(&frame[..4], &ZSTD_FRAME_MAGIC);

        // Raw-content dicts always carry frame dict id 0; that's expected.

        let mut decoder = DCtx::create();
        decoder.load_dictionary(dict).unwrap();
        let restored = decompress_all(&mut decoder, &frame, message.len());
        assert_eq!(restored, message);
    }
}
|
data/lib/rzstd.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
require_relative "rzstd/rzstd"
|
|
6
|
+
require_relative "rzstd/version"
|
|
7
|
+
|
|
8
|
+
module RZstd
  # Compression level used when the caller does not pass `level:`.
  DEFAULT_LEVEL = 3

  class << self
    # Keyword-argument front end for the native
    # `_native_compress(bytes, level)`. Written as an ordinary method
    # definition (not a Proc-bound singleton method) so that it remains
    # Ractor-shareable.
    #
    # @param bytes [String] data to compress
    # @param level [Integer] compression level, coerced with `Integer()`
    def compress(bytes, level: DEFAULT_LEVEL)
      _native_compress(bytes, Integer(level))
    end
  end

  class Dictionary
    class << self
      # Public constructor. Derives the dictionary id from the dictionary
      # bytes (the first 4 bytes of their SHA-256 digest, read as a
      # little-endian 32-bit integer) and forwards to the Rust extension.
      #
      # The id exists for out-of-band peer negotiation (e.g. the
      # `dict:sha256:<hex>` profile string in the application protocol):
      # raw-content zstd dictionaries write a frame `dictID` of 0, so the
      # frame on the wire does not carry it. Decoding with the wrong
      # dictionary is caught by the content checksum the encoder enables.
      #
      # @param bytes [String] dictionary bytes
      # @param level [Integer] compression level, coerced with `Integer()`
      def new(bytes, level: DEFAULT_LEVEL)
        digest = Digest::SHA256.digest(bytes)
        # "V" reads the leading 4 bytes as a little-endian unsigned 32-bit.
        _native_new(bytes, digest.unpack1("V"), Integer(level))
      end

      # Trains a dictionary from a corpus of sample frames by wrapping
      # `ZDICT_trainFromBuffer`, and returns it as a binary String ready to
      # feed back into `Dictionary.new`.
      #
      # NOTE(review): this was originally described as producing a
      # "raw-content" dictionary; ZDICT output normally carries a dictionary
      # header and id — confirm which applies here.
      #
      # ZDICT recommends roughly 100 KiB total samples and at least 10
      # samples; under-provisioned inputs raise.
      #
      # @param samples [Array<String>] sample frames (any encoding)
      # @param capacity [Integer] upper bound on the produced dict size
      # @return [String] trained dictionary bytes (binary)
      def train(samples, capacity: 64 * 1024)
        sizes = samples.map(&:bytesize)
        # Concatenate all samples into one pre-sized binary buffer; `sizes`
        # tells the native side where each sample ends.
        flat = String.new(capacity: sizes.sum, encoding: Encoding::BINARY)
        buffer = samples.each_with_object(flat) { |sample, acc| acc << sample.b }
        _native_train(buffer, sizes, Integer(capacity))
      end
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "rzstd"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
|
|
6
|
+
[lib]
|
|
7
|
+
name = "rzstd"
|
|
8
|
+
crate-type = ["cdylib", "rlib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
# Backend: gyscos/zstd-rs (libzstd via zstd-sys). Chosen because it exposes
|
|
12
|
+
# persistent ZSTD_CCtx / ZSTD_DCtx contexts via `zstd-safe`, which is the
|
|
13
|
+
# whole reason this gem exists — we need to reuse encoder/decoder state
|
|
14
|
+
# across calls to compete fairly with rlz4 on small messages.
|
|
15
|
+
#
|
|
16
|
+
# The pure-Rust killingspark/zstd-rs option is the eventual goal; tracked
|
|
17
|
+
# as a follow-up once it grows a persistent-context API.
|
|
18
|
+
zstd-safe = { version = "7", default-features = false, features = ["std", "zdict_builder"] }
|
|
19
|
+
magnus = "0.8"
|
|
20
|
+
rb-sys = "0.9"
|
|
21
|
+
sha2 = { version = "0.10", default-features = false }
|
|
22
|
+
|
|
23
|
+
[build-dependencies]
|
|
24
|
+
rb-sys = "0.9"
|
metadata
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: rzstd
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Patrik Wenger
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rb_sys
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '0.9'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '0.9'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: rake
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '13.0'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '13.0'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rake-compiler
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '1.2'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '1.2'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: minitest
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - "~>"
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '5.0'
|
|
61
|
+
type: :development
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - "~>"
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '5.0'
|
|
68
|
+
description: |
|
|
69
|
+
Ruby bindings (via Rust/magnus) for the Zstandard compressor with
|
|
70
|
+
persistent ZSTD_CCtx / ZSTD_DCtx contexts that are reused across
|
|
71
|
+
calls. Provides Zstd frame compress/decompress at module level and
|
|
72
|
+
a stateful Dictionary class for dict-bound compression. Designed to
|
|
73
|
+
be safe to call from multiple Ractors and competitive with rlz4 on
|
|
74
|
+
small messages, where per-call context allocation in zstd-ruby
|
|
75
|
+
dominates the cost.
|
|
76
|
+
email:
|
|
77
|
+
- paddor@protonmail.ch
|
|
78
|
+
executables: []
|
|
79
|
+
extensions:
|
|
80
|
+
- ext/rzstd/extconf.rb
|
|
81
|
+
extra_rdoc_files: []
|
|
82
|
+
files:
|
|
83
|
+
- Cargo.lock
|
|
84
|
+
- Cargo.toml
|
|
85
|
+
- LICENSE
|
|
86
|
+
- README.md
|
|
87
|
+
- ext/rzstd/Cargo.toml
|
|
88
|
+
- ext/rzstd/extconf.rb
|
|
89
|
+
- ext/rzstd/src/lib.rs
|
|
90
|
+
- lib/rzstd.rb
|
|
91
|
+
- lib/rzstd/version.rb
|
|
92
|
+
- tmp/x86_64-linux/stage/Cargo.toml
|
|
93
|
+
- tmp/x86_64-linux/stage/ext/rzstd/Cargo.toml
|
|
94
|
+
homepage: https://github.com/paddor/rzstd
|
|
95
|
+
licenses:
|
|
96
|
+
- MIT
|
|
97
|
+
metadata:
|
|
98
|
+
homepage_uri: https://github.com/paddor/rzstd
|
|
99
|
+
source_code_uri: https://github.com/paddor/rzstd
|
|
100
|
+
rdoc_options: []
|
|
101
|
+
require_paths:
|
|
102
|
+
- lib
|
|
103
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
104
|
+
requirements:
|
|
105
|
+
- - ">="
|
|
106
|
+
- !ruby/object:Gem::Version
|
|
107
|
+
version: 4.0.0
|
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
|
+
requirements:
|
|
110
|
+
- - ">="
|
|
111
|
+
- !ruby/object:Gem::Version
|
|
112
|
+
version: '0'
|
|
113
|
+
requirements: []
|
|
114
|
+
rubygems_version: 4.0.6
|
|
115
|
+
specification_version: 4
|
|
116
|
+
summary: Ractor-safe Zstandard bindings for Ruby with persistent contexts
|
|
117
|
+
test_files: []
|