rzstd 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +1 -1
- data/ext/rzstd/Cargo.toml +1 -1
- data/ext/rzstd/src/lib.rs +155 -50
- data/lib/rzstd/version.rb +1 -1
- data/lib/rzstd.rb +13 -0
- data/tmp/x86_64-linux/stage/ext/rzstd/Cargo.toml +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f499740d25842f109c142aaf1ff24648a317e10f3ac2600157fe73151bad7fc7
|
|
4
|
+
data.tar.gz: 21a645b344ae469809a95a1500d87cf72326e72c5d4d1005cc0cf7b15c07b05d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 070d5a73c25d55de6b04b2b373dd6bfd925381984346bf7683a57027dd77fadcd4c60941c2a408550116292f7181cefbd6ae62b618e529433519bbf4abb39569
|
|
7
|
+
data.tar.gz: 807f3d87206810f19d528630ea1e1054067e3c0e79ae6eebff97605715dbc8abdcf30f94898f3d21196be04cb7819d7e54382de0a76a2dc65cdea6521bf41a7a
|
data/Cargo.lock
CHANGED
data/ext/rzstd/Cargo.toml
CHANGED
data/ext/rzstd/src/lib.rs
CHANGED
|
@@ -9,6 +9,8 @@ use zstd_safe::{CCtx, CParameter, DCtx};
|
|
|
9
9
|
const ZSTD_FRAME_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
|
|
10
10
|
|
|
11
11
|
static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
12
|
+
static MISSING_CONTENT_SIZE_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
13
|
+
static OUTPUT_SIZE_LIMIT_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
|
|
12
14
|
|
|
13
15
|
fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
14
16
|
ruby.get_inner(
|
|
@@ -18,6 +20,22 @@ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
|
|
|
18
20
|
)
|
|
19
21
|
}
|
|
20
22
|
|
|
23
|
+
fn missing_content_size_error(ruby: &Ruby) -> ExceptionClass {
|
|
24
|
+
ruby.get_inner(
|
|
25
|
+
*MISSING_CONTENT_SIZE_ERROR
|
|
26
|
+
.get()
|
|
27
|
+
.expect("MissingContentSizeError not initialized"),
|
|
28
|
+
)
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
fn output_size_limit_error(ruby: &Ruby) -> ExceptionClass {
|
|
32
|
+
ruby.get_inner(
|
|
33
|
+
*OUTPUT_SIZE_LIMIT_ERROR
|
|
34
|
+
.get()
|
|
35
|
+
.expect("OutputSizeLimitError not initialized"),
|
|
36
|
+
)
|
|
37
|
+
}
|
|
38
|
+
|
|
21
39
|
// ---------- module-level: persistent CCtx / DCtx for the no-dict path ----------
|
|
22
40
|
//
|
|
23
41
|
// The whole reason this gem exists is that the upstream `zstd-ruby` gem
|
|
@@ -64,36 +82,127 @@ fn rzstd_compress(ruby: &Ruby, rb_input: RString, level: i32) -> Result<RString,
|
|
|
64
82
|
Ok(ruby.str_from_slice(&out))
|
|
65
83
|
}
|
|
66
84
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
85
|
+
// Single-shot bounded decompress shared by the global and dictionary
|
|
86
|
+
// paths. Returns the decoded plaintext or a tagged error so the caller
|
|
87
|
+
// can map it onto the appropriate Ruby exception class. `max_output`
|
|
88
|
+
// of 0 means "no upper bound set"; any other value enforces the RFC
|
|
89
|
+
// rule that a frame's declared Frame_Content_Size must be present and
|
|
90
|
+
// must fit within that cap, and rejects the frame on the header alone
|
|
91
|
+
// (no decoder invocation, no output allocation) when either condition
|
|
92
|
+
// is violated. With no cap set, frames lacking Frame_Content_Size fall
|
|
93
|
+
// back to a 1 MiB ceiling (legacy behavior for third-party producers).
|
|
94
|
+
//
|
|
95
|
+
enum BoundedError {
|
|
96
|
+
BadMagic,
|
|
97
|
+
MissingContentSize,
|
|
98
|
+
OutputSizeLimit { declared: u64, limit: u64 },
|
|
99
|
+
DecoderFailed(String),
|
|
100
|
+
}
|
|
101
|
+
|
|
70
102
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
103
|
+
fn decompress_bounded(
|
|
104
|
+
compressed: &[u8],
|
|
105
|
+
max_output: usize,
|
|
106
|
+
dctx: &mut DCtx<'_>,
|
|
107
|
+
) -> Result<Vec<u8>, BoundedError> {
|
|
74
108
|
if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
|
|
109
|
+
return Err(BoundedError::BadMagic);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
let upper = match zstd_safe::get_frame_content_size(compressed) {
|
|
113
|
+
Ok(Some(n)) => {
|
|
114
|
+
if max_output != 0 && n > max_output as u64 {
|
|
115
|
+
return Err(BoundedError::OutputSizeLimit {
|
|
116
|
+
declared: n,
|
|
117
|
+
limit: max_output as u64,
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
if n > u64::from(u32::MAX) {
|
|
121
|
+
return Err(BoundedError::OutputSizeLimit {
|
|
122
|
+
declared: n,
|
|
123
|
+
limit: u64::from(u32::MAX),
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
n as usize
|
|
127
|
+
}
|
|
128
|
+
Ok(None) => {
|
|
129
|
+
if max_output != 0 {
|
|
130
|
+
return Err(BoundedError::MissingContentSize);
|
|
131
|
+
}
|
|
132
|
+
1024 * 1024
|
|
133
|
+
}
|
|
134
|
+
Err(_) => return Err(BoundedError::BadMagic),
|
|
135
|
+
};
|
|
136
|
+
|
|
137
|
+
let mut out = vec![0u8; upper];
|
|
138
|
+
let n = dctx
|
|
139
|
+
.decompress(&mut out, compressed)
|
|
140
|
+
.map_err(|code| BoundedError::DecoderFailed(format!("{code}")))?;
|
|
141
|
+
out.truncate(n);
|
|
142
|
+
Ok(out)
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
fn raise_bounded(ruby: &Ruby, err: BoundedError, prefix: &str) -> Error {
|
|
147
|
+
match err {
|
|
148
|
+
BoundedError::BadMagic => Error::new(
|
|
149
|
+
decompress_error(ruby),
|
|
150
|
+
format!("{prefix}: bad magic (input is not a Zstd frame)"),
|
|
151
|
+
),
|
|
152
|
+
BoundedError::MissingContentSize => Error::new(
|
|
153
|
+
missing_content_size_error(ruby),
|
|
154
|
+
format!("{prefix}: Frame_Content_Size absent from frame header"),
|
|
155
|
+
),
|
|
156
|
+
BoundedError::OutputSizeLimit { declared, limit } => Error::new(
|
|
157
|
+
output_size_limit_error(ruby),
|
|
158
|
+
format!("{prefix}: declared content size {declared} exceeds limit {limit}"),
|
|
159
|
+
),
|
|
160
|
+
BoundedError::DecoderFailed(msg) => {
|
|
161
|
+
Error::new(decompress_error(ruby), format!("{prefix}: {msg}"))
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
// Reads the `Frame_Content_Size` field from a Zstandard frame header
|
|
168
|
+
// without invoking the decoder. Returns `Some(n)` if the producer wrote
|
|
169
|
+
// it, `None` if the frame header omits it, and an error if the input
|
|
170
|
+
// isn't a well-formed Zstandard frame. Callers enforcing a bounded
|
|
171
|
+
// decompressed size MUST use this before `decompress` to reject frames
|
|
172
|
+
// whose declared size already exceeds their limit.
|
|
173
|
+
fn rzstd_get_frame_content_size(
|
|
174
|
+
ruby: &Ruby,
|
|
175
|
+
rb_input: RString,
|
|
176
|
+
) -> Result<Option<u64>, Error> {
|
|
177
|
+
let bytes: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
178
|
+
if bytes.len() < ZSTD_FRAME_MAGIC.len() || bytes[..4] != ZSTD_FRAME_MAGIC {
|
|
75
179
|
return Err(Error::new(
|
|
76
180
|
decompress_error(ruby),
|
|
77
181
|
"zstd frame decode failed: bad magic (input is not a Zstd frame)",
|
|
78
182
|
));
|
|
79
183
|
}
|
|
184
|
+
match zstd_safe::get_frame_content_size(&bytes) {
|
|
185
|
+
Ok(Some(n)) => Ok(Some(n)),
|
|
186
|
+
Ok(None) => Ok(None),
|
|
187
|
+
Err(code) => Err(Error::new(
|
|
188
|
+
decompress_error(ruby),
|
|
189
|
+
format!("zstd frame header parse failed: {code:?}"),
|
|
190
|
+
)),
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
fn rzstd_decompress(
|
|
196
|
+
ruby: &Ruby,
|
|
197
|
+
rb_input: RString,
|
|
198
|
+
max_output: usize,
|
|
199
|
+
) -> Result<RString, Error> {
|
|
200
|
+
// SAFETY: copy borrowed bytes before any Ruby allocation.
|
|
201
|
+
let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
80
202
|
|
|
81
|
-
// Frames produced by `compress2` always carry frame_content_size, so
|
|
82
|
-
// we can preallocate exactly. For frames without it (third-party
|
|
83
|
-
// producers) we fall back to a 1 MiB ceiling and grow as needed.
|
|
84
|
-
let upper = match zstd_safe::get_frame_content_size(&compressed) {
|
|
85
|
-
Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
|
|
86
|
-
_ => 1024 * 1024,
|
|
87
|
-
};
|
|
88
|
-
let mut out = vec![0u8; upper];
|
|
89
203
|
let mut dctx = global_dctx().lock().expect("global DCtx mutex poisoned");
|
|
90
|
-
let
|
|
91
|
-
|
|
92
|
-
decompress_error(ruby),
|
|
93
|
-
format!("zstd frame decode failed: {code}"),
|
|
94
|
-
)
|
|
95
|
-
})?;
|
|
96
|
-
out.truncate(n);
|
|
204
|
+
let out = decompress_bounded(&compressed, max_output, &mut dctx)
|
|
205
|
+
.map_err(|e| raise_bounded(ruby, e, "zstd frame decode failed"))?;
|
|
97
206
|
Ok(ruby.str_from_slice(&out))
|
|
98
207
|
}
|
|
99
208
|
|
|
@@ -208,37 +317,17 @@ fn dict_decompress(
|
|
|
208
317
|
ruby: &Ruby,
|
|
209
318
|
rb_self: &Dictionary,
|
|
210
319
|
rb_input: RString,
|
|
320
|
+
max_output: usize,
|
|
211
321
|
) -> Result<RString, Error> {
|
|
212
|
-
let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
213
|
-
if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
|
|
214
|
-
return Err(Error::new(
|
|
215
|
-
decompress_error(ruby),
|
|
216
|
-
"zstd dict frame decode failed: bad magic (input is not a Zstd frame)",
|
|
217
|
-
));
|
|
218
|
-
}
|
|
219
|
-
|
|
220
322
|
// Note: raw-content zstd dictionaries always produce a frame `dictID`
|
|
221
323
|
// of 0, so we cannot use `get_dict_id_from_frame` for negotiation.
|
|
222
|
-
//
|
|
223
|
-
//
|
|
224
|
-
//
|
|
225
|
-
|
|
226
|
-
// a checksum failure here.
|
|
227
|
-
|
|
228
|
-
let upper = match zstd_safe::get_frame_content_size(&compressed) {
|
|
229
|
-
Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
|
|
230
|
-
_ => 1024 * 1024,
|
|
231
|
-
};
|
|
232
|
-
let mut out = vec![0u8; upper];
|
|
233
|
-
|
|
324
|
+
// On-wire dict mismatch is caught by the content checksum that the
|
|
325
|
+
// encoder enables — wrong dict bytes produce a checksum failure in
|
|
326
|
+
// the shared helper's decoder call.
|
|
327
|
+
let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
|
|
234
328
|
let mut dctx = rb_self.dctx.lock().expect("Dictionary DCtx mutex poisoned");
|
|
235
|
-
let
|
|
236
|
-
|
|
237
|
-
decompress_error(ruby),
|
|
238
|
-
format!("zstd dict frame decode failed: {code}"),
|
|
239
|
-
)
|
|
240
|
-
})?;
|
|
241
|
-
out.truncate(n);
|
|
329
|
+
let out = decompress_bounded(&compressed, max_output, &mut dctx)
|
|
330
|
+
.map_err(|e| raise_bounded(ruby, e, "zstd dict frame decode failed"))?;
|
|
242
331
|
Ok(ruby.str_from_slice(&out))
|
|
243
332
|
}
|
|
244
333
|
|
|
@@ -310,10 +399,26 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
310
399
|
.set(Opaque::from(decompress_error_class))
|
|
311
400
|
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
312
401
|
|
|
402
|
+
let missing_content_size_error_class =
|
|
403
|
+
module.define_error("MissingContentSizeError", decompress_error_class)?;
|
|
404
|
+
MISSING_CONTENT_SIZE_ERROR
|
|
405
|
+
.set(Opaque::from(missing_content_size_error_class))
|
|
406
|
+
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
407
|
+
|
|
408
|
+
let output_size_limit_error_class =
|
|
409
|
+
module.define_error("OutputSizeLimitError", decompress_error_class)?;
|
|
410
|
+
OUTPUT_SIZE_LIMIT_ERROR
|
|
411
|
+
.set(Opaque::from(output_size_limit_error_class))
|
|
412
|
+
.unwrap_or_else(|_| panic!("init called more than once"));
|
|
413
|
+
|
|
313
414
|
// Bound as `_native_compress(bytes, level)`. Ruby's `RZstd.compress`
|
|
314
415
|
// wraps this with a `level:` kwarg default — see `lib/rzstd.rb`.
|
|
315
416
|
module.define_module_function("_native_compress", function!(rzstd_compress, 2))?;
|
|
316
|
-
module.define_module_function("
|
|
417
|
+
module.define_module_function("_native_decompress", function!(rzstd_decompress, 2))?;
|
|
418
|
+
module.define_module_function(
|
|
419
|
+
"get_frame_content_size",
|
|
420
|
+
function!(rzstd_get_frame_content_size, 1),
|
|
421
|
+
)?;
|
|
317
422
|
|
|
318
423
|
let dict_class = module.define_class("Dictionary", ruby.class_object())?;
|
|
319
424
|
// Bound as `_native_new(bytes, id, level)`. Ruby's `RZstd::Dictionary.new(bytes)`
|
|
@@ -321,7 +426,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
321
426
|
dict_class.define_singleton_method("_native_new", function!(dict_initialize, 3))?;
|
|
322
427
|
dict_class.define_singleton_method("_native_train", function!(rzstd_train, 3))?;
|
|
323
428
|
dict_class.define_method("compress", method!(dict_compress, 1))?;
|
|
324
|
-
dict_class.define_method("
|
|
429
|
+
dict_class.define_method("_native_decompress", method!(dict_decompress, 2))?;
|
|
325
430
|
dict_class.define_method("size", method!(dict_size, 0))?;
|
|
326
431
|
dict_class.define_method("id", method!(dict_id, 0))?;
|
|
327
432
|
|
data/lib/rzstd/version.rb
CHANGED
data/lib/rzstd.rb
CHANGED
|
@@ -15,7 +15,20 @@ module RZstd
|
|
|
15
15
|
_native_compress(bytes, Integer(level))
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
+
# Bounded single-shot decompression. When `max_output_size:` is given,
|
|
19
|
+
# the Rust extension reads the frame's Frame_Content_Size header, raises
|
|
20
|
+
# MissingContentSizeError if absent, and raises OutputSizeLimitError if
|
|
21
|
+
# the declared size exceeds the limit — all before allocating the output
|
|
22
|
+
# buffer or invoking the decoder. When nil, frames without FCS fall back
|
|
23
|
+
# to a 1 MiB ceiling.
|
|
24
|
+
def self.decompress(bytes, max_output_size: nil)
|
|
25
|
+
_native_decompress(bytes, Integer(max_output_size || 0))
|
|
26
|
+
end
|
|
27
|
+
|
|
18
28
|
class Dictionary
|
|
29
|
+
def decompress(bytes, max_output_size: nil)
|
|
30
|
+
_native_decompress(bytes, Integer(max_output_size || 0))
|
|
31
|
+
end
|
|
19
32
|
# Public constructor. Derives the Zstd `Dict_ID` from the dictionary
|
|
20
33
|
# bytes (sha256 truncated to the first 4 bytes, little-endian) and
|
|
21
34
|
# forwards to the Rust extension. The id is for out-of-band peer
|