rzstd 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 831259e481d6ea30d50a765ed67b4e574a2e71ba2bdfaba53ec7e529d07373a7
4
- data.tar.gz: 158331395ece67a5f87ca0c6c42d7e8cc6f92fcc0dfd4334aa3830a663bce47b
3
+ metadata.gz: f499740d25842f109c142aaf1ff24648a317e10f3ac2600157fe73151bad7fc7
4
+ data.tar.gz: 21a645b344ae469809a95a1500d87cf72326e72c5d4d1005cc0cf7b15c07b05d
5
5
  SHA512:
6
- metadata.gz: 60dafb7dde062a01c77db4d04b2aa337e29b831bcdc1d69eae5c7b89861e85f1c9f55326e9163bb2d66d913a55a5288e9de9a65e148462f5f2d7c2f32e442a3d
7
- data.tar.gz: bef641270ddcc3b275579741a43143174f39e9ab6fba5c8fd899edb98a6ff8ddd0ba707ffdff62f5f0004b598ec778c85dcfaf346c3af6e8a22d625b25a51ada
6
+ metadata.gz: 070d5a73c25d55de6b04b2b373dd6bfd925381984346bf7683a57027dd77fadcd4c60941c2a408550116292f7181cefbd6ae62b618e529433519bbf4abb39569
7
+ data.tar.gz: 807f3d87206810f19d528630ea1e1054067e3c0e79ae6eebff97605715dbc8abdcf30f94898f3d21196be04cb7819d7e54382de0a76a2dc65cdea6521bf41a7a
data/Cargo.lock CHANGED
@@ -334,7 +334,7 @@ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
334
334
 
335
335
  [[package]]
336
336
  name = "rzstd"
337
- version = "0.1.0"
337
+ version = "0.2.0"
338
338
  dependencies = [
339
339
  "magnus",
340
340
  "rb-sys",
data/ext/rzstd/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "rzstd"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  edition = "2021"
5
5
 
6
6
  [lib]
data/ext/rzstd/src/lib.rs CHANGED
@@ -9,6 +9,8 @@ use zstd_safe::{CCtx, CParameter, DCtx};
9
9
  const ZSTD_FRAME_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
10
10
 
11
11
  static DECOMPRESS_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
12
+ static MISSING_CONTENT_SIZE_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
13
+ static OUTPUT_SIZE_LIMIT_ERROR: OnceLock<Opaque<ExceptionClass>> = OnceLock::new();
12
14
 
13
15
  fn decompress_error(ruby: &Ruby) -> ExceptionClass {
14
16
  ruby.get_inner(
@@ -18,6 +20,22 @@ fn decompress_error(ruby: &Ruby) -> ExceptionClass {
18
20
  )
19
21
  }
20
22
 
23
+ fn missing_content_size_error(ruby: &Ruby) -> ExceptionClass {
24
+ ruby.get_inner(
25
+ *MISSING_CONTENT_SIZE_ERROR
26
+ .get()
27
+ .expect("MissingContentSizeError not initialized"),
28
+ )
29
+ }
30
+
31
+ fn output_size_limit_error(ruby: &Ruby) -> ExceptionClass {
32
+ ruby.get_inner(
33
+ *OUTPUT_SIZE_LIMIT_ERROR
34
+ .get()
35
+ .expect("OutputSizeLimitError not initialized"),
36
+ )
37
+ }
38
+
21
39
  // ---------- module-level: persistent CCtx / DCtx for the no-dict path ----------
22
40
  //
23
41
  // The whole reason this gem exists is that the upstream `zstd-ruby` gem
@@ -64,36 +82,127 @@ fn rzstd_compress(ruby: &Ruby, rb_input: RString, level: i32) -> Result<RString,
64
82
  Ok(ruby.str_from_slice(&out))
65
83
  }
66
84
 
67
- fn rzstd_decompress(ruby: &Ruby, rb_input: RString) -> Result<RString, Error> {
68
- // SAFETY: copy borrowed bytes before any Ruby allocation.
69
- let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
85
+ // Single-shot bounded decompress shared by the global and dictionary
86
+ // paths. Returns the decoded plaintext or a tagged error so the caller
87
+ // can map it onto the appropriate Ruby exception class. `max_output`
88
+ // of 0 means "no upper bound set"; any other value enforces the RFC
89
+ // rule that a frame's declared Frame_Content_Size must be present and
90
+ // must fit within that cap, and rejects the frame on the header alone
91
+ // (no decoder invocation, no output allocation) when either condition
92
+ // is violated. With no cap set, frames lacking Frame_Content_Size fall
93
+ // back to a 1 MiB ceiling (legacy behavior for third-party producers).
94
+ //
95
+ enum BoundedError {
96
+ BadMagic,
97
+ MissingContentSize,
98
+ OutputSizeLimit { declared: u64, limit: u64 },
99
+ DecoderFailed(String),
100
+ }
101
+
70
102
 
71
- // Reject anything that isn't a well-formed zstd frame up front. zstd
72
- // permissively returns 0 for some malformed inputs and we'd rather not
73
- // mask "sender forgot --compress" mistakes in callers.
103
+ fn decompress_bounded(
104
+ compressed: &[u8],
105
+ max_output: usize,
106
+ dctx: &mut DCtx<'_>,
107
+ ) -> Result<Vec<u8>, BoundedError> {
74
108
  if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
109
+ return Err(BoundedError::BadMagic);
110
+ }
111
+
112
+ let upper = match zstd_safe::get_frame_content_size(compressed) {
113
+ Ok(Some(n)) => {
114
+ if max_output != 0 && n > max_output as u64 {
115
+ return Err(BoundedError::OutputSizeLimit {
116
+ declared: n,
117
+ limit: max_output as u64,
118
+ });
119
+ }
120
+ if n > u64::from(u32::MAX) {
121
+ return Err(BoundedError::OutputSizeLimit {
122
+ declared: n,
123
+ limit: u64::from(u32::MAX),
124
+ });
125
+ }
126
+ n as usize
127
+ }
128
+ Ok(None) => {
129
+ if max_output != 0 {
130
+ return Err(BoundedError::MissingContentSize);
131
+ }
132
+ 1024 * 1024
133
+ }
134
+ Err(_) => return Err(BoundedError::BadMagic),
135
+ };
136
+
137
+ let mut out = vec![0u8; upper];
138
+ let n = dctx
139
+ .decompress(&mut out, compressed)
140
+ .map_err(|code| BoundedError::DecoderFailed(format!("{code}")))?;
141
+ out.truncate(n);
142
+ Ok(out)
143
+ }
144
+
145
+
146
+ fn raise_bounded(ruby: &Ruby, err: BoundedError, prefix: &str) -> Error {
147
+ match err {
148
+ BoundedError::BadMagic => Error::new(
149
+ decompress_error(ruby),
150
+ format!("{prefix}: bad magic (input is not a Zstd frame)"),
151
+ ),
152
+ BoundedError::MissingContentSize => Error::new(
153
+ missing_content_size_error(ruby),
154
+ format!("{prefix}: Frame_Content_Size absent from frame header"),
155
+ ),
156
+ BoundedError::OutputSizeLimit { declared, limit } => Error::new(
157
+ output_size_limit_error(ruby),
158
+ format!("{prefix}: declared content size {declared} exceeds limit {limit}"),
159
+ ),
160
+ BoundedError::DecoderFailed(msg) => {
161
+ Error::new(decompress_error(ruby), format!("{prefix}: {msg}"))
162
+ }
163
+ }
164
+ }
165
+
166
+
167
+ // Reads the `Frame_Content_Size` field from a Zstandard frame header
168
+ // without invoking the decoder. Returns `Some(n)` if the producer wrote
169
+ // it, `None` if the frame header omits it, and an error if the input
170
+ // isn't a well-formed Zstandard frame. Callers enforcing a bounded
171
+ // decompressed size MUST use this before `decompress` to reject frames
172
+ // whose declared size already exceeds their limit.
173
+ fn rzstd_get_frame_content_size(
174
+ ruby: &Ruby,
175
+ rb_input: RString,
176
+ ) -> Result<Option<u64>, Error> {
177
+ let bytes: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
178
+ if bytes.len() < ZSTD_FRAME_MAGIC.len() || bytes[..4] != ZSTD_FRAME_MAGIC {
75
179
  return Err(Error::new(
76
180
  decompress_error(ruby),
77
181
  "zstd frame decode failed: bad magic (input is not a Zstd frame)",
78
182
  ));
79
183
  }
184
+ match zstd_safe::get_frame_content_size(&bytes) {
185
+ Ok(Some(n)) => Ok(Some(n)),
186
+ Ok(None) => Ok(None),
187
+ Err(code) => Err(Error::new(
188
+ decompress_error(ruby),
189
+ format!("zstd frame header parse failed: {code:?}"),
190
+ )),
191
+ }
192
+ }
193
+
194
+
195
+ fn rzstd_decompress(
196
+ ruby: &Ruby,
197
+ rb_input: RString,
198
+ max_output: usize,
199
+ ) -> Result<RString, Error> {
200
+ // SAFETY: copy borrowed bytes before any Ruby allocation.
201
+ let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
80
202
 
81
- // Frames produced by `compress2` always carry frame_content_size, so
82
- // we can preallocate exactly. For frames without it (third-party
83
- // producers) we fall back to a 1 MiB ceiling and grow as needed.
84
- let upper = match zstd_safe::get_frame_content_size(&compressed) {
85
- Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
86
- _ => 1024 * 1024,
87
- };
88
- let mut out = vec![0u8; upper];
89
203
  let mut dctx = global_dctx().lock().expect("global DCtx mutex poisoned");
90
- let n = dctx.decompress(&mut out, &compressed).map_err(|code| {
91
- Error::new(
92
- decompress_error(ruby),
93
- format!("zstd frame decode failed: {code}"),
94
- )
95
- })?;
96
- out.truncate(n);
204
+ let out = decompress_bounded(&compressed, max_output, &mut dctx)
205
+ .map_err(|e| raise_bounded(ruby, e, "zstd frame decode failed"))?;
97
206
  Ok(ruby.str_from_slice(&out))
98
207
  }
99
208
 
@@ -208,37 +317,17 @@ fn dict_decompress(
208
317
  ruby: &Ruby,
209
318
  rb_self: &Dictionary,
210
319
  rb_input: RString,
320
+ max_output: usize,
211
321
  ) -> Result<RString, Error> {
212
- let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
213
- if compressed.len() < ZSTD_FRAME_MAGIC.len() || compressed[..4] != ZSTD_FRAME_MAGIC {
214
- return Err(Error::new(
215
- decompress_error(ruby),
216
- "zstd dict frame decode failed: bad magic (input is not a Zstd frame)",
217
- ));
218
- }
219
-
220
322
  // Note: raw-content zstd dictionaries always produce a frame `dictID`
221
323
  // of 0, so we cannot use `get_dict_id_from_frame` for negotiation.
222
- // The Dict_ID exposed by `Dictionary#id` is for out-of-band peer
223
- // agreement (e.g. via the `dict:sha256:<hex>` profile string in the
224
- // application protocol). On-wire mismatch is caught by the content
225
- // checksum that the encoder enables — wrong dict bytes will produce
226
- // a checksum failure here.
227
-
228
- let upper = match zstd_safe::get_frame_content_size(&compressed) {
229
- Ok(Some(n)) if n <= u64::from(u32::MAX) => n as usize,
230
- _ => 1024 * 1024,
231
- };
232
- let mut out = vec![0u8; upper];
233
-
324
+ // On-wire dict mismatch is caught by the content checksum that the
325
+ // encoder enables — wrong dict bytes produce a checksum failure in
326
+ // the shared helper's decoder call.
327
+ let compressed: Vec<u8> = unsafe { rb_input.as_slice().to_vec() };
234
328
  let mut dctx = rb_self.dctx.lock().expect("Dictionary DCtx mutex poisoned");
235
- let n = dctx.decompress(&mut out, &compressed).map_err(|code| {
236
- Error::new(
237
- decompress_error(ruby),
238
- format!("zstd dict frame decode failed: {code}"),
239
- )
240
- })?;
241
- out.truncate(n);
329
+ let out = decompress_bounded(&compressed, max_output, &mut dctx)
330
+ .map_err(|e| raise_bounded(ruby, e, "zstd dict frame decode failed"))?;
242
331
  Ok(ruby.str_from_slice(&out))
243
332
  }
244
333
 
@@ -310,10 +399,26 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
310
399
  .set(Opaque::from(decompress_error_class))
311
400
  .unwrap_or_else(|_| panic!("init called more than once"));
312
401
 
402
+ let missing_content_size_error_class =
403
+ module.define_error("MissingContentSizeError", decompress_error_class)?;
404
+ MISSING_CONTENT_SIZE_ERROR
405
+ .set(Opaque::from(missing_content_size_error_class))
406
+ .unwrap_or_else(|_| panic!("init called more than once"));
407
+
408
+ let output_size_limit_error_class =
409
+ module.define_error("OutputSizeLimitError", decompress_error_class)?;
410
+ OUTPUT_SIZE_LIMIT_ERROR
411
+ .set(Opaque::from(output_size_limit_error_class))
412
+ .unwrap_or_else(|_| panic!("init called more than once"));
413
+
313
414
  // Bound as `_native_compress(bytes, level)`. Ruby's `RZstd.compress`
314
415
  // wraps this with a `level:` kwarg default — see `lib/rzstd.rb`.
315
416
  module.define_module_function("_native_compress", function!(rzstd_compress, 2))?;
316
- module.define_module_function("decompress", function!(rzstd_decompress, 1))?;
417
+ module.define_module_function("_native_decompress", function!(rzstd_decompress, 2))?;
418
+ module.define_module_function(
419
+ "get_frame_content_size",
420
+ function!(rzstd_get_frame_content_size, 1),
421
+ )?;
317
422
 
318
423
  let dict_class = module.define_class("Dictionary", ruby.class_object())?;
319
424
  // Bound as `_native_new(bytes, id, level)`. Ruby's `RZstd::Dictionary.new(bytes)`
@@ -321,7 +426,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
321
426
  dict_class.define_singleton_method("_native_new", function!(dict_initialize, 3))?;
322
427
  dict_class.define_singleton_method("_native_train", function!(rzstd_train, 3))?;
323
428
  dict_class.define_method("compress", method!(dict_compress, 1))?;
324
- dict_class.define_method("decompress", method!(dict_decompress, 1))?;
429
+ dict_class.define_method("_native_decompress", method!(dict_decompress, 2))?;
325
430
  dict_class.define_method("size", method!(dict_size, 0))?;
326
431
  dict_class.define_method("id", method!(dict_id, 0))?;
327
432
 
data/lib/rzstd/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RZstd
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
data/lib/rzstd.rb CHANGED
@@ -15,7 +15,20 @@ module RZstd
15
15
  _native_compress(bytes, Integer(level))
16
16
  end
17
17
 
18
+ # Bounded single-shot decompression. When `max_output_size:` is given,
19
+ # the Rust extension reads the frame's Frame_Content_Size header, raises
20
+ # MissingContentSizeError if absent, and raises OutputSizeLimitError if
21
+ # the declared size exceeds the limit — all before allocating the output
22
+ # buffer or invoking the decoder. When nil, frames without FCS fall back
23
+ # to a 1 MiB ceiling.
24
+ def self.decompress(bytes, max_output_size: nil)
25
+ _native_decompress(bytes, Integer(max_output_size || 0))
26
+ end
27
+
18
28
  class Dictionary
29
+ def decompress(bytes, max_output_size: nil)
30
+ _native_decompress(bytes, Integer(max_output_size || 0))
31
+ end
19
32
  # Public constructor. Derives the Zstd `Dict_ID` from the dictionary
20
33
  # bytes (sha256 truncated to the first 4 bytes, little-endian) and
21
34
  # forwards to the Rust extension. The id is for out-of-band peer
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "rzstd"
3
- version = "0.1.0"
3
+ version = "0.2.0"
4
4
  edition = "2021"
5
5
 
6
6
  [lib]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rzstd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patrik Wenger