rzstd 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f499740d25842f109c142aaf1ff24648a317e10f3ac2600157fe73151bad7fc7
4
- data.tar.gz: 21a645b344ae469809a95a1500d87cf72326e72c5d4d1005cc0cf7b15c07b05d
3
+ metadata.gz: 4d839dd8c43cda81d339bbcb624508952c77468320439d429699d75b3f441537
4
+ data.tar.gz: e57e1a1f7d8f782e1243fccca978833e51fe829b8dbdf4d7d652f6b64106ae14
5
5
  SHA512:
6
- metadata.gz: 070d5a73c25d55de6b04b2b373dd6bfd925381984346bf7683a57027dd77fadcd4c60941c2a408550116292f7181cefbd6ae62b618e529433519bbf4abb39569
7
- data.tar.gz: 807f3d87206810f19d528630ea1e1054067e3c0e79ae6eebff97605715dbc8abdcf30f94898f3d21196be04cb7819d7e54382de0a76a2dc65cdea6521bf41a7a
6
+ metadata.gz: 21c2663f86669e9d6bbfccb6620786aebdc8e7e793cf4e96e3c6ce863324c1065f85a1d2f4ab68be77231d09536e490f2467bba0b70635cf41f6fcbc114fdc25
7
+ data.tar.gz: 54b0e0f46779782a089706618fbad3a0af26ee72205c9e7c835a90a25c184034a62db669e6a2ca0caa0b308e6a77fda1d4a5f81b399e8678a8b37a89c0c401b8
data/ext/rzstd/src/lib.rs CHANGED
@@ -257,9 +257,11 @@ fn dict_initialize(ruby: &Ruby, rb_dict: RString, id: u32, level: i32) -> Result
257
257
  })?;
258
258
  // Enable the content checksum so decoding with the wrong dictionary
259
259
  // (or wrong bytes generally) fails fast at the trailing XXH64 check
260
- // instead of silently returning garbage. Raw-content dictionaries
261
- // always carry a frame `dictID` of 0 by spec, so the checksum is the
262
- // only on-wire signal we have to detect dict mismatch.
260
+ // instead of silently returning garbage. For ZDICT-format dicts
261
+ // the frame header additionally carries the dict's own dict_id
262
+ // (via `ZSTD_c_dictIDFlag`, enabled by default), which lets peers
263
+ // dispatch by id; raw-content dicts carry a frame `dictID` of 0
264
+ // and rely solely on the checksum to detect mismatch.
263
265
  cctx.set_parameter(CParameter::ContentSizeFlag(true))
264
266
  .map_err(|code| {
265
267
  Error::new(
data/lib/rzstd/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RZstd
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/rzstd.rb CHANGED
@@ -8,6 +8,7 @@ require_relative "rzstd/version"
8
8
  module RZstd
9
9
  DEFAULT_LEVEL = 3
10
10
 
11
+
11
12
  # Wrap the native `_native_compress(bytes, level)` so callers get a
12
13
  # `level:` kwarg with a sensible default. Defined as a real method
13
14
  # (not a Proc-bound singleton method) so it remains Ractor-shareable.
@@ -15,6 +16,7 @@ module RZstd
15
16
  _native_compress(bytes, Integer(level))
16
17
  end
17
18
 
19
+
18
20
  # Bounded single-shot decompression. When `max_output_size:` is given,
19
21
  # the Rust extension reads the frame's Frame_Content_Size header, raises
20
22
  # MissingContentSizeError if absent, and raises OutputSizeLimitError if
@@ -25,20 +27,38 @@ module RZstd
25
27
  _native_decompress(bytes, Integer(max_output_size || 0))
26
28
  end
27
29
 
30
+
28
31
  class Dictionary
29
- def decompress(bytes, max_output_size: nil)
30
- _native_decompress(bytes, Integer(max_output_size || 0))
31
- end
32
- # Public constructor. Derives the Zstd `Dict_ID` from the dictionary
33
- # bytes (sha256 truncated to the first 4 bytes, little-endian) and
34
- # forwards to the Rust extension. The id is for out-of-band peer
35
- # negotiation (e.g. via the `dict:sha256:<hex>` profile string in
36
- # the application protocol) — raw-content zstd dictionaries always
37
- # write a frame `dictID` of 0, so the on-wire frame doesn't carry
38
- # this id. Wrong-dict decoding is caught by the content checksum
39
- # the encoder enables.
32
+ ZDICT_MAGIC = "\x37\xA4\x30\xEC".b.freeze
33
+
34
+ # Public Dict_ID range per the Zstandard spec. IDs `0..32_767` are
35
+ # reserved for a future registrar, and IDs `>= 2**31` are reserved too. Only
36
+ # `32_768..(2**31 - 1)` is available for private/auto-generated dicts.
37
+ USER_DICT_ID_MIN = 32_768
38
+ USER_DICT_ID_MAX = (2**31) - 1
39
+ USER_DICT_ID_SIZE = USER_DICT_ID_MAX - USER_DICT_ID_MIN + 1
40
+
41
+ # Public constructor. Resolves the Zstd `Dict_ID`:
42
+ #
43
+ # - If `bytes` begins with the ZDICT magic (`0xEC30A437` LE), the
44
+ # id is read from bytes `[4..7]` of the dictionary header. This is
45
+ # the same id zstd writes into every compressed frame header via
46
+ # `ZSTD_c_dictIDFlag` (enabled by default), so on-wire frames and
47
+ # `Dictionary#id` agree.
48
+ # - Otherwise the dict is raw content: zstd writes a frame `dictID`
49
+ # of 0, and this wrapper falls back to `sha256(bytes)[0..3]` LE
50
+ # mapped into the public range `32_768..(2**31 - 1)`, purely as
51
+ # an out-of-band identifier for the Ruby side. Wrong-dict decoding
52
+ # of raw dicts is caught by the content checksum the encoder
53
+ # enables.
40
54
  def self.new(bytes, level: DEFAULT_LEVEL)
41
- id = Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V")
55
+ id = if bytes.byteslice(0, 4) == ZDICT_MAGIC
56
+ bytes.byteslice(4, 4).unpack1("V")
57
+ else
58
+ raw = Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V")
59
+ USER_DICT_ID_MIN + (raw % USER_DICT_ID_SIZE)
60
+ end
61
+
42
62
  _native_new(bytes, id, Integer(level))
43
63
  end
44
64
 
@@ -59,5 +79,11 @@ module RZstd
59
79
  samples.each { |s| buffer << s.b }
60
80
  _native_train(buffer, sizes, Integer(capacity))
61
81
  end
82
+
83
+
84
+ def decompress(bytes, max_output_size: nil)
85
+ _native_decompress(bytes, Integer(max_output_size || 0))
86
+ end
87
+
62
88
  end
63
89
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rzstd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patrik Wenger