rlz4 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
module RLZ4
  # Block-format LZ4 codec with a reusable scratch hash table.
  #
  # The dictionary is supplied once, at construction time, and baked into
  # the codec: it is hashed into a pristine table exactly once in `.new`,
  # and every later `#compress` call restores that pristine state via a
  # 16 KiB memcpy before running the block compressor. This amortises
  # dictionary initialisation over the codec's lifetime instead of paying
  # ~3-5 µs per call to re-hash the dictionary.
  #
  # `#compress` mutates the scratch table; `#decompress` does not. Both
  # live on the same class so callers hold one object per worker.
  #
  # A BlockCodec must not cross Ractor boundaries. Per-Ractor codecs are
  # the natural unit.
  class BlockCodec
    # Builds a codec, optionally bound to a dictionary.
    #
    # The argument is validated here, with the same TypeError that
    # FrameCodec.new raises, so both codecs reject unsupported dictionary
    # types identically instead of relying on whatever the native
    # constructor does with an unexpected object.
    #
    # @param dict [Dictionary, String, nil] dictionary bytes or a
    #   Dictionary value wrapping them. The id on a Dictionary is
    #   ignored (block format has no Dict_ID field); only the bytes are
    #   forwarded.
    # @return [Object] whatever the native `_native_new` returns
    #   (presumably the new BlockCodec; confirm against the extension)
    # @raise [TypeError] if dict is not a Dictionary, String, or nil
    def self.new(dict: nil)
      case dict
      when Dictionary
        _native_new(dict.bytes)
      when nil, String
        _native_new(dict)
      else
        raise TypeError, "expected RLZ4::Dictionary, String, or nil; got #{dict.class}"
      end
    end

    # Keyword-argument front end for the native `_decompress`.
    #
    # @param bytes [String] compressed block, forwarded unchanged
    # @param decompressed_size [Integer] forwarded unchanged as the second
    #   positional argument (presumably the size of the decoded output;
    #   confirm against the extension)
    # @return [Object] whatever the native `_decompress` returns
    def decompress(bytes, decompressed_size:)
      _decompress(bytes, decompressed_size)
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
module RLZ4
  # Immutable value object for an LZ4 dictionary: the raw bytes plus a
  # 4-byte id. Being a `Data.define` type it gets `==`, `#hash` and
  # `#deconstruct` for free, and is shareable across Ractors.
  #
  # When no id is given, it is derived from the dictionary itself: the
  # first four bytes of `sha256(bytes)`, read as a little-endian unsigned
  # 32-bit integer. Callers may instead supply their own id (e.g. one
  # coordinated out of band) via `id:`; it is stored as given, without
  # validation here.
  #
  # The id matters for the frame format (FrameCodec writes it into the
  # FrameDescriptor). BlockCodec accepts a Dictionary for API symmetry
  # but does not consult the id.
  Dictionary = Data.define(:bytes, :id) do
    # @param bytes [String] dictionary contents; stored as a frozen,
    #   binary-encoded copy, so the caller's string is left untouched
    # @param id [Integer] dictionary id; defaults to the sha256-derived
    #   value described above
    def initialize(bytes:, id: Digest::SHA256.digest(bytes).unpack1("V"))
      binary_copy = bytes.b
      super(bytes: binary_copy.freeze, id:)
    end

    # @return [Integer] dictionary length in bytes
    def size = bytes.bytesize
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "dictionary"
4
+
5
module RLZ4
  # Frame-format LZ4 codec, optionally bound to a dictionary. It mirrors
  # BlockCodec in shape but emits a real LZ4 frame (magic `04 22 4D 18`),
  # with the FLG.DictID bit set and `Dict_ID` written into the
  # FrameDescriptor when a dictionary is installed. Output is
  # interoperable with the reference `lz4` CLI given the same dictionary
  # file.
  #
  # Unlike BlockCodec, FrameCodec holds no thread-local mutable state:
  # it is a read-only dictionary buffer plus a derived id, and is
  # shareable across Ractors.
  class FrameCodec
    # Resolves the dictionary argument into a `(bytes, id)` pair and hands
    # it to the native constructor.
    #
    # @param dict [Dictionary, String, nil] dictionary bytes or a
    #   Dictionary value. A Dictionary contributes its already-computed
    #   id (no sha256 digest needed); a raw String has its id derived on
    #   the fly through a temporary Dictionary; nil means no dictionary
    #   and an id of 0.
    # @return [Object] whatever the native `_native_new` returns
    # @raise [TypeError] if dict is not a Dictionary, String, or nil
    def self.new(dict: nil)
      dict_bytes, dict_id =
        case dict
        in nil        then [nil, 0]
        in Dictionary then [dict.bytes, dict.id]
        in String     then [dict, Dictionary.new(bytes: dict).id]
        else
          raise TypeError, "expected RLZ4::Dictionary, String, or nil; got #{dict.class}"
        end

      _native_new(dict_bytes, dict_id)
    end
  end
end
data/lib/rlz4/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RLZ4
4
- VERSION = "0.2.1"
4
+ VERSION = "0.5.0"
5
5
  end
data/lib/rlz4.rb CHANGED
@@ -1,20 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "digest"
4
-
5
- require_relative "rlz4/rlz4"
3
+ require_relative "rlz4/rlz4" # Rust extension (native classes + compress_bound)
6
4
  require_relative "rlz4/version"
7
-
8
- module RLZ4
9
- class Dictionary
10
- # Public constructor. Derives the LZ4 frame `Dict_ID` from the dictionary
11
- # bytes (sha256 truncated to the first 4 bytes, little-endian) and forwards
12
- # to the Rust extension. The id is what gets written into every emitted
13
- # frame's FrameDescriptor and what `#decompress` asserts the incoming
14
- # frame declares before decoding.
15
- def self.new(bytes)
16
- id = Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V")
17
- _native_new(bytes, id)
18
- end
19
- end
20
- end
5
+ require_relative "rlz4/dictionary"
6
+ require_relative "rlz4/block_codec"
7
+ require_relative "rlz4/frame_codec"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rlz4
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patrik Wenger
@@ -66,11 +66,10 @@ dependencies:
66
66
  - !ruby/object:Gem::Version
67
67
  version: '5.0'
68
68
  description: |
69
- Ruby bindings (via Rust/magnus) for the lz4_flex LZ4 implementation.
70
- Provides LZ4 frame-format compress/decompress at module level and a
71
- stateful Dictionary class for block-format compression with a shared
72
- dictionary. Designed to be safe to call from multiple Ractors, unlike
73
- existing Ruby LZ4 gems.
69
+ Ruby bindings (via Rust/magnus) for liblz4, using lz4-sys FFI.
70
+ Provides block-format and frame-format LZ4 compress/decompress with
71
+ optional dictionary support. Designed to be safe to call from multiple
72
+ Ractors, unlike existing Ruby LZ4 gems.
74
73
  email:
75
74
  - paddor@protonmail.ch
76
75
  executables: []
@@ -86,9 +85,10 @@ files:
86
85
  - ext/rlz4/extconf.rb
87
86
  - ext/rlz4/src/lib.rs
88
87
  - lib/rlz4.rb
88
+ - lib/rlz4/block_codec.rb
89
+ - lib/rlz4/dictionary.rb
90
+ - lib/rlz4/frame_codec.rb
89
91
  - lib/rlz4/version.rb
90
- - tmp/x86_64-linux/stage/Cargo.toml
91
- - tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml
92
92
  homepage: https://github.com/paddor/rlz4
93
93
  licenses:
94
94
  - MIT
@@ -111,5 +111,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
111
  requirements: []
112
112
  rubygems_version: 4.0.6
113
113
  specification_version: 4
114
- summary: Ractor-safe LZ4 bindings for Ruby (Rust extension via lz4_flex)
114
+ summary: Ractor-safe LZ4 bindings for Ruby (Rust extension via lz4-sys/liblz4)
115
115
  test_files: []
@@ -1,9 +0,0 @@
1
- [workspace]
2
- members = ["ext/rlz4"]
3
- resolver = "2"
4
-
5
- [profile.release]
6
- opt-level = 3
7
- lto = true
8
- codegen-units = 1
9
- panic = "abort"
@@ -1,16 +0,0 @@
1
- [package]
2
- name = "rlz4"
3
- version = "0.2.0"
4
- edition = "2021"
5
-
6
- [lib]
7
- name = "rlz4"
8
- crate-type = ["cdylib", "rlib"]
9
-
10
- [dependencies]
11
- lz4_flex = { git = "https://github.com/paddor/lz4_flex.git", rev = "dae9c784e890591e6445135ba23cacf344eafe8f", default-features = false, features = ["frame", "std", "safe-encode", "safe-decode"] }
12
- magnus = "0.8"
13
- rb-sys = "0.9"
14
-
15
- [build-dependencies]
16
- rb-sys = "0.9"