rlz4 0.1.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +23 -12
- data/README.md +87 -33
- data/ext/rlz4/Cargo.toml +2 -2
- data/ext/rlz4/src/lib.rs +783 -124
- data/lib/rlz4/block_codec.rb +35 -0
- data/lib/rlz4/dictionary.rb +27 -0
- data/lib/rlz4/frame_codec.rb +34 -0
- data/lib/rlz4/version.rb +1 -1
- data/lib/rlz4.rb +4 -1
- metadata +9 -9
- data/tmp/x86_64-linux/stage/Cargo.toml +0 -9
- data/tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml +0 -16
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "dictionary"
|
|
4
|
+
|
|
5
|
+
module RLZ4
|
|
6
|
+
class BlockCodec
|
|
7
|
+
# Block-format LZ4 compression with a reusable scratch hash table.
|
|
8
|
+
#
|
|
9
|
+
# Dict is passed once at construction time and baked into the codec:
|
|
10
|
+
# the dict is hashed into a pristine table exactly once in `.new`, and
|
|
11
|
+
# every subsequent `#compress` call restores that pristine state via
|
|
12
|
+
# a 16 KiB memcpy before running the block compressor. This amortises
|
|
13
|
+
# dict initialisation across the codec's lifetime rather than paying
|
|
14
|
+
# ~3-5 µs per call to re-hash the dict.
|
|
15
|
+
#
|
|
16
|
+
# `#compress` mutates the scratch table; `#decompress` does not. Both
|
|
17
|
+
# live on the same class so callers hold one object per worker.
|
|
18
|
+
#
|
|
19
|
+
# A BlockCodec must not cross Ractor boundaries. Per-Ractor codecs are
|
|
20
|
+
# the natural unit.
|
|
21
|
+
#
|
|
22
|
+
# @param dict [Dictionary, String, nil] dictionary bytes or a
|
|
23
|
+
# Dictionary value wrapping them. The id on a Dictionary is
|
|
24
|
+
# ignored (block format has no Dict_ID field); we only consult
|
|
25
|
+
# the bytes.
|
|
26
|
+
def self.new(dict: nil)
|
|
27
|
+
_native_new(Dictionary === dict ? dict.bytes : dict)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def decompress(bytes, decompressed_size:)
|
|
32
|
+
_decompress(bytes, decompressed_size)
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module RLZ4
|
|
6
|
+
# Pure value type for an LZ4 dictionary: raw bytes plus a 4-byte id.
|
|
7
|
+
# Built on `Data.define`, so it's immutable, gets `==` / `#hash` /
|
|
8
|
+
# `#deconstruct` for free, and is shareable across Ractors.
|
|
9
|
+
#
|
|
10
|
+
# The id defaults to `sha256(bytes)[0, 4]` interpreted little-endian
|
|
11
|
+
# — the same derivation LZ4 frame FLG.DictID uses. Callers can pass
|
|
12
|
+
# their own id (e.g. a value coordinated out of band) via `id:`.
|
|
13
|
+
#
|
|
14
|
+
# The id is load-bearing in the frame format (FrameCodec writes it
|
|
15
|
+
# into the FrameDescriptor); BlockCodec accepts a Dictionary for
|
|
16
|
+
# API symmetry but doesn't consult the id.
|
|
17
|
+
Dictionary = Data.define(:bytes, :id) do
|
|
18
|
+
def initialize(bytes:, id: Digest::SHA256.digest(bytes).byteslice(0, 4).unpack1("V"))
|
|
19
|
+
super(bytes: bytes.b.freeze, id: id)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def size
|
|
24
|
+
bytes.bytesize
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "dictionary"
|
|
4
|
+
|
|
5
|
+
module RLZ4
|
|
6
|
+
class FrameCodec
|
|
7
|
+
# Frame-format LZ4 codec, optionally dict-bound. Parallel in shape
|
|
8
|
+
# to BlockCodec, but emits a real LZ4 frame (magic `04 22 4D 18`)
|
|
9
|
+
# with the FLG.DictID bit set and `Dict_ID` written into the
|
|
10
|
+
# FrameDescriptor when a dict is installed. Output is interoperable
|
|
11
|
+
# with the reference `lz4` CLI given the same dictionary file.
|
|
12
|
+
#
|
|
13
|
+
# Unlike BlockCodec, FrameCodec holds no thread-local mutable state:
|
|
14
|
+
# it's a read-only dict bytes buffer plus a derived id. Shareable
|
|
15
|
+
# across Ractors.
|
|
16
|
+
#
|
|
17
|
+
# @param dict [Dictionary, String, nil] dictionary bytes or a
|
|
18
|
+
# Dictionary value. Passing a Dictionary reuses its cached id
|
|
19
|
+
# (skips the sha256 digest); a raw String derives the id on the
|
|
20
|
+
# fly.
|
|
21
|
+
def self.new(dict: nil)
|
|
22
|
+
case dict
|
|
23
|
+
when nil
|
|
24
|
+
_native_new(nil, 0)
|
|
25
|
+
when Dictionary
|
|
26
|
+
_native_new(dict.bytes, dict.id)
|
|
27
|
+
when String
|
|
28
|
+
_native_new(dict, Dictionary.new(bytes: dict).id)
|
|
29
|
+
else
|
|
30
|
+
raise TypeError, "expected RLZ4::Dictionary, String, or nil; got #{dict.class}"
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
data/lib/rlz4/version.rb
CHANGED
data/lib/rlz4.rb
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "rlz4/rlz4"
|
|
3
|
+
require_relative "rlz4/rlz4" # Rust extension (native classes + compress_bound)
|
|
4
4
|
require_relative "rlz4/version"
|
|
5
|
+
require_relative "rlz4/dictionary"
|
|
6
|
+
require_relative "rlz4/block_codec"
|
|
7
|
+
require_relative "rlz4/frame_codec"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rlz4
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.1
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Patrik Wenger
|
|
@@ -66,11 +66,10 @@ dependencies:
|
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
67
|
version: '5.0'
|
|
68
68
|
description: |
|
|
69
|
-
Ruby bindings (via Rust/magnus) for
|
|
70
|
-
Provides
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
existing Ruby LZ4 gems.
|
|
69
|
+
Ruby bindings (via Rust/magnus) for liblz4, using lz4-sys FFI.
|
|
70
|
+
Provides block-format and frame-format LZ4 compress/decompress with
|
|
71
|
+
optional dictionary support. Designed to be safe to call from multiple
|
|
72
|
+
Ractors, unlike existing Ruby LZ4 gems.
|
|
74
73
|
email:
|
|
75
74
|
- paddor@protonmail.ch
|
|
76
75
|
executables: []
|
|
@@ -86,9 +85,10 @@ files:
|
|
|
86
85
|
- ext/rlz4/extconf.rb
|
|
87
86
|
- ext/rlz4/src/lib.rs
|
|
88
87
|
- lib/rlz4.rb
|
|
88
|
+
- lib/rlz4/block_codec.rb
|
|
89
|
+
- lib/rlz4/dictionary.rb
|
|
90
|
+
- lib/rlz4/frame_codec.rb
|
|
89
91
|
- lib/rlz4/version.rb
|
|
90
|
-
- tmp/x86_64-linux/stage/Cargo.toml
|
|
91
|
-
- tmp/x86_64-linux/stage/ext/rlz4/Cargo.toml
|
|
92
92
|
homepage: https://github.com/paddor/rlz4
|
|
93
93
|
licenses:
|
|
94
94
|
- MIT
|
|
@@ -111,5 +111,5 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
111
111
|
requirements: []
|
|
112
112
|
rubygems_version: 4.0.6
|
|
113
113
|
specification_version: 4
|
|
114
|
-
summary: Ractor-safe LZ4 bindings for Ruby (Rust extension via
|
|
114
|
+
summary: Ractor-safe LZ4 bindings for Ruby (Rust extension via lz4-sys/liblz4)
|
|
115
115
|
test_files: []
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
[package]
|
|
2
|
-
name = "rlz4"
|
|
3
|
-
version = "0.1.0"
|
|
4
|
-
edition = "2021"
|
|
5
|
-
|
|
6
|
-
[lib]
|
|
7
|
-
name = "rlz4"
|
|
8
|
-
crate-type = ["cdylib", "rlib"]
|
|
9
|
-
|
|
10
|
-
[dependencies]
|
|
11
|
-
lz4_flex = { version = "0.13", default-features = false, features = ["frame", "std", "safe-encode", "safe-decode"] }
|
|
12
|
-
magnus = "0.8"
|
|
13
|
-
rb-sys = "0.9"
|
|
14
|
-
|
|
15
|
-
[build-dependencies]
|
|
16
|
-
rb-sys = "0.9"
|