omq-lz4 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ade7fc716054707c2e05bade867b01333708de30df98e3acb524f8da1a7d98d4
4
- data.tar.gz: 7ae5fade7f0d88eed7f6f7f8875373a044127098d596b9f6bae5554d585b1e90
3
+ metadata.gz: 4217a939f757a07b0e3e2ad1a905d11ea3cd017e54baa2f059a75f304386bdab
4
+ data.tar.gz: 1d0f4e4076e0cf02291b3f714fe20c721b8e05e0d4d17af4fbc8662e83897319
5
5
  SHA512:
6
- metadata.gz: 59afc48b58c8c1efac0973bffa702cac3958b0150b4eb51fe9dec82dacc1c9735446d83e58ac6963f0302e13b26a0890b1eaeff5c0d7d846830c561775a848f4
7
- data.tar.gz: 17c66cce9f79f1a375a8614e3aa3cc45071277432521ec96d446daede230b1e6e1a71003b88062345c48550c128e524aa10160d6a06c41e6c7597309e138d5f0
6
+ metadata.gz: a963f09065ef7a019a8b766a00093e5ef84ba9489fed66d3524da5e8a0d8d956d27cd01534926ac1cb3f5c5088439da9e0db5c5f71c0b7b58b7b90db8ccfc8bf
7
+ data.tar.gz: 86e7b5fe26c080a13a6b5c5c44b0e0a250bc45d650b2a9f76f4a80330209d6e24374b8a6412dd28118cb971619f56d3bede7a5ed937c91c5a80fd520ab17b5ac
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.0 (2026-05-11)
4
+
5
+ ### Added
6
+
7
+ - **LZ4M multi-block encoding/decoding** (RFC §5.3a, §5.4a, §5.5 rule 4).
8
+ Parts larger than `LZ4M_BLOCK_SIZE` (1 GiB) are split into independently
9
+ decodable blocks, each compressed against the installed dict (if any).
10
+ `encode_part` / `decode_part` accept a `block_size:` keyword for testing
11
+ with smaller-than-protocol block sizes.
12
+ - `LZ4M_SENTINEL` (`"LZ4M"`) and `LZ4M_BLOCK_SIZE` (1,073,741,824) constants
13
+ in `OMQ::LZ4::Codec`.
14
+ - LZ4B `decompressed_size` cap: the decoder now rejects single-block parts
15
+ whose declared `decompressed_size` exceeds `LZ4M_BLOCK_SIZE` (RFC §5.5
16
+ rule 3).
17
+ - Codec tests for LZ4M round-trips (with and without dict, partial last
18
+ block, random bytes), malformed LZ4M inputs (truncated, leftover bytes,
19
+ corrupt block data, budget overrun), and the LZ4B block size limit.
20
+
3
21
  ## 0.2.0 (2026-05-04)
4
22
 
5
23
  ### Changed
data/README.md CHANGED
@@ -4,7 +4,6 @@
4
4
  [![License: ISC](https://img.shields.io/badge/License-ISC-blue.svg)](LICENSE)
5
5
  [![Ruby](https://img.shields.io/badge/Ruby-%3E%3D%203.3-CC342D?logo=ruby&logoColor=white)](https://www.ruby-lang.org)
6
6
 
7
- > **Status:** 0.1.0 — first landable release. See
8
7
  > See [RFC.md](RFC.md) for the wire-format spec and
9
8
  > [CHANGELOG.md](CHANGELOG.md) for what's in.
10
9
 
data/lib/omq/lz4/codec.rb CHANGED
@@ -14,18 +14,22 @@ module OMQ
14
14
  # Each wire part begins with a 4-byte sentinel:
15
15
  #
16
16
  # 00 00 00 00 uncompressed plaintext
17
- # 4C 5A 34 42 LZ4-compressed block ("LZ4B" in ASCII)
17
+ # 4C 5A 34 42 LZ4-compressed single block ("LZ4B" in ASCII)
18
+ # 4C 5A 34 4D LZ4-compressed multi-block ("LZ4M" in ASCII)
18
19
  # 4C 5A 34 44 dictionary shipment ("LZ4D" in ASCII)
19
20
  #
20
- # `decode_part` handles UNCOMPRESSED and LZ4B only. Dictionary
21
+ # `decode_part` handles UNCOMPRESSED, LZ4B, and LZ4M. Dictionary
21
22
  # shipments are a transport-layer concern: the transport peeks the
22
23
  # first 4 bytes of each incoming wire part, routes LZ4D to
23
24
  # `decode_dict_shipment`, and never hands a shipment to `decode_part`.
24
25
  module Codec
25
26
  UNCOMPRESSED_SENTINEL = "\x00\x00\x00\x00".b.freeze
26
27
  LZ4B_SENTINEL = "LZ4B".b.freeze
28
+ LZ4M_SENTINEL = "LZ4M".b.freeze
27
29
  LZ4D_SENTINEL = "LZ4D".b.freeze
28
30
 
31
+ LZ4M_BLOCK_SIZE = 1_073_741_824
32
+
29
33
  # Size thresholds below which compression isn't worth attempting.
30
34
  # Empirically tuned on Lorem-ipsum-like input via
31
35
  # bench/min_compress_size_sweep.rb: for block-format LZ4 the
@@ -65,10 +69,11 @@ module OMQ
65
69
  # `min_size` overrides the default threshold. Nil (the default)
66
70
  # picks `MIN_COMPRESS_NO_DICT` for a no-dict codec and
67
71
  # `MIN_COMPRESS_WITH_DICT` for a dict codec.
68
- def encode_part(plaintext, block_codec:, min_size: nil)
72
+ def encode_part(plaintext, block_codec:, min_size: nil, block_size: LZ4M_BLOCK_SIZE)
69
73
  min_size ||= block_codec.has_dict? ? MIN_COMPRESS_WITH_DICT : MIN_COMPRESS_NO_DICT
70
74
 
71
75
  return encode_passthrough(plaintext) if plaintext.bytesize < min_size
76
+ return encode_multi_block(plaintext, block_codec, block_size) if plaintext.bytesize > block_size
72
77
 
73
78
  compressed = block_codec.compress(plaintext)
74
79
 
@@ -91,7 +96,7 @@ module OMQ
91
96
  #
92
97
  # Does not handle LZ4D shipments; transport must route those to
93
98
  # `decode_dict_shipment` before calling here.
94
- def decode_part(wire_bytes, block_codec:, max_size: nil)
99
+ def decode_part(wire_bytes, block_codec:, max_size: nil, block_size: LZ4M_BLOCK_SIZE)
95
100
  if wire_bytes.bytesize < 4
96
101
  raise ProtocolError, "wire part too short (< 4 bytes)"
97
102
  end
@@ -107,6 +112,10 @@ module OMQ
107
112
  raise ProtocolError, "LZ4B part too short (< 12 bytes, no room for size field)"
108
113
  end
109
114
  decompressed_size = wire_bytes.byteslice(4, 8).unpack1("Q<")
115
+ if decompressed_size > block_size
116
+ raise ProtocolError,
117
+ "LZ4B decompressed_size #{decompressed_size} exceeds block size limit #{block_size}"
118
+ end
110
119
  check_size!(decompressed_size, max_size)
111
120
  block = wire_bytes.byteslice(12, wire_bytes.bytesize - 12)
112
121
  begin
@@ -114,8 +123,9 @@ module OMQ
114
123
  rescue RLZ4::DecompressError => e
115
124
  raise ProtocolError, "LZ4B decode failed: #{e.message}"
116
125
  end
126
+ when LZ4M_SENTINEL
127
+ decode_multi_block(wire_bytes, block_codec, max_size, block_size)
117
128
  when LZ4D_SENTINEL
118
- # Should not reach decode_part; transport should have routed this.
119
129
  raise ProtocolError,
120
130
  "LZ4D dictionary shipment seen at decode_part (transport should route to decode_dict_shipment)"
121
131
  else
@@ -154,6 +164,70 @@ module OMQ
154
164
  class << self
155
165
  private
156
166
 
167
# Encodes `plaintext` as an LZ4M multi-block wire part.
#
# Layout written by this method:
#
#   LZ4M_SENTINEL                              (4 bytes)
#   total plaintext size, u64 little-endian    (8 bytes)
#   then, per block:
#     compressed length, u32 little-endian     (4 bytes)
#     compressed block bytes
#
# Each block covers at most `block_size` plaintext bytes and is produced
# by its own `block_codec.compress` call.
#
# @param plaintext [String] payload to split and compress
# @param block_codec [#compress] codec invoked once per block
# @param block_size [Integer] maximum plaintext bytes per block (> 0)
# @return [String] the wire part, BINARY-encoded
# @raise [ArgumentError] if `block_size` is not positive
def encode_multi_block(plaintext, block_codec, block_size)
  # A zero or negative block size would never advance through the
  # plaintext, so the loop below would append to the buffer forever.
  raise ArgumentError, "block_size must be positive (got #{block_size})" unless block_size.positive?

  total = plaintext.bytesize
  buf = String.new(encoding: Encoding::BINARY)
  buf << LZ4M_SENTINEL
  buf << [total].pack("Q<")

  # byteslice clamps at the end of the string, so the final block is
  # automatically the (possibly shorter) remainder.
  (0...total).step(block_size) do |offset|
    compressed = block_codec.compress(plaintext.byteslice(offset, block_size))
    buf << [compressed.bytesize].pack("V")
    buf << compressed
  end

  buf
end
184
+
185
+
186
# Decodes an LZ4M multi-block wire part back into its plaintext.
#
# Expected layout (`wire_bytes` still includes the 4-byte sentinel):
#
#   sentinel                                   (4 bytes, not re-checked here)
#   total decompressed size, u64 little-endian (8 bytes)
#   then, per block:
#     compressed length, u32 little-endian     (4 bytes)
#     compressed block bytes
#
# Every block is expected to decode to exactly `block_size` bytes, except
# the last one, which carries whatever remains of the declared total.
#
# @param wire_bytes [String] the complete wire part
# @param block_codec [#decompress] codec invoked once per block with a
#   `decompressed_size:` keyword
# @param max_size [Integer, nil] limit forwarded to `check_size!`
# @param block_size [Integer] plaintext bytes per full block (> 0)
# @return [String] the reassembled plaintext, BINARY-encoded
# @raise [ArgumentError] if `block_size` is not positive
# @raise [ProtocolError] if the part is truncated, has trailing bytes, or a
#   block fails to decode / decodes to an unexpected length
def decode_multi_block(wire_bytes, block_codec, max_size, block_size)
  raise ArgumentError, "block_size must be positive (got #{block_size})" unless block_size.positive?

  wire_size = wire_bytes.bytesize
  if wire_size < 12
    raise ProtocolError, "LZ4M part too short (< 12 bytes, no room for size field)"
  end

  decompressed_size = wire_bytes.byteslice(4, 8).unpack1("Q<")
  check_size!(decompressed_size, max_size)

  # The declared size comes from the peer, so do not reserve it blindly:
  # a 12-byte part can claim up to 2**64 - 1 bytes. Capacity is only a
  # preallocation hint (the string still grows on demand), so bound it by
  # the bytes actually received. 255x is roughly the best ratio LZ4 block
  # data can reach; correctness does not depend on the exact factor.
  capacity = [decompressed_size, wire_size * 255].min
  output = String.new(capacity: capacity, encoding: Encoding::BINARY)

  offset = 12
  remaining = decompressed_size

  while remaining.positive?
    if offset + 4 > wire_size
      raise ProtocolError, "LZ4M truncated: no room for block length at offset #{offset}"
    end

    compressed_len = wire_bytes.byteslice(offset, 4).unpack1("V")
    offset += 4

    if offset + compressed_len > wire_size
      raise ProtocolError, "LZ4M truncated: block at offset #{offset} extends past wire end"
    end

    block_data = wire_bytes.byteslice(offset, compressed_len)
    offset += compressed_len

    expected_len = [block_size, remaining].min
    begin
      chunk = block_codec.decompress(block_data, decompressed_size: expected_len)
    rescue RLZ4::DecompressError => e
      raise ProtocolError, "LZ4M block decode failed: #{e.message}"
    end

    # Defensive: guarantee the result matches the declared total even if
    # the codec hands back a different length without raising.
    unless chunk.bytesize == expected_len
      raise ProtocolError, "LZ4M block decoded to #{chunk.bytesize} bytes, expected #{expected_len}"
    end

    output << chunk
    remaining -= expected_len
  end

  if offset != wire_size
    raise ProtocolError, "LZ4M: #{wire_size - offset} leftover bytes after last block"
  end

  output
end
229
+
230
+
157
231
  def encode_passthrough(plaintext)
158
232
  UNCOMPRESSED_SENTINEL + plaintext
159
233
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OMQ
4
4
  module LZ4
5
- VERSION = "0.2.0"
5
+ VERSION = "0.3.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omq-lz4
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Patrik Wenger