multi_compress 0.2.4 → 0.3.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -2
- data/GET_STARTED.md +863 -0
- data/README.md +15 -19
- data/ext/multi_compress/extconf.rb +2 -0
- data/ext/multi_compress/multi_compress.c +683 -245
- data/lib/multi_compress/version.rb +1 -1
- data/lib/multi_compress.rb +104 -21
- metadata +2 -1
data/GET_STARTED.md
ADDED
@@ -0,0 +1,863 @@
# Get Started with MultiCompress 🚀

Comprehensive guide to using the **MultiCompress** gem for modern compression in Ruby.

## Installation

```ruby
gem 'multi_compress'
```

## Quick Start

### Basic Compression

```ruby
require 'multi_compress'

data = "Hello, world!" * 1000

# Compress with different algorithms
zstd_data = MultiCompress.compress(data, algo: :zstd)
lz4_data = MultiCompress.compress(data, algo: :lz4)
brotli_data = MultiCompress.compress(data, algo: :brotli)

# Decompress (auto-detects algorithm for ZSTD and LZ4)
original = MultiCompress.decompress(zstd_data) # Auto-detects ZSTD
puts original == data # => true

# Brotli requires explicit algorithm specification
brotli_original = MultiCompress.decompress(brotli_data, algo: :brotli)
puts brotli_original == data # => true
```

### Important Notes on Algorithm Detection

**Auto-detection behavior:**

- **ZSTD**: automatically detected by the magic bytes `28 B5 2F FD` (little-endian)
- **LZ4**: auto-detected by internal format validation (see the LZ4 format note below)
- **Brotli**: requires an explicit `algo: :brotli` (no auto-detection available)

**LZ4 Internal Format Warning:**

This gem uses a **custom internal LZ4 format** that is **NOT compatible** with the standard `lz4` command-line tool. The internal format consists of:

- 4 bytes: original size (little-endian)
- 4 bytes: compressed size (little-endian)
- N bytes: LZ4 compressed data
- 4 bytes: end marker (`00 00 00 00`)

This format is optimized for streaming and provides better error detection, but files compressed with this gem cannot be decompressed with the standard `lz4` CLI tool, and vice versa.
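To make the two layouts concrete, here is a small illustrative sketch (not part of the gem's API) that peeks at the leading bytes of a compressed string: a ZSTD frame starts with the `28 B5 2F FD` magic, while the internal LZ4 container starts with the two little-endian size fields described above. It only distinguishes those two cases; Brotli output has no magic bytes and would fall through to the LZ4 branch.

```ruby
# Illustrative only: inspects raw bytes according to the layouts documented above.
# The gem performs this detection internally; you normally never need to do it yourself.
ZSTD_MAGIC = "\x28\xB5\x2F\xFD".b

def describe_compressed(blob)
  if blob.byteslice(0, 4).b == ZSTD_MAGIC
    "zstd frame"
  else
    # Assume the internal LZ4 container: original size and compressed size as
    # 32-bit little-endian integers, then the payload, then a 00 00 00 00 marker.
    original_size, compressed_size = blob.unpack("VV")
    "internal lz4 container (original: #{original_size} B, payload: #{compressed_size} B)"
  end
end

puts describe_compressed(MultiCompress.compress("Hello", algo: :zstd)) # => zstd frame
puts describe_compressed(MultiCompress.compress("Hello", algo: :lz4))  # => internal lz4 container (...)
```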

**❌ WRONG - This will NOT work:**

```bash
# DON'T DO THIS - These formats are incompatible!

# Compress with the Ruby gem
ruby -r multi_compress -e "File.write('data.lz4', MultiCompress.compress('Hello World', algo: :lz4))"

# Try to decompress with the CLI tool - FAILS!
lz4 -d data.lz4 data.txt
# => Error: compressed file is corrupted

# OR vice versa:
echo "Hello World" | lz4 > data.lz4

# Try to decompress with the gem - FAILS!
ruby -r multi_compress -e "puts MultiCompress.decompress(File.read('data.lz4'))"
# => MultiCompress::DataError: cannot detect compression format
```

**✅ CORRECT - Use consistent tools:**

```ruby
# Compress and decompress with the same gem
data = "Hello World"
compressed = MultiCompress.compress(data, algo: :lz4)
original = MultiCompress.decompress(compressed) # Works perfectly!
```

**Security Features:**

- Default one-shot decompression output cap: **512MB**
- Default streaming cumulative output cap: **2GB**
- `MultiCompress.configure` lets applications set global defaults
- Per-call `max_output_size:` overrides keep local business logic explicit
- `MultiCompress::Dictionary.load` rejects dictionary files larger than **32MB**
- Invalid or corrupted data raises `MultiCompress::DataError` with descriptive messages

```ruby
MultiCompress.configure do |config|
  config.max_output_size = 512 * 1024 * 1024
  config.streaming_max_output_size = 2 * 1024 * 1024 * 1024
end

# Tighten the output cap for untrusted input
MultiCompress.decompress(zstd_data, algo: :zstd, max_output_size: 8 * 1024 * 1024)
```

### Algorithm-specific Shortcuts

```ruby
# Quick compression with defaults
compressed = MultiCompress.zstd(data)   # level: 3
compressed = MultiCompress.lz4(data)    # level: 1
compressed = MultiCompress.brotli(data) # level: 6

# With custom levels
compressed = MultiCompress.zstd(data, level: 9)
compressed = MultiCompress.brotli(data, level: 11) # maximum compression
```

### Named Compression Levels

```ruby
# Use semantic names instead of numbers
# Supported names today: :fastest, :default, :best
MultiCompress.compress(data, algo: :zstd, level: :fastest)   # zstd level 1
MultiCompress.compress(data, algo: :zstd, level: :default)   # zstd level 3
MultiCompress.compress(data, algo: :zstd, level: :best)      # zstd level 19

MultiCompress.compress(data, algo: :lz4, level: :fastest)    # lz4 level 1
MultiCompress.compress(data, algo: :lz4, level: :default)    # lz4 level 1
MultiCompress.compress(data, algo: :lz4, level: :best)       # lz4 level 16

MultiCompress.compress(data, algo: :brotli, level: :fastest) # brotli level 0
MultiCompress.compress(data, algo: :brotli, level: :default) # brotli level 6
MultiCompress.compress(data, algo: :brotli, level: :best)    # brotli level 11
```

## Streaming Compression

Perfect for processing large datasets, HTTP responses, or real-time data.

### Basic Streaming

```ruby
# Compress data in chunks
deflater = MultiCompress::Deflater.new(algo: :zstd, level: 5)

compressed_chunks = []
compressed_chunks << deflater.write("chunk 1")
compressed_chunks << deflater.write("chunk 2")
compressed_chunks << deflater.write("chunk 3")
compressed_chunks << deflater.finish # Important: finalize stream

deflater.close

# Join all compressed data
compressed_data = compressed_chunks.join
```

### Stream Decompression

`MultiCompress.decompress(...)` can auto-detect ZSTD and LZ4 in one-shot mode. `MultiCompress::Inflater` is a streaming API and should be created with an explicit `algo:`.

```ruby
inflater = MultiCompress::Inflater.new(algo: :zstd, max_output_size: 32 * 1024 * 1024)

decompressed_chunks = []
compressed_chunks.each do |chunk|
  decompressed_chunks << inflater.write(chunk)
end
decompressed_chunks << inflater.finish

inflater.close

original_data = decompressed_chunks.join
```

### Advanced Streaming Example

```ruby
def compress_large_file(input_path, output_path)
  deflater = MultiCompress::Deflater.new(algo: :zstd, level: 3)

  File.open(input_path, 'rb') do |input|
    File.open(output_path, 'wb') do |output|
      while chunk = input.read(64 * 1024) # 64KB chunks
        compressed = deflater.write(chunk)
        output.write(compressed) unless compressed.empty?
      end

      # Write final chunk
      final_chunk = deflater.finish
      output.write(final_chunk) unless final_chunk.empty?
    end
  end

  deflater.close
end
```

## Fiber-friendly Execution

Starting with **v0.2.0**, MultiCompress is fully fiber-friendly and plays nicely with Ruby's `Fiber::Scheduler`-based runtimes like [async](https://github.com/socketry/async) and [falcon](https://github.com/socketry/falcon).

### The Problem It Solves

Compression is CPU-bound work. Historically, calling `zstd`/`lz4`/`brotli` from inside an `Async` block would hold the GVL for the entire duration of the compress/decompress call, freezing the event loop and starving every other fiber — HTTP requests, timers, DB queries, everything. On a 50 MB zstd compression that meant tens to hundreds of milliseconds of total reactor stall.

### How It Works

When MultiCompress detects an active `Fiber::Scheduler`, it:

1. Spawns a **dedicated worker thread** via `rb_thread_create` to run the compression with the GVL released.
2. Parks the calling fiber with `rb_fiber_scheduler_block(scheduler, blocker, Qnil)`.
3. While the worker crunches bytes, the scheduler is free to run **every other ready fiber** — IO, timers, parallel compression tasks, you name it.
4. When the worker finishes, it calls `rb_fiber_scheduler_unblock(scheduler, blocker, fiber)` to resume the original fiber with the result.

Outside of a fiber scheduler, MultiCompress uses the same `rb_thread_call_without_gvl` fast path as before — zero overhead, zero behavior change for non-async users.

### What's Covered

| API                              | zstd | lz4 | brotli |
|----------------------------------|------|-----|--------|
| `MultiCompress.compress`         | ✅   | ✅  | ✅     |
| `MultiCompress.decompress`       | ✅   | ✅  | ✅     |
| `MultiCompress::Deflater#write`  | ✅   | ✅* | ✅     |
| `MultiCompress::Inflater#write`  | ✅   | ✅* | ✅     |

<sub>\* LZ4 streaming uses cooperative `scheduler.yield` between 64 KB blocks instead of the worker-thread path — individual LZ4 blocks are too fast (~10μs) for pthread-create overhead (~20-50μs) to be worth it. Other fibers still get scheduling points, just via a lighter mechanism.</sub>

Streaming chunks smaller than **16 KB** (`FIBER_STREAM_THRESHOLD`) stay inline under the GVL to avoid pthread-create overhead on tiny payloads. Production workloads using 64 KB+ chunks get the full benefit.
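In practice this just means sizing your streaming chunks with that threshold in mind. A minimal sketch (file names are placeholders; the 64 KB chunk size simply follows the guidance above):

```ruby
# Chunks of 64 KB and up take the worker-thread path when a Fiber::Scheduler is
# active; chunks under 16 KB are handled inline under the GVL.
CHUNK_SIZE = 64 * 1024

deflater = MultiCompress::Deflater.new(algo: :zstd, level: 3)
File.open("events.ndjson", "rb") do |input|
  File.open("events.ndjson.zst", "wb") do |output|
    while (chunk = input.read(CHUNK_SIZE))
      compressed = deflater.write(chunk)
      output.write(compressed) unless compressed.empty?
    end
    output.write(deflater.finish)
  end
end
deflater.close
```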

### Example: Non-blocking Compression Under Async

```ruby
require 'async'
require 'async/http/internet'
require 'multi_compress'

Async do |task|
  # Fiber 1: make HTTP requests every 100ms — this keeps ticking
  # even while the compression fiber is working.
  poller = task.async do
    internet = Async::HTTP::Internet.new
    loop do
      response = internet.get("https://httpbin.org/uuid")
      puts "Got uuid at #{Time.now}: #{response.read[0..20]}"
      task.sleep(0.1)
    end
  ensure
    internet&.close
  end

  # Fiber 2: compress a huge payload. In v0.1.x this would have
  # frozen the poller for the entire duration. In v0.2.0, the
  # poller keeps firing while compression runs on a worker thread.
  compressor = task.async do
    huge_payload = File.read("dataset.json") # say, 50 MB
    compressed = MultiCompress.compress(huge_payload, algo: :zstd)
    File.binwrite("dataset.json.zst", compressed)
    puts "Compressed #{huge_payload.bytesize} → #{compressed.bytesize} bytes"
  end

  compressor.wait
  poller.stop
end
```

### Example: Streaming With Concurrent IO

```ruby
require 'async'
require 'multi_compress'

Async do |task|
  # Read incoming network data in one fiber, compress it in another.
  # Both fibers make progress concurrently.
  reader, writer = IO.pipe

  producer = task.async do
    File.open("huge.log", "rb") do |f|
      while chunk = f.read(256 * 1024)
        writer.write(chunk)
      end
    end
    writer.close
  end

  compressor = task.async do
    deflater = MultiCompress::Deflater.new(algo: :zstd, level: 3)
    File.open("huge.log.zst", "wb") do |out|
      while chunk = reader.read(256 * 1024)
        # Each 256 KB deflater.write runs on a worker thread when
        # a scheduler is active — the producer fiber keeps reading
        # the pipe in parallel.
        out.write(deflater.write(chunk))
      end
      out.write(deflater.finish)
    end
    deflater.close
    reader.close
  end

  [producer, compressor].each(&:wait)
end
```

### Parallel Compression Across Fibers

Because each compression call blocks only its own fiber, you can fan out multiple compressions and the scheduler will overlap them with each other (and with any other IO fibers):

```ruby
require 'async'
require 'async/barrier'
require 'multi_compress'

files = Dir.glob("logs/*.log")
results = {}

Async do |task|
  barrier = Async::Barrier.new

  files.each do |path|
    barrier.async do
      data = File.read(path)
      results[path] = MultiCompress.compress(data, algo: :zstd)
    end
  end

  barrier.wait
end

results.each do |path, compressed|
  File.binwrite("#{path}.zst", compressed)
end
```

### Requirements

- Ruby **>= 3.1.0**
- A running `Fiber::Scheduler` — typically provided by `Async { ... }` or Falcon's web server

### No Code Changes Required

If you're already using MultiCompress under Async or Falcon, upgrading to v0.2.0 gives you non-blocking compression automatically. No new API, no config flags, nothing to opt into. Just upgrade the gem.

## File I/O Integration

### Writing Compressed Files

```ruby
# Simple file writing
MultiCompress::Writer.open("data.zst", algo: :zstd, level: 3) do |writer|
  writer.write("Line 1\n")
  writer.write("Line 2\n")
  writer.puts("Line 3") # adds newline
end

# Auto-detect algorithm by extension
MultiCompress::Writer.open("logs.lz4") do |writer| # automatically uses LZ4
  1000.times { |i| writer.puts "Log entry #{i}" }
end

# Different file extensions
MultiCompress::Writer.open("api_response.br") # Brotli (.br)
MultiCompress::Writer.open("backup.zst")      # Zstd (.zst)
MultiCompress::Writer.open("cache.lz4")       # LZ4 (.lz4)
```

### Reading Compressed Files

```ruby
# Read entire file
content = MultiCompress::Reader.open("data.zst", max_output_size: 64 * 1024 * 1024) { |r| r.read }

# Read line by line (memory efficient)
MultiCompress::Reader.open("large_log.zst") do |reader|
  reader.each_line do |line|
    process_log_entry(line.chomp)
  end
end

# Manual reading
MultiCompress::Reader.open("data.zst") do |reader|
  while chunk = reader.read(1024)
    process_chunk(chunk)
  end
end
```

### In-Memory I/O with StringIO

```ruby
require 'stringio'
require 'json'

# Compress to a StringIO
sio = StringIO.new
MultiCompress::Writer.open(sio, algo: :brotli, level: 8) do |writer|
  writer.write({ status: 'ok', data: [1, 2, 3] }.to_json)
end
compressed_json = sio.string

# Decompress from a StringIO
sio = StringIO.new(compressed_json)
MultiCompress::Reader.open(sio) do |reader|
  json_data = reader.read
  puts JSON.parse(json_data)
end
```

## Dictionary Compression

Dramatically improves compression on small, similar data (JSON APIs, configs, logs).

**Important**: Dictionary training is available for **Zstd** in the current release (vendored zstd **1.5.2**). Brotli dictionaries can be used, but this gem does not expose Brotli training through `train_dictionary`.

### Training a Dictionary (Zstd)

```ruby
# Collect training samples (similar structure)
api_responses = [
  '{"status":"ok","users":[{"id":1,"name":"Alice"}]}',
  '{"status":"ok","users":[{"id":2,"name":"Bob"}]}',
  '{"status":"ok","users":[{"id":3,"name":"Charlie"}]}',
  # ... more samples
]

# Train a dictionary for Zstd
zstd_dict = MultiCompress::Zstd.train_dictionary(api_responses, size: 16384)

# Save for reuse
zstd_dict.save("api_v1_zstd.dict")
```

**Brotli note**: `MultiCompress::Brotli.train_dictionary(...)` raises `MultiCompress::UnsupportedError` in the current implementation. To use Brotli dictionaries, create a raw dictionary explicitly with `MultiCompress::Dictionary.new(data, algo: :brotli)`.

### Using a Dictionary

```ruby
# Load the dictionary (created with Zstd training)
zstd_dict = MultiCompress::Dictionary.load("api_v1_zstd.dict", algo: :zstd)

response = '{"status":"ok","users":[{"id":4,"name":"David"}]}'
compressed = MultiCompress.compress(response, algo: :zstd, dictionary: zstd_dict)
original = MultiCompress.decompress(compressed, algo: :zstd, dictionary: zstd_dict)

# Brotli can also use raw dictionaries
brotli_dict = MultiCompress::Dictionary.new("shared-prefix-data", algo: :brotli)
brotli_compressed = MultiCompress.compress(response, algo: :brotli, dictionary: brotli_dict)
brotli_original = MultiCompress.decompress(brotli_compressed, algo: :brotli, dictionary: brotli_dict)

puts original == response # => true
```

### Dictionary with Streaming

```ruby
# Load the Zstd-trained dictionary
dict = MultiCompress::Dictionary.load("api_v1_zstd.dict", algo: :zstd)

# Compress multiple API responses using Zstd
deflater = MultiCompress::Deflater.new(algo: :zstd, level: 3, dictionary: dict)

api_responses.each do |response|
  compressed = deflater.write(response)
  store_compressed(compressed)
end

deflater.finish
deflater.close
```

## Real-World Examples

### HTTP Middleware (Rails/Rack)

```ruby
class MultiCompressMiddleware
  COMPRESSIBLE_TYPES = %w[
    application/json
    application/xml
    text/html
    text/css
    text/javascript
    application/javascript
  ].freeze

  def initialize(app)
    @app = app
  end

  def call(env)
    status, headers, body = @app.call(env)

    content_type = headers['Content-Type']&.split(';')&.first

    if should_compress?(env, content_type, body)
      compressed_body = compress_body(body, env)
      headers['Content-Encoding'] = compression_algorithm(env)
      headers['Content-Length'] = compressed_body.bytesize.to_s
      [status, headers, [compressed_body]]
    else
      [status, headers, body]
    end
  end

  private

  def should_compress?(env, content_type, body)
    return false unless COMPRESSIBLE_TYPES.include?(content_type)
    return false if body.sum(&:bytesize) < 1024 # Don't compress small responses

    # Only compress when the client accepts an encoding this gem provides
    accept_encoding = env['HTTP_ACCEPT_ENCODING'] || ''
    %w[br zstd].any? { |enc| accept_encoding.include?(enc) }
  end

  def compress_body(body, env)
    data = body.respond_to?(:join) ? body.join : body.to_s

    case compression_algorithm(env)
    when 'br'
      MultiCompress.brotli(data, level: 4) # Good balance for HTTP
    when 'zstd'
      MultiCompress.zstd(data, level: 3)   # Fast compression
    end
  end

  def compression_algorithm(env)
    accept_encoding = env['HTTP_ACCEPT_ENCODING'] || ''
    return 'br' if accept_encoding.include?('br')
    'zstd'
  end
end
```

### Redis Cache with Compression

```ruby
class MultiCompressedRedisCache
  def initialize(redis: Redis.current, algo: :zstd, level: 3)
    @redis = redis
    @algo = algo
    @level = level
  end

  def write(key, value, expires_in: nil)
    serialized = Marshal.dump(value)
    compressed = MultiCompress.compress(serialized, algo: @algo, level: @level)

    if expires_in
      @redis.setex(key, expires_in, compressed)
    else
      @redis.set(key, compressed)
    end
  end

  def read(key)
    compressed = @redis.get(key)
    return nil unless compressed

    serialized = MultiCompress.decompress(compressed, algo: @algo)
    Marshal.load(serialized)
  rescue MultiCompress::Error => e
    Rails.logger.error "Cache decompression failed for #{key}: #{e.message}"
    nil
  end

  def fetch(key, expires_in: nil, &block)
    value = read(key)
    return value if value

    value = block.call
    write(key, value, expires_in: expires_in)
    value
  end
end

# Usage
cache = MultiCompressedRedisCache.new(algo: :zstd, level: 5)

# Store complex objects
user_data = { id: 123, name: "Alice", posts: [...] }
cache.write("user:123", user_data, expires_in: 3600)

# Retrieve
user = cache.read("user:123")

# Fetch with fallback
user = cache.fetch("user:456", expires_in: 3600) do
  User.find(456).to_h
end
```

### Background Job Processing

```ruby
class MultiCompressedJobProcessor
  def self.enqueue(job_class, *args)
    payload = { class: job_class.name, args: args }
    serialized = JSON.generate(payload)
    compressed = MultiCompress.zstd(serialized, level: 1) # Fast compression for queues

    Redis.current.lpush("jobs", compressed)
  end

  def self.process_jobs
    while compressed_job = Redis.current.brpop("jobs", timeout: 5)&.last
      begin
        serialized = MultiCompress.decompress(compressed_job)
        payload = JSON.parse(serialized)

        job_class = Object.const_get(payload['class'])
        job_class.new.perform(*payload['args'])
      rescue => e
        warn "Job processing failed: #{e.message}"
      end
    end
  end
end
```

### Log File Rotation with Compression

```ruby
class MultiCompressedLogger
  def initialize(base_path, max_size: 10 * 1024 * 1024) # 10MB
    @base_path = base_path
    @max_size = max_size
    @current_file = nil
    @current_writer = nil
  end

  def log(message)
    ensure_current_file_open

    line = "[#{Time.now.iso8601}] #{message}\n"
    @current_writer.write(line)

    rotate_if_needed
  end

  def close
    @current_writer&.close
    @current_file&.close
  end

  private

  def ensure_current_file_open
    return if @current_writer && !@current_writer.closed?

    timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
    compressed_path = "#{@base_path}_#{timestamp}.zst"

    @current_writer = MultiCompress::Writer.open(compressed_path, algo: :zstd, level: 3)
  end

  def rotate_if_needed
    return unless @current_writer

    # Check file size (approximate, since it's compressed)
    if @current_writer.tell > @max_size
      @current_writer.close
      @current_writer = nil
    end
  end
end

# Usage
logger = MultiCompressedLogger.new("/var/log/app/application.log")
logger.log("Application started")
logger.log("Processing request...")
logger.close
```

## Utility Functions

### Algorithm Information

```ruby
# Check available algorithms
MultiCompress.algorithms # => [:zstd, :lz4, :brotli]

# Check if a specific algorithm is available
MultiCompress.available?(:zstd) # => true
MultiCompress.available?(:fake) # => false

# Get library versions
MultiCompress.version(:zstd)   # => "1.5.2"
MultiCompress.version(:lz4)    # => "1.10.0"
MultiCompress.version(:brotli) # => "1.1.0"
```

### Data Integrity

```ruby
# Calculate CRC32 checksum
data = "Important data"
checksum = MultiCompress.crc32(data) # => Integer

# Verify data integrity
received_data = get_data_from_network()
if MultiCompress.crc32(received_data) == expected_checksum
  puts "Data integrity verified"
end
```

### Compression Levels

```ruby
# Access level constants
puts MultiCompress::Zstd::MIN_LEVEL       # => 1
puts MultiCompress::Zstd::MAX_LEVEL       # => 22
puts MultiCompress::Zstd::DEFAULT_LEVEL   # => 3

puts MultiCompress::LZ4::MIN_LEVEL        # => 1
puts MultiCompress::LZ4::MAX_LEVEL        # => 16
puts MultiCompress::LZ4::DEFAULT_LEVEL    # => 1

puts MultiCompress::Brotli::MIN_LEVEL     # => 0
puts MultiCompress::Brotli::MAX_LEVEL     # => 11
puts MultiCompress::Brotli::DEFAULT_LEVEL # => 6

# Validate level before compression
def safe_compress(data, algo, level)
  case algo
  when :zstd
    level = level.clamp(MultiCompress::Zstd::MIN_LEVEL, MultiCompress::Zstd::MAX_LEVEL)
  when :lz4
    level = level.clamp(MultiCompress::LZ4::MIN_LEVEL, MultiCompress::LZ4::MAX_LEVEL)
  when :brotli
    level = level.clamp(MultiCompress::Brotli::MIN_LEVEL, MultiCompress::Brotli::MAX_LEVEL)
  end

  MultiCompress.compress(data, algo: algo, level: level)
end
```

## Error Handling

### Exception Types

```ruby
begin
  compressed = MultiCompress.compress(data, algo: :zstd, level: 999)
rescue MultiCompress::LevelError => e
  puts "Invalid compression level: #{e.message}"
rescue MultiCompress::Error => e
  puts "General compression error: #{e.message}"
end

# All exception types
MultiCompress::Error            # Base class for all MultiCompress errors
MultiCompress::DataError        # Corrupt or invalid compressed data
MultiCompress::MemError         # Out of memory during operation
MultiCompress::StreamError      # Stream operation error (e.g. write after close)
MultiCompress::UnsupportedError # Unsupported operation (e.g. dictionary with LZ4)
MultiCompress::LevelError       # Invalid compression level
```

### Robust Error Handling

```ruby
def safe_decompress(compressed_data, fallback: nil)
  MultiCompress.decompress(compressed_data)
rescue MultiCompress::DataError => e
  Rails.logger.warn "Data corruption detected: #{e.message}"
  fallback
rescue MultiCompress::Error => e
  Rails.logger.error "Decompression failed: #{e.message}"
  fallback
end

def safe_compress_with_retry(data, algo: :zstd, level: 3, retries: 2)
  attempt = 0

  begin
    MultiCompress.compress(data, algo: algo, level: level)
  rescue MultiCompress::MemError
    attempt += 1
    if attempt <= retries
      GC.start # Try to free memory
      level = [level - 1, 1].max # Reduce compression level
      retry
    else
      raise
    end
  end
end
```

## Performance Tips

### Choosing Compression Levels

```ruby
# For real-time applications (low latency)
MultiCompress.compress(data, algo: :lz4, level: 1)     # Fastest

# For network transfer (balance speed/size)
MultiCompress.compress(data, algo: :zstd, level: 3)    # Default balance

# For archival (maximum compression)
MultiCompress.compress(data, algo: :brotli, level: 11) # Best ratio

# For streaming/hot paths
MultiCompress.compress(data, algo: :zstd, level: 1)    # Fast zstd
```

### Memory Management

```ruby
# Process large files in chunks to avoid memory issues
def process_large_file(input_path, output_path)
  deflater = MultiCompress::Deflater.new(algo: :zstd, level: 3)

  begin
    File.open(input_path, 'rb') do |input|
      File.open(output_path, 'wb') do |output|
        while chunk = input.read(1024 * 1024) # 1MB chunks
          compressed = deflater.write(chunk)
          output.write(compressed) unless compressed.empty?

          # Explicitly manage memory for very large files
          GC.start if input.tell % (100 * 1024 * 1024) == 0 # Every 100MB
        end

        output.write(deflater.finish)
      end
    end
  ensure
    deflater.close # Always close to free resources
  end
end
```

### Benchmarking Your Data

```ruby
require 'benchmark'

def benchmark_algorithms(data)
  algorithms = [:lz4, :zstd, :brotli]
  results = {}

  algorithms.each do |algo|
    time = Benchmark.realtime do
      compressed = MultiCompress.compress(data, algo: algo)
      MultiCompress.decompress(compressed, algo: algo) # time the full round trip
    end

    compressed = MultiCompress.compress(data, algo: algo)
    ratio = compressed.bytesize.to_f / data.bytesize

    results[algo] = {
      time: time.round(4),
      ratio: ratio.round(3),
      size: compressed.bytesize
    }
  end

  results
end

# Test with your specific data
sample_data = File.read("typical_payload.json")
results = benchmark_algorithms(sample_data)
puts results
# => {:lz4=>{:time=>0.0012, :ratio=>0.234, :size=>1024}, ...}
```

This guide covers comprehensive usage of the MultiCompress gem. For advanced use cases or questions, see the source code or open an issue on GitHub.