lzwrb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/lzwrb.rb +392 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3ca54b9ac48840e65fc5d307707491fc3f06dae167a62fb14e1700877a169cc8
|
4
|
+
data.tar.gz: ca28f83f51533b04d093f79cf16f4a23cb02a646d496e48ef7f98802128290f8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0a146638c2720f8cc5b2e6d8af24f461ddad63d09eb4d86660897a73f7832cdf1439ef53aeb9e52754a536e5d36b5f4799b488864372195a0b5c47154db76b86
|
7
|
+
data.tar.gz: 663e70084a893fa73b210ead73e91656f16753f00da72ac00e7f1474e861f65f98b2b81e048959daa8938c0a227c64338f14c463526589573b4ccaad163da079
|
data/lib/lzwrb.rb
ADDED
@@ -0,0 +1,392 @@
|
|
1
|
+
class LZWrb
|
2
|
+
|
3
|
+
# Default alphabets: each one is an array of single-character strings that can
# be passed as the `alphabet:` argument of the constructor.
# DEC holds the decimal digits '0'..'9'. It is built from a character range
# because Integer#chr would yield the control characters "\x00".."\t" instead.
DEC         = ('0'..'9').to_a.freeze
HEX_UPPER   = (0...16).map { |n| n.to_s(16).upcase }.freeze
HEX_LOWER   = (0...16).map { |n| n.to_s(16).downcase }.freeze
LATIN_UPPER = ('A'..'Z').to_a.freeze
LATIN_LOWER = ('a'..'z').to_a.freeze
ALPHA_UPPER = (LATIN_UPPER + DEC).freeze
ALPHA_LOWER = (LATIN_LOWER + DEC).freeze
ALPHA       = (LATIN_UPPER + LATIN_LOWER + DEC).freeze
PRINTABLE   = (32...127).map(&:chr).freeze # ' ' through '~'
ASCII       = (0...128).map(&:chr).freeze
BINARY      = (0...256).map(&:chr).freeze  # Every possible byte value
|
15
|
+
|
16
|
+
# Default presets: option hashes that can be passed as the `preset:` argument
# of the constructor. Explicit keyword arguments take precedence over them.

# Variable 8-12 bit codes with CLEAR and STOP codes and deferred clears
PRESET_GIF = { min_bits: 8, max_bits: 12, lsb: true, clear: true, stop: true, deferred: true }

# Constant 16 bit codes, no special codes
PRESET_FAST = { min_bits: 16, max_bits: 16, lsb: true, clear: false, stop: false }

# Variable 8-16 bit codes, no special codes
PRESET_BEST = { min_bits: 8, max_bits: 16, lsb: true, clear: false, stop: false }
|
39
|
+
|
40
|
+
# Verbosity levels of the encoder/decoder, mapped to increasing integers:
#   silent  (0) - Don't print anything to the console
#   minimal (1) - Print only errors
#   quiet   (2) - Print errors and warnings
#   normal  (3) - Print errors, warnings and regular encoding information
#   debug   (4) - Print everything, including debug details
VERBOSITY = [:silent, :minimal, :quiet, :normal, :debug].each_with_index.to_h
|
48
|
+
|
49
|
+
# Class-wide fallback values, used by the constructor when neither an explicit
# argument nor the preset provides a setting. None of them may be nil.
@@min_bits = 8     # Shortest code length, in bits
@@max_bits = 16    # Longest code length, in bits, before the table is rebuilt
@@lsb      = true  # Pack least significant bits first
@@clear    = false # Emit CLEAR codes
@@stop     = false # Emit STOP codes
@@deferred = false # Treat CLEAR codes as deferred
|
56
|
+
|
57
|
+
# Build an encoder/decoder configuration.
#
# Every setting is resolved in this order: explicit keyword argument, then the
# `preset` hash, then the class default. Invalid `alphabet` or `bits` values
# log an error and terminate the process with `exit`.
def initialize(
  preset:    nil,     # Predefined configuration hash (PRESET_GIF...)
  bits:      nil,     # Code bit size for constant length encoding (supersedes min/max bit size)
  min_bits:  nil,     # Minimum code bit size for variable length encoding (superseded by 'bits')
  max_bits:  nil,     # Maximum code bit size for variable length encoding (superseded by 'bits')
  binary:    nil,     # Use binary encoding (vs regular text encoding)
  alphabet:  BINARY,  # Set of characters that compose the messages to encode
  safe:      false,   # First encoding pass to verify alphabet covers all data
  lsb:       nil,     # Use least or most significant bit packing
  clear:     nil,     # Use clear codes every time the table gets reinitialized
  stop:      nil,     # Use stop codes at the end of the encoding
  deferred:  nil,     # Use deferred clear codes
  verbosity: :normal  # Verbosity level of the encoder
)
  # Parse preset
  params = preset || {}

  # Verbosity. The level must be assigned BEFORE warning, because the logging
  # helpers compare against @verbosity and fail while it is still nil.
  @verbosity = VERBOSITY[verbosity] || VERBOSITY[:normal]
  warn("Unrecognized verbosity level, using normal.") unless VERBOSITY.key?(verbosity)

  # Alphabet: every entry must be a string of exactly one character (empty
  # strings are rejected too)
  if !alphabet.is_a?(Array) || alphabet.any? { |a| !a.is_a?(String) || a.length != 1 }
    err('The alphabet must be an array of characters, i.e., of strings of length 1')
    exit
  end
  @alphabet = alphabet.uniq
  warn('Removed duplicate entries from alphabet') if @alphabet.size < alphabet.size

  # Binary compression (defaults to true only for the full byte alphabet)
  @binary = binary.nil? ? alphabet == BINARY : binary

  # Safe mode for encoding (verifies that the data provided is composed
  # exclusively by characters from the alphabet)
  @safe = safe

  # Code bit size
  if bits
    if !bits.is_a?(Integer) || bits < 1
      err('Code size should be a positive integer.')
      exit
    else
      @min_bits = bits
      @max_bits = bits
    end
  else
    @min_bits = find_arg(min_bits, params[:min_bits], @@min_bits)
    @max_bits = find_arg(max_bits, params[:max_bits], @@max_bits)
    if @max_bits < @min_bits
      warn("Max code size (#{@max_bits}) should be higher than min code size (#{@min_bits}): changed max code size to #{@min_bits}.")
      @max_bits = @min_bits
    end
  end

  # Determine min bits based on alphabet length if not specified. An explicit
  # 'bits' counts as specified, otherwise it would be silently overridden here
  # and constant length encoding could never be requested.
  if !bits && !find_arg(min_bits, params[:min_bits])
    @min_bits = (@alphabet.size - 1).bit_length
    @max_bits = @min_bits if @max_bits < @min_bits
  end

  # Clear and stop codes
  use_clear = find_arg(clear, params[:clear], @@clear)
  use_stop  = find_arg(stop, params[:stop], @@stop)
  if !use_stop && @min_bits < 8
    use_stop = true
    # Warning if stop codes were explicitly disabled (false, NOT nil)
    if find_arg(stop, params[:stop]) == false
      warn("Stop codes are necessary for code sizes below 8 bits to prevent ambiguity: enabled stop codes.")
    end
  end

  # Alphabet length checks
  extra = (use_clear ? 1 : 0) + (use_stop ? 1 : 0)
  # Max bits doesn't fit alphabet (needs explicit adjustment)
  if (@alphabet.size + extra) > (1 << @max_bits)
    if @binary
      # Binary mode shrinks the alphabet rather than growing the code size
      @alphabet = @alphabet.take(1 << (@max_bits - 1))
      warn("Using #{@max_bits - 1} bit binary alphabet (#{1 << (@max_bits - 1)} entries).")
    else
      @max_bits = (@alphabet.size + extra).bit_length
      warn("Max code size needs to fit the alphabet (and clear & stop codes, if used): increased to #{@max_bits} bits.")
    end
  end
  # Min bits doesn't fit alphabet (needs implicit adjustment)
  if (@alphabet.size + extra) > (1 << @min_bits)
    @min_bits = (@alphabet.size + extra - 1).bit_length
  end

  # Clear and stop codes take the first indices after the alphabet
  idx = @alphabet.size - 1
  @clear = use_clear ? idx += 1 : nil
  @stop  = use_stop  ? idx += 1 : nil
  @deferred = find_arg(deferred, params[:deferred], @@deferred)

  # Least/most significant bit packing order
  @lsb = find_arg(lsb, params[:lsb], @@lsb)
end
|
158
|
+
|
159
|
+
# LZW-encode `data` (a String) with the configured settings and return the
# packed result as a binary String.
#
# Any error raised while encoding is logged and then terminates the process
# (the rescue handler calls `lex` with fatal = true).
def encode(data)
  # Log
  log("<- Encoding #{format_size(data.bytesize)} with #{format_params}.")
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments
  stime = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Setup
  init(true)
  table_init
  # In binary mode the input is processed byte by byte. Without this, a
  # non-ASCII UTF-8 string is iterated as multi-byte characters, which are
  # never present in a table of single bytes.
  data = data.b if @binary
  verify_data(data) if @safe

  # LZW-encode data
  buf = ''
  put_code(@clear) if !@clear.nil?
  data.each_char do |c|
    next_buf = buf + c
    if table_has(next_buf)
      buf = next_buf
    else
      # Longest known prefix found: emit it and learn the extended string
      put_code(@table[buf])
      table_add(next_buf)
      table_check
      buf = c
    end
  end
  # Flush the pending prefix; it is only empty when no character was read
  put_code(@table[buf]) unless buf.empty?
  put_code(@stop) if !@stop.nil?

  # Pack codes to binary string
  res = @buffer.pack('C*')

  # Return
  ttime = Process.clock_gettime(Process::CLOCK_MONOTONIC) - stime
  log("-> Encoding finished in #{"%.3fs" % [ttime]} (avg. #{"%.3f" % [(8.0 * data.bytesize / 1024 ** 2) / ttime]} mbit\/s).")
  log("-> Encoded data: #{format_size(res.bytesize)} (#{"%5.2f%%" % [100 * (1 - res.bytesize.to_f / data.bytesize)]} compression).")
  res
rescue => e
  lex(e, 'Encoding error', true)
end
|
197
|
+
|
198
|
+
# Optimization? Unpack bits subsequently, rather than converting between strings and ints
|
199
|
+
# LZW-decode `data` (a packed binary String) with the configured settings and
# return the decoded binary String.
#
# Any error raised while decoding is logged; the rescue handler calls `lex`
# with fatal = false, so the method then returns lex's result instead of the
# decoded data.
def decode(data)
  # Log
  log("<- Decoding #{format_size(data.bytesize)} with #{format_params}.")
  # Monotonic clock: elapsed time is unaffected by wall-clock adjustments
  stime = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Setup
  init(false)
  table_init
  bits = data.unpack('b*')[0] # Bit string, least significant bit of each byte first
  len = bits.length

  # Parse data
  off = 0
  out = ''.b
  old_code = nil
  width = @bits
  while off + width <= len
    # Parse code (the slice is LSB-first, so reverse it before converting)
    code = bits[off ... off + width].reverse.to_i(2)
    off += width

    # Handle clear and stop codes, if present
    if code == @clear && @clear
      table_init
      old_code = nil
      width = @bits
      next
    end
    break if code == @stop && @stop

    # Handle regular codes
    if old_code.nil?      # Initial code
      out << @table[code]
    elsif table_has(code) # Existing code
      out << @table[code]
      table_add(@table[old_code] + @table[code][0])
    else                  # New code: previous string plus its own first character
      entry = @table[old_code] + @table[old_code][0]
      out << entry
      table_add(entry)
    end

    # Prepare next iteration. When CLEAR codes are in use and the table was
    # just reinitialized, keep the current width: the width is reset when the
    # CLEAR code itself is read.
    old_code = table_check ? nil : code
    width = @bits unless !old_code && @clear
  end

  # Return
  ttime = Process.clock_gettime(Process::CLOCK_MONOTONIC) - stime
  log("-> Decoding finished in #{"%.3fs" % [ttime]} (avg. #{"%.3f" % [(8.0 * data.bytesize / 1024 ** 2) / ttime]} mbit\/s).")
  log("-> Decoded data: #{format_size(out.bytesize)} (#{"%5.2f%%" % [100 * (1 - data.bytesize.to_f / out.bytesize)]} compression).")
  out
rescue => e
  lex(e, 'Decoding error', false)
end
|
253
|
+
|
254
|
+
private
|
255
|
+
|
256
|
+
# Reset the per-job state. Must run before every compression or
# decompression job.
#
# compress - true for a compression job, false for a decompression job
def init(compress)
  @compress = compress
  @buffer   = [] # Output bytes produced by the encoder
  @boff     = 0  # Bit offset inside the last buffer byte, used when packing
  # The decoder always runs one table entry behind the encoder
  @step = if @compress
    0
  else
    1
  end
end
|
264
|
+
|
265
|
+
# < --------------------------- PARSING METHODS ---------------------------- >
|
266
|
+
|
267
|
+
# Return the first argument that is not nil (false counts as a value), or nil
# when every argument is nil or none is given
def find_arg(*args)
  args.find { |candidate| !candidate.nil? }
end
|
272
|
+
|
273
|
+
# < --------------------------- LOGGING METHODS ---------------------------- >
|
274
|
+
|
275
|
+
# Build a one-line, human readable summary of the current configuration
# (code sizes, packing order, special codes and mode) for log messages
def format_params
  code_size =
    if @min_bits == @max_bits
      @min_bits
    else
      "#{@min_bits}-#{@max_bits}"
    end

  special =
    if @clear && @stop
      'CLEAR & STOP codes'
    elsif @clear
      'CLEAR codes'
    elsif @stop
      'STOP codes'
    else
      'no special codes'
    end

  packing = @lsb ? 'LSB' : 'MSB'
  mode    = @binary ? 'binary' : 'textual'

  "#{code_size} bit codes, #{packing} packing, #{special}, #{mode} mode"
end
|
282
|
+
|
283
|
+
# Format a byte count using binary units (B, KiB, MiB, GiB).
#
# sz - size in bytes
#
# Returns a String such as "512B" or "1.500KiB". Anything of 1024 GiB or more
# is still expressed in GiB.
def format_size(sz)
  # Pick the unit with integer arithmetic: every 10 bits is one power of 1024.
  # A logarithm would raise FloatDomainError for a size of 0 (log is -Infinity)
  # and is exposed to rounding at exact powers of 1024.
  mag = ((sz.to_i.bit_length - 1) / 10).clamp(0, 3)
  unit = ['B', 'KiB', 'MiB', 'GiB']
  fmt = mag == 0 ? '%d' : '%.3f'
  "#{fmt}%s" % [sz.to_f / 1024 ** mag, unit[mag]]
end
|
289
|
+
|
290
|
+
# Print a timestamped message to standard output, provided its level does not
# exceed the configured verbosity.
#
# txt   - message text
# level - verbosity level the message belongs to (3 by default)
def log(txt, level = 3)
  if level <= @verbosity
    stamp = Time.now.strftime('[%H:%M:%S.%L]')
    puts "#{stamp} LZW #{txt}"
  end
end
|
294
|
+
|
295
|
+
# Log an error message: bold red cross marker, red text, verbosity level 1
def err(txt)
  log("\x1B[31m\x1B[1m✗\x1B[0m \x1B[31m#{txt}\x1B[0m", 1)
end
|
296
|
+
# Log a warning message: bold yellow '!' marker, yellow text, verbosity level 2
def warn(txt)
  log("\x1B[33m\x1B[1m!\x1B[0m \x1B[33m#{txt}\x1B[0m", 2)
end
|
297
|
+
# Log a debug message: bold grey 'D' marker, grey text, verbosity level 4
def dbg(txt)
  log("\x1B[90m\x1B[1mD\x1B[0m \x1B[90m#{txt}\x1B[0m", 4)
end
|
298
|
+
|
299
|
+
# Log an exception: the message goes out as an error and the backtrace as
# debug output.
#
# e     - the exception to report
# msg   - text placed before the exception message
# fatal - when true, terminate the process with exit status 1 afterwards
def lex(e, msg = '', fatal = false)
  err("#{msg}: #{e}")
  # Build a fresh array: the exception's own backtrace must not be mutated,
  # and it is nil for an exception that was never raised
  dbg((['Backtrace:'] + Array(e.backtrace)).join("\n"))
  exit(1) if fatal
end
|
304
|
+
|
305
|
+
# < --------------------------- TABLE METHODS ---------------------------- >
|
306
|
+
|
307
|
+
# (Re)build the code table from the alphabet. Runs at the start of every
# compression / decompression job and again whenever the table fills up,
# which may happen many times within a single job.
#
# While compressing, the table is a hash from string to code. While
# decompressing, it is an array indexed by code, which is faster.
def table_init
  # CLEAR and STOP, when in use, each take one code right after the alphabet
  reserved = [@clear, @stop].count { |code| code }

  if @compress
    @table = @alphabet.each_with_index.to_h
  else
    # Placeholder entries keep the array indices aligned with the codes
    @table = @alphabet.dup
    reserved.times { @table << '' }
  end

  # Highest code currently assigned
  @key = @alphabet.size - 1 + reserved

  # Current code width: enough for every assigned code, never below the minimum
  @bits = [@key.bit_length, @min_bits].max
end
|
330
|
+
|
331
|
+
# Check whether the table contains an entry.
#
# val - a string while compressing (looked up as a key), or a code while
#       decompressing (present when it does not exceed the highest code)
def table_has(val)
  if @compress
    @table.key?(val)
  else
    val <= @key
  end
end
|
334
|
+
|
335
|
+
# Register a new string under the next free code. Does nothing once the table
# has reached the capacity allowed by the maximum code size.
def table_add(val)
  capacity = 1 << @max_bits
  return unless @key + @step < capacity

  # Take the next code and store the entry
  @key += 1
  if @compress
    @table[val] = @key
  else
    @table << val
  end
end
|
344
|
+
|
345
|
+
# Inspect the table size after an insertion: widen the code length when the
# current width is used up, or reinitialize the table when the maximum width
# is used up.
#
# Returns true when the table was reinitialized, false otherwise.
def table_check
  # Nothing to do until the current code width is exhausted
  return false unless @key + @step == (1 << @bits)

  # Room to grow: use one more bit per code
  unless @bits == @max_bits
    @bits += 1
    return false
  end

  # Table is full. The encoder announces the reset with a CLEAR code, if used
  put_code(@clear) if @compress && @clear

  # A decoder using deferred CLEAR codes keeps its table instead of resetting
  refresh = @compress || !@clear || !@deferred
  table_init if refresh
  refresh
end
|
359
|
+
|
360
|
+
# < ------------------------- ENCODING METHODS --------------------------- >
|
361
|
+
|
362
|
+
# Safe-mode check: make sure every character of the data belongs to the
# alphabet.
#
# Raises a RuntimeError when an unknown character is found.
def verify_data(data)
  # Hash lookup keeps the membership test cheap for each character
  known = @alphabet.each_with_index.to_h
  return if data.each_char.all? { |char| known.key?(char) }

  raise "Data contains characters not present in the alphabet"
end
|
366
|
+
|
367
|
+
# Append a code to the output buffer using the current code width (@bits).
# Bits are packed starting from the least significant free bit of the last
# buffer byte.
#
# Raises a RuntimeError when the code is nil, i.e. the string to encode was
# not found in the table.
def put_code(code)
  raise 'Found character not in alphabet' if code.nil?

  remaining = @bits
  while remaining > 0
    # Free bits in the byte currently being filled
    free = 8 - @boff

    # Top up the last byte if it is partially filled, otherwise open a new one
    if @boff.zero?
      @buffer << (code & 0xFF)
    else
      @buffer[-1] |= (code << @boff) & 0xFF
    end

    # The rest of the code fit without filling the byte: record offset, done
    if remaining < free
      @boff += remaining
      return
    end

    # Byte is full: drop the bits just written and continue on a new byte
    remaining -= free
    code >>= free
    @boff = 0
  end
end
|
391
|
+
|
392
|
+
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: lzwrb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- edelkas
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-02-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: |2
|
14
|
+
This library provides LZW encoding and decoding capabilities with no
|
15
|
+
dependencies and reasonably fast speed. It is highly configurable,
|
16
|
+
supporting both constant and variable code lengths, custom alphabets,
|
17
|
+
usage of clear/stop codes...
|
18
|
+
|
19
|
+
It is compatible with the GIF specification, and comes equipped with
|
20
|
+
several presets. Eventually I'd like to add compatibility with other
|
21
|
+
standards, such as the ones used for UNIX compress, PDF and TIFF.
|
22
|
+
email:
|
23
|
+
executables: []
|
24
|
+
extensions: []
|
25
|
+
extra_rdoc_files: []
|
26
|
+
files:
|
27
|
+
- lib/lzwrb.rb
|
28
|
+
homepage: https://github.com/edelkas/lzwrb
|
29
|
+
licenses: []
|
30
|
+
metadata:
|
31
|
+
source_code_uri: https://github.com/edelkas/lzwrb
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
requirements: []
|
47
|
+
rubygems_version: 3.1.6
|
48
|
+
signing_key:
|
49
|
+
specification_version: 4
|
50
|
+
summary: Pure Ruby LZW encoder/decoder with a wide range of settings
|
51
|
+
test_files: []
|