lzwrb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/lzwrb.rb +392 -0
  3. metadata +51 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3ca54b9ac48840e65fc5d307707491fc3f06dae167a62fb14e1700877a169cc8
4
+ data.tar.gz: ca28f83f51533b04d093f79cf16f4a23cb02a646d496e48ef7f98802128290f8
5
+ SHA512:
6
+ metadata.gz: 0a146638c2720f8cc5b2e6d8af24f461ddad63d09eb4d86660897a73f7832cdf1439ef53aeb9e52754a536e5d36b5f4799b488864372195a0b5c47154db76b86
7
+ data.tar.gz: 663e70084a893fa73b210ead73e91656f16753f00da72ac00e7f1474e861f65f98b2b81e048959daa8938c0a227c64338f14c463526589573b4ccaad163da079
data/lib/lzwrb.rb ADDED
@@ -0,0 +1,392 @@
1
# Configurable LZW encoder / decoder.
#
# Supports constant and variable code lengths, custom alphabets and optional
# CLEAR / STOP codes (see the PRESET_* constants for ready-made settings).
#
# Usage:
#   lzw = LZWrb.new(preset: LZWrb::PRESET_GIF)
#   packed = lzw.encode(data)
#   data   = lzw.decode(packed)
#
# NOTE: the bit packer (put_code) and unpacker (decode) in this class always
# work least-significant-bit first; the `lsb` setting is stored and reported
# in the log output but is not consulted by them.
class LZWrb

  # Default alphabets (arrays of 1-character strings)
  DEC         = ('0'..'9').to_a.freeze # Decimal digits '0'..'9'
  HEX_UPPER   = (0...16).map { |n| n.to_s(16).upcase }.freeze
  HEX_LOWER   = (0...16).map { |n| n.to_s(16).downcase }.freeze
  LATIN_UPPER = ('A'..'Z').to_a.freeze
  LATIN_LOWER = ('a'..'z').to_a.freeze
  ALPHA_UPPER = (LATIN_UPPER + DEC).freeze
  ALPHA_LOWER = (LATIN_LOWER + DEC).freeze
  ALPHA       = (LATIN_UPPER + LATIN_LOWER + DEC).freeze
  PRINTABLE   = (32...127).map(&:chr).freeze
  ASCII       = (0...128).map(&:chr).freeze
  BINARY      = (0...256).map(&:chr).freeze

  # Default presets
  PRESET_GIF = {
    min_bits: 8,
    max_bits: 12,
    lsb:      true,
    clear:    true,
    stop:     true,
    deferred: true
  }.freeze
  PRESET_FAST = {
    min_bits: 16,
    max_bits: 16,
    lsb:      true,
    clear:    false,
    stop:     false
  }.freeze
  PRESET_BEST = {
    min_bits: 8,
    max_bits: 16,
    lsb:      true,
    clear:    false,
    stop:     false
  }.freeze

  # Verbosity of the encoder/decoder
  VERBOSITY = {
    silent:  0, # Don't print anything to the console
    minimal: 1, # Print only errors
    quiet:   2, # Print errors and warnings
    normal:  3, # Print errors, warnings and regular encoding information
    debug:   4  # Print everything, including debug details about the encoding process
  }.freeze

  # Class default values (no NIL's here!)
  @@min_bits = 8     # Minimum code bit length
  @@max_bits = 16    # Maximum code bit length before rebuilding table
  @@lsb      = true  # Least significant bit first order
  @@clear    = false # Use CLEAR codes
  @@stop     = false # Use STOP codes
  @@deferred = false # Use deferred CLEAR codes

  # Build an encoder/decoder. Explicit keyword arguments take precedence over
  # the values of +preset+, which take precedence over the class defaults.
  #
  # Invalid +alphabet+ or +bits+ values are reported through the error log and
  # terminate the process with exit status 1 (SystemExit).
  def initialize(
      preset:    nil,     # Predefined configurations (GIF...)
      bits:      nil,     # Code bit size for constant length encoding (supersedes min/max bit size)
      min_bits:  nil,     # Minimum code bit size for variable length encoding (superseded by 'bits')
      max_bits:  nil,     # Maximum code bit size for variable length encoding (superseded by 'bits')
      binary:    nil,     # Use binary encoding (vs regular text encoding)
      alphabet:  BINARY,  # Set of characters that compose the messages to encode
      safe:      false,   # First encoding pass to verify alphabet covers all data
      lsb:       nil,     # Use least or most significant bit packing
      clear:     nil,     # Use clear codes every time the table gets reinitialized
      stop:      nil,     # Use stop codes at the end of the encoding
      deferred:  nil,     # Use deferred clear codes
      verbosity: :normal  # Verbosity level of the encoder
    )
    # Parse preset
    params = preset || {}

    # Verbosity. The fallback is assigned BEFORE warning, because the logging
    # helpers compare the message level against @verbosity.
    level = VERBOSITY[verbosity]
    @verbosity = level || VERBOSITY[:normal]
    warn("Unrecognized verbosity level, using normal.") if level.nil?

    # Alphabet: a non-empty array whose entries are all 1-character strings
    valid_alphabet = alphabet.is_a?(Array) && !alphabet.empty? &&
                     alphabet.all? { |a| a.is_a?(String) && a.length == 1 }
    unless valid_alphabet
      err('The alphabet must be a non-empty array of characters, i.e., of strings of length 1')
      exit(1)
    end
    @alphabet = alphabet.uniq
    warn('Removed duplicate entries from alphabet') if @alphabet.size < alphabet.size

    # Binary compression
    @binary = binary.nil? ? alphabet == BINARY : binary

    # Safe mode for encoding (verifies that the data provided is composed exclusively
    # by characters from the alphabet)
    @safe = safe

    # Code bit size
    if bits
      if !bits.is_a?(Integer) || bits < 1
        err('Code size should be a positive integer.')
        exit(1)
      end
      @min_bits = bits
      @max_bits = bits
    else
      @min_bits = find_arg(min_bits, params[:min_bits], @@min_bits)
      @max_bits = find_arg(max_bits, params[:max_bits], @@max_bits)
      if @max_bits < @min_bits
        warn("Max code size (#{@max_bits}) should be higher than min code size (#{@min_bits}): changed max code size to #{@min_bits}.")
        @max_bits = @min_bits
      end
    end

    # Determine min bits based on alphabet length if not specified. Skipped
    # when 'bits' was given, since a constant code size fixes both bounds.
    if !bits && !find_arg(min_bits, params[:min_bits])
      @min_bits = (@alphabet.size - 1).bit_length
      @max_bits = @min_bits if @max_bits < @min_bits
    end

    # Clear and stop codes
    use_clear = find_arg(clear, params[:clear], @@clear)
    use_stop  = find_arg(stop, params[:stop], @@stop)
    if !use_stop && @min_bits < 8
      use_stop = true
      # Warning if stop codes were explicitly disabled (false, NOT nil)
      if find_arg(stop, params[:stop]) == false
        warn("Stop codes are necessary for code sizes below 8 bits to prevent ambiguity: enabled stop codes.")
      end
    end

    # Alphabet length checks
    extra = (use_clear ? 1 : 0) + (use_stop ? 1 : 0)
    # Max bits doesn't fit alphabet (needs explicit adjustment)
    if (@alphabet.size + extra) > 1 << @max_bits
      if @binary
        # Keep only the first 2^(max_bits - 1) symbols
        @alphabet = @alphabet.take(1 << (@max_bits - 1))
        warn("Using #{@max_bits - 1} bit binary alphabet (#{1 << (@max_bits - 1)} entries).")
      else
        @max_bits = (@alphabet.size + extra).bit_length
        warn("Max code size needs to fit the alphabet (and clear & stop codes, if used): increased to #{@max_bits} bits.")
      end
    end
    # Min bits doesn't fit alphabet (needs implicit adjustment)
    if (@alphabet.size + extra) > 1 << @min_bits
      @min_bits = (@alphabet.size + extra - 1).bit_length
    end

    # Clear and stop codes take the first code values after the alphabet
    idx = @alphabet.size - 1
    @clear = use_clear ? idx += 1 : nil
    @stop  = use_stop  ? idx += 1 : nil
    @deferred = find_arg(deferred, params[:deferred], @@deferred)

    # Least/most significant bit packing order
    @lsb = find_arg(lsb, params[:lsb], @@lsb)

    # In binary mode with a single-byte alphabet the input is read byte by
    # byte, whatever the encoding of the string handed to #encode
    @bytewise = @binary && @alphabet.all? { |sym| sym.bytesize == 1 }
  end

  # LZW-encode +data+ (a String) and return the packed codes as a binary
  # String. Empty input produces only the CLEAR / STOP codes, if enabled.
  #
  # Any error raised while encoding (e.g. a character missing from the
  # alphabet) is logged and terminates the process with exit status 1.
  def encode(data)
    # Log
    log("<- Encoding #{format_size(data.bytesize)} with #{format_params}.")
    stime = Time.now

    # Setup
    init(true)
    table_init
    # View the input as raw bytes in binary mode, so that multibyte characters
    # are encoded byte by byte instead of being rejected
    input = @bytewise && data.encoding != Encoding::BINARY ? data.b : data
    verify_data(input) if @safe

    # LZW-encode data
    buf = ''
    put_code(@clear) if !@clear.nil?
    input.each_char do |c|
      next_buf = buf + c
      if table_has(next_buf)
        buf = next_buf
      else
        put_code(@table[buf])
        table_add(next_buf)
        table_check()
        buf = c
      end
    end
    put_code(@table[buf]) unless buf.empty? # Nothing pending for empty input
    put_code(@stop) if !@stop.nil?

    # Pack codes to binary string
    res = @buffer.pack('C*')

    # Return
    ttime = Time.now - stime
    log("-> Encoding finished in #{"%.3fs" % [ttime]} (avg. #{"%.3f" % [(8.0 * data.bytesize / 1024 ** 2) / ttime]} mbit/s).")
    log("-> Encoded data: #{format_size(res.bytesize)} (#{"%5.2f%%" % [100 * (1 - res.bytesize.to_f / data.bytesize)]} compression).")
    res
  rescue => e
    lex(e, 'Encoding error', true)
  end

  # Decode +data+ (a String produced by #encode with the same settings) and
  # return the original message. The result buffer starts out as an empty
  # binary String. Returns nil, after logging the error, if decoding fails.
  #
  # Optimization? Unpack bits subsequently, rather than converting between strings and ints
  def decode(data)
    # Log
    log("<- Decoding #{format_size(data.bytesize)} with #{format_params}.")
    stime = Time.now

    # Setup
    init(false)
    table_init
    bits = data.unpack('b*')[0] # Bit string, least significant bit of each byte first
    len = bits.length

    # Parse data
    off = 0
    out = ''.b
    old_code = nil
    width = @bits
    while off + width <= len
      # Parse code (reversed because the bit string is LSB first)
      code = bits[off...off + width].reverse.to_i(2)
      off += width

      # Handle clear and stop codes, if present
      if @clear && code == @clear
        table_init
        old_code = nil
        width = @bits
        next
      end
      break if @stop && code == @stop

      # Handle regular codes
      if old_code.nil?      # Initial code
        out << @table[code]
      elsif table_has(code) # Existing code
        out << @table[code]
        table_add(@table[old_code] + @table[code][0])
      else                  # New code (not yet in the table)
        entry = @table[old_code] + @table[old_code][0]
        out << entry
        table_add(entry)
      end

      # Prepare next iteration. When CLEAR codes are in use and the table was
      # just rebuilt, the width is kept as is: the next code to read is the
      # CLEAR code, which the encoder wrote at the old width
      old_code = table_check ? nil : code
      width = @bits unless !old_code && @clear
    end

    # Return
    ttime = Time.now - stime
    log("-> Decoding finished in #{"%.3fs" % [ttime]} (avg. #{"%.3f" % [(8.0 * data.bytesize / 1024 ** 2) / ttime]} mbit/s).")
    log("-> Decoded data: #{format_size(out.bytesize)} (#{"%5.2f%%" % [100 * (1 - data.bytesize.to_f / out.bytesize)]} compression).")
    out
  rescue => e
    lex(e, 'Decoding error', false)
  end

  private

  # Initialize buffers, needs to be called every time we execute a new
  # compression / decompression job
  def init(compress)
    @buffer = []                  # Contains result of compression
    @boff = 0                     # BIT offset of last buffer byte, for packing
    @compress = compress          # Compression or decompression job
    @step = @compress ? 0 : 1     # Decoder is always 1 step behind the encoder
  end

  # < --------------------------- PARSING METHODS ---------------------------- >

  # Return first non-nil argument (false counts as a value), or nil if none
  def find_arg(*args)
    args.each { |arg| return arg unless arg.nil? }
    nil
  end

  # < --------------------------- LOGGING METHODS ---------------------------- >

  # One-line summary of the current settings, used in log messages
  def format_params
    log_bits   = @min_bits == @max_bits ? @min_bits : "#{@min_bits}-#{@max_bits}"
    log_codes  = @clear ? (@stop ? 'CLEAR & STOP codes' : 'CLEAR codes') : (@stop ? 'STOP codes' : 'no special codes')
    log_lsb    = @lsb ? 'LSB' : 'MSB'
    log_binary = @binary ? 'binary' : 'textual'
    "#{log_bits} bit codes, #{log_lsb} packing, #{log_codes}, #{log_binary} mode"
  end

  # Human readable size (B, KiB, MiB or GiB) for a byte count. The magnitude
  # is found with integer comparisons, so a size of 0 is handled as well.
  def format_size(sz)
    unit = ['B', 'KiB', 'MiB', 'GiB']
    mag = 0
    mag += 1 while mag < unit.size - 1 && sz >= 1024 ** (mag + 1)
    fmt = mag == 0 ? '%d' : '%.3f'
    "#{fmt}%s" % [sz.to_f / 1024 ** mag, unit[mag]]
  end

  # Print a timestamped message if the verbosity setting allows its level
  def log(txt, level = 3)
    return if level > @verbosity
    puts "#{Time.now.strftime('[%H:%M:%S.%L]')} LZW #{txt}"
  end

  # Colored shortcuts for errors (level 1), warnings (2) and debug output (4)
  def err(txt)  log("\x1B[31m\x1B[1m✗\x1B[0m \x1B[31m#{txt}\x1B[0m", 1) end
  def warn(txt) log("\x1B[33m\x1B[1m!\x1B[0m \x1B[33m#{txt}\x1B[0m", 2) end
  def dbg(txt)  log("\x1B[90m\x1B[1mD\x1B[0m \x1B[90m#{txt}\x1B[0m", 4) end

  # Log exception +e+ (backtrace at debug level); exit with status 1 if
  # +fatal+, otherwise return nil
  def lex(e, msg = '', fatal = false)
    err("#{msg}: #{e}")
    dbg(e.backtrace.unshift('Backtrace:').join("\n"))
    exit(1) if fatal
  end

  # < --------------------------- TABLE METHODS ---------------------------- >

  # Initializes the table, needs to be called at the start of each compression
  # / decompression job, as well as whenever the table gets full, which may
  # happen many times in a single job.
  #
  # During compression, the table is a hash. During decompression, the table
  # is an array, making the job faster.
  def table_init
    # Add symbols for all strings of length 1 (e.g. all 256 byte values)
    @key = @alphabet.size - 1
    @table = @compress ? @alphabet.each_with_index.to_h : @alphabet.dup

    # Increment key index if clear/stop symbols are being used (the decoder
    # array gets a placeholder entry so that indices keep matching codes)
    if @clear
      @key += 1
      @table << '' unless @compress
    end
    if @stop
      @key += 1
      @table << '' unless @compress
    end

    @bits = [@key.bit_length, @min_bits].max
  end

  # Encoder: is the string +val+ in the table? Decoder: is the code +val+ known?
  def table_has(val)
    @compress ? @table.include?(val) : @key >= val
  end

  # Add new code to the table
  def table_add(val)
    # Table is full
    return if @key + @step >= 1 << @max_bits

    # Add code and increase index
    @key += 1
    @compress ? @table[val] = @key : @table << val
  end

  # Check table size, and increase code length or reinitialize if needed.
  # Returns true if the table was reinitialized, false otherwise.
  def table_check
    if @key + @step == 1 << @bits
      if @bits == @max_bits
        put_code(@clear) if @compress && @clear
        # The decoder keeps its table when deferred CLEAR codes are in use
        refresh = @compress || !@clear || !@deferred
        table_init if refresh
        return refresh
      else
        @bits += 1
      end
    end
    return false
  end

  # < ------------------------- ENCODING METHODS --------------------------- >

  # Raise unless every character of +data+ belongs to the alphabet
  def verify_data(data)
    alph = @alphabet.each_with_index.to_h
    raise "Data contains characters not present in the alphabet" if data.each_char.any? { |c| !alph.include?(c) }
  end

  # Append +code+ to the output buffer using the current code width (@bits),
  # filling each byte from its least significant bit upwards
  def put_code(code)
    raise 'Found character not in alphabet' if code.nil?
    bits = @bits

    while bits > 0
      # Pack bits in last byte if there's space, otherwise add new byte
      if @boff > 0
        @buffer[-1] |= code << @boff & 0xFF
      else
        @buffer << (code & 0xFF)
      end

      # If we didn't fill byte, packing is done, adjust offset and return
      if bits < 8 - @boff
        @boff += bits
        return
      end

      # Otherwise adjust code, bits left and offset, and do next iteration
      bits -= 8 - @boff
      code >>= 8 - @boff
      @boff = 0
    end
  end

end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lzwrb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - edelkas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-02-02 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |2
14
+ This library provides LZW encoding and decoding capabilities with no
15
+ dependencies and reasonably fast speed. It is highly configurable,
16
+ supporting both constant and variable code lengths, custom alphabets,
17
+ usage of clear/stop codes...
18
+
19
+ It is compatible with the GIF specification, and comes equipped with
20
+ several presets. Eventually I'd like to add compatibility with other
21
+ standards, such as the ones used for UNIX compress, PDF and TIFF.
22
+ email:
23
+ executables: []
24
+ extensions: []
25
+ extra_rdoc_files: []
26
+ files:
27
+ - lib/lzwrb.rb
28
+ homepage: https://github.com/edelkas/lzwrb
29
+ licenses: []
30
+ metadata:
31
+ source_code_uri: https://github.com/edelkas/lzwrb
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubygems_version: 3.1.6
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Pure Ruby LZW encoder/decoder with a wide range of settings
51
+ test_files: []