Ascii85 1.1.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ascii85.rb CHANGED
@@ -1,220 +1,466 @@
1
- # encoding: utf-8
2
1
  # frozen_string_literal: true
3
2
 
3
+ require 'stringio'
4
4
 
5
5
  #
6
6
  # Ascii85 is an implementation of Adobe's binary-to-text encoding of the
7
7
  # same name in pure Ruby.
8
8
  #
9
- # See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131
10
- # and http://en.wikipedia.org/wiki/Ascii85 for more information about
11
- # the format.
9
+ # See http://en.wikipedia.org/wiki/Ascii85 for more information about the
10
+ # format.
12
11
  #
13
12
  # Author:: Johannes Holzfuß (johannes@holzfuss.name)
14
13
  # License:: Distributed under the MIT License (see LICENSE file)
15
14
  #
15
+ module Ascii85
16
+ class << self
17
+ EMPTY_STRING = ''.dup.force_encoding(Encoding::ASCII_8BIT)
18
+ START_MARKER = '<~'.dup.force_encoding(Encoding::ASCII_8BIT)
19
+ ENDING_MARKER = '~>'.dup.force_encoding(Encoding::ASCII_8BIT)
20
+ LINE_BREAK = "\n".dup.force_encoding(Encoding::ASCII_8BIT)
16
21
 
22
+ #
23
+ # Encodes the bytes of the given String or IO-like object as Ascii85.
24
+ #
25
+ # @param str_or_io [String, IO] The input to encode
26
+ # @param wrap_lines [Integer, false] The line length for wrapping, or +false+ for no wrapping
27
+ # @param out [IO, nil] An optional IO-like object to write the output to
28
+ #
29
+ # @return [String, IO] The encoded String or the output IO object that was passed in
30
+ #
31
+ # @example Encoding a simple String
32
+ # Ascii85.encode("Ruby")
33
+ # # => <~;KZGo~>
34
+ #
35
+ # @example Encoding with line wrapping
36
+ # Ascii85.encode("Supercalifragilisticexpialidocious", 15)
37
+ # # => <~;g!%jEarNoBkD
38
+ # # BoB5)0rF*),+AU&
39
+ # # 0.@;KXgDe!L"F`R
40
+ # # ~>
41
+ #
42
+ # @example Encoding without line wrapping
43
+ # Ascii85.encode("Supercalifragilisticexpialidocious", false)
44
+ # # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
45
+ #
46
+ # @example Encoding from an IO-like object
47
+ # input = StringIO.new("Ruby")
48
+ # Ascii85.encode(input)
49
+ # # => "<~;KZGo~>"
50
+ #
51
+ # @example Encoding to an IO object
52
+ # output = StringIO.new
53
+ # Ascii85.encode("Ruby", out: output)
54
+ # # => output (with "<~;KZGo~>" written to it)
55
+ #
56
+ def encode(str_or_io, wrap_lines = 80, out: nil)
57
+ reader = if io_like?(str_or_io)
58
+ str_or_io
59
+ else
60
+ StringIO.new(str_or_io.to_s, 'rb')
61
+ end
62
+
63
+ return EMPTY_STRING.dup if reader.eof?
64
+
65
+ # Set up the buffered reader and writers
66
+ bufreader = BufferedReader.new(reader, unencoded_chunk_size)
67
+ bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), encoded_chunk_size)
68
+ writer = wrap_lines ? Wrapper.new(bufwriter, wrap_lines) : DummyWrapper.new(bufwriter)
69
+
70
+ padding = unfrozen_binary_copy("\0\0\0\0")
71
+ tuplebuf = unfrozen_binary_copy('!!!!!')
72
+ exclamations = unfrozen_binary_copy('!!!!!')
73
+ z = unfrozen_binary_copy('z')
74
+
75
+ bufreader.each_chunk do |chunk|
76
+ chunk.unpack('N*').each do |word|
77
+ # Encode each big-endian 32-bit word into a 5-character tuple (except
78
+ # for 0, which encodes to 'z')
79
+ if word.zero?
80
+ writer.write(z)
81
+ else
82
+ word, b0 = word.divmod(85)
83
+ word, b1 = word.divmod(85)
84
+ word, b2 = word.divmod(85)
85
+ word, b3 = word.divmod(85)
86
+ b4 = word
87
+
88
+ tuplebuf.setbyte(0, b4 + 33)
89
+ tuplebuf.setbyte(1, b3 + 33)
90
+ tuplebuf.setbyte(2, b2 + 33)
91
+ tuplebuf.setbyte(3, b1 + 33)
92
+ tuplebuf.setbyte(4, b0 + 33)
93
+
94
+ writer.write(tuplebuf)
95
+ end
96
+ end
17
97
 
18
- module Ascii85
19
- #
20
- # Encodes the bytes of the given String as Ascii85.
21
- #
22
- # If +wrap_lines+ evaluates to +false+, the output will be returned as
23
- # a single long line. Otherwise #encode formats the output into lines
24
- # of length +wrap_lines+ (minimum is 2).
25
- #
26
- # Ascii85.encode("Ruby")
27
- # => <~;KZGo~>
28
- #
29
- # Ascii85.encode("Supercalifragilisticexpialidocious", 15)
30
- # => <~;g!%jEarNoBkD
31
- # BoB5)0rF*),+AU&
32
- # 0.@;KXgDe!L"F`R
33
- # ~>
34
- #
35
- # Ascii85.encode("Supercalifragilisticexpialidocious", false)
36
- # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
37
- #
38
- #
39
- def self.encode(str, wrap_lines = 80)
40
- to_encode = str.to_s
41
- return '' if to_encode.empty?
42
-
43
- # Deal with multi-byte encodings
44
- if to_encode.respond_to?(:bytesize)
45
- input_size = to_encode.bytesize
46
- else
47
- input_size = to_encode.size
48
- end
98
+ next if (chunk.bytesize & 0b11).zero?
49
99
 
50
- # Compute number of \0s to pad the message with (0..3)
51
- padding_length = (-input_size) % 4
52
-
53
- # Extract big-endian integers
54
- tuples = (to_encode + ("\0" * padding_length)).unpack('N*')
55
-
56
- # Encode
57
- tuples.map! do |tuple|
58
- if tuple == 0
59
- 'z'
60
- else
61
- tmp = String.new
62
- 5.times do
63
- tmp << ((tuple % 85) + 33).chr
64
- tuple /= 85
100
+ # If we have leftover bytes, we need to zero-pad to a multiple of four
101
+ # before converting to a 32-bit word.
102
+ padding_length = (-chunk.bytesize) % 4
103
+ trailing = chunk[-(4 - padding_length)..]
104
+ word = (trailing + padding[0...padding_length]).unpack1('N')
105
+
106
+ # Encode the last word and cut off any padding
107
+ if word.zero?
108
+ writer.write(exclamations[0..(4 - padding_length)])
109
+ else
110
+ word, b0 = word.divmod(85)
111
+ word, b1 = word.divmod(85)
112
+ word, b2 = word.divmod(85)
113
+ word, b3 = word.divmod(85)
114
+ b4 = word
115
+
116
+ tuplebuf.setbyte(0, b4 + 33)
117
+ tuplebuf.setbyte(1, b3 + 33)
118
+ tuplebuf.setbyte(2, b2 + 33)
119
+ tuplebuf.setbyte(3, b1 + 33)
120
+ tuplebuf.setbyte(4, b0 + 33)
121
+
122
+ writer.write(tuplebuf[0..(4 - padding_length)])
65
123
  end
66
- tmp.reverse
67
124
  end
68
- end
69
125
 
70
- # We can't use the z-abbreviation if we're going to cut off padding
71
- if (padding_length > 0) and (tuples.last == 'z')
72
- tuples[-1] = '!!!!!'
126
+ # If no output IO-object was provided, extract the encoded String from the
127
+ # default StringIO writer. We force the encoding to 'ASCII-8BIT' to work
128
+ # around a TruffleRuby bug.
129
+ return writer.finish.io.string.force_encoding(Encoding::ASCII_8BIT) if out.nil?
130
+
131
+ # Otherwise we make sure to flush the output writer, and then return it.
132
+ writer.finish.io
73
133
  end
74
134
 
75
- # Cut off the padding
76
- tuples[-1] = tuples[-1][0..(4 - padding_length)]
135
+ # Searches through a String and extracts the first substring enclosed by '<~' and '~>'.
136
+ #
137
+ # @param str [String] The String to search through
138
+ #
139
+ # @return [String] The extracted substring, or an empty String if no valid delimiters are found
140
+ #
141
+ # @example Extracting Ascii85 content
142
+ # Ascii85.extract("Foo<~;KZGo~>Bar<~z~>Baz")
143
+ # # => ";KZGo"
144
+ #
145
+ # @example When no delimiters are found
146
+ # Ascii85.extract("No delimiters")
147
+ # # => ""
148
+ #
149
+ # @note This method only accepts a String, not an IO-like object, as the entire input
150
+ # needs to be available to ensure validity.
151
+ #
152
+ def extract(str)
153
+ input = str.to_s
154
+
155
+ # Make sure the delimiter Strings have the correct encoding.
156
+ opening_delim = '<~'.encode(input.encoding)
157
+ closing_delim = '~>'.encode(input.encoding)
77
158
 
78
- # If we don't need to wrap the lines, add delimiters and return
79
- if (!wrap_lines)
80
- return '<~' + tuples.join + '~>'
159
+ # Get the positions of the opening/closing delimiters. If there is no pair
160
+ # of opening/closing delimiters, return an unfrozen empty String.
161
+ (start_pos = input.index(opening_delim)) or return EMPTY_STRING.dup
162
+ (end_pos = input.index(closing_delim, start_pos + 2)) or return EMPTY_STRING.dup
163
+
164
+ # Get the String inside the delimiter-pair
165
+ input[(start_pos + 2)...end_pos]
81
166
  end
82
167
 
83
- # Otherwise we wrap the lines
84
- line_length = [2, wrap_lines.to_i].max
168
+ #
169
+ # Searches through a String and decodes the first substring enclosed by '<~' and '~>'.
170
+ #
171
+ # @param str [String] The String containing Ascii85-encoded content
172
+ # @param out [IO, nil] An optional IO-like object to write the output to
173
+ #
174
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
175
+ #
176
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
177
+ #
178
+ # @example Decoding Ascii85 content
179
+ # Ascii85.decode("<~;KZGo~>")
180
+ # # => "Ruby"
181
+ #
182
+ # @example Decoding with multiple Ascii85 blocks present (ignores all but the first)
183
+ # Ascii85.decode("Foo<~;KZGo~>Bar<~87cURDZ~>Baz")
184
+ # # => "Ruby"
185
+ #
186
+ # @example When no delimiters are found
187
+ # Ascii85.decode("No delimiters")
188
+ # # => ""
189
+ #
190
+ # @example Decoding to an IO object
191
+ # output = StringIO.new
192
+ # Ascii85.decode("<~;KZGo~>", out: output)
193
+ # # => output (with "Ruby" written to it)
194
+ #
195
+ # @note This method only accepts a String, not an IO-like object, as the entire input
196
+ # needs to be available to ensure validity.
197
+ #
198
+ def decode(str, out: nil)
199
+ decode_raw(extract(str), out: out)
200
+ end
85
201
 
86
- wrapped = []
87
- to_wrap = '<~' + tuples.join
202
+ #
203
+ # Decodes the given raw Ascii85-encoded String or IO-like object.
204
+ #
205
+ # @param str_or_io [String, IO] The Ascii85-encoded input to decode
206
+ # @param out [IO, nil] An optional IO-like object to write the output to
207
+ #
208
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
209
+ #
210
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
211
+ #
212
+ # @example Decoding a raw Ascii85 String
213
+ # Ascii85.decode_raw(";KZGo")
214
+ # # => "Ruby"
215
+ #
216
+ # @example Decoding from an IO-like object
217
+ # input = StringIO.new(";KZGo")
218
+ # Ascii85.decode_raw(input)
219
+ # # => "Ruby"
220
+ #
221
+ # @example Decoding to an IO object
222
+ # output = StringIO.new
223
+ # Ascii85.decode_raw(";KZGo", out: output)
224
+ # # => output (with "Ruby" written to it)
225
+ #
226
+ # @note The input must not be enclosed in '<~' and '~>' delimiters.
227
+ #
228
+ def decode_raw(str_or_io, out: nil)
229
+ reader = if io_like?(str_or_io)
230
+ str_or_io
231
+ else
232
+ StringIO.new(str_or_io.to_s, 'rb')
233
+ end
234
+
235
+ # Return an unfrozen String on empty input
236
+ return EMPTY_STRING.dup if reader.eof?
237
+
238
+ # Set up the buffered reader and writer
239
+ bufreader = BufferedReader.new(reader, encoded_chunk_size)
240
+ bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), unencoded_chunk_size)
241
+
242
+ # Populate the lookup table (caches the exponentiation)
243
+ lut = (0..4).map { |count| 85**(4 - count) }
244
+
245
+ # Decode
246
+ word = 0
247
+ count = 0
248
+ zeroes = unfrozen_binary_copy("\0\0\0\0")
249
+ wordbuf = zeroes.dup
250
+
251
+ bufreader.each_chunk do |chunk|
252
+ chunk.each_byte do |c|
253
+ case c.chr
254
+ when ' ', "\t", "\r", "\n", "\f", "\0"
255
+ # Ignore whitespace, including NUL bytes
256
+ next
257
+
258
+ when 'z'
259
+ raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple") unless count.zero?
260
+
261
+ # Expand z to 0-word
262
+ bufwriter.write(zeroes)
263
+
264
+ when '!'..'u'
265
+ # Decode 5 characters into a 4-byte word
266
+ word += (c - 33) * lut[count]
267
+ count += 1
268
+
269
+ if count == 5 && word > 0xffffffff
270
+ raise(Ascii85::DecodingError, "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
271
+ elsif count == 5
272
+ b3 = word & 0xff; word >>= 8
273
+ b2 = word & 0xff; word >>= 8
274
+ b1 = word & 0xff; word >>= 8
275
+ b0 = word
276
+
277
+ wordbuf.setbyte(0, b0)
278
+ wordbuf.setbyte(1, b1)
279
+ wordbuf.setbyte(2, b2)
280
+ wordbuf.setbyte(3, b3)
281
+
282
+ bufwriter.write(wordbuf)
283
+
284
+ word = 0
285
+ count = 0
286
+ end
287
+
288
+ else
289
+ raise(Ascii85::DecodingError, "Illegal character inside Ascii85: #{c.chr.dump}")
290
+ end
291
+ end
292
+ end
293
+
294
+ # We're done if all 5-tuples have been consumed
295
+ if count.zero?
296
+ bufwriter.flush
297
+ return out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT)
298
+ end
299
+
300
+ raise(Ascii85::DecodingError, 'Last 5-tuple consists of single character') if count == 1
301
+
302
+ # Finish last, partially decoded 32-bit word
303
+ count -= 1
304
+ word += lut[count]
305
+
306
+ bufwriter.write((word >> 24).chr) if count >= 1
307
+ bufwriter.write(((word >> 16) & 0xff).chr) if count >= 2
308
+ bufwriter.write(((word >> 8) & 0xff).chr) if count == 3
309
+ bufwriter.flush
88
310
 
89
- 0.step(to_wrap.length, line_length) do |index|
90
- wrapped << to_wrap.slice(index, line_length)
311
+ out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT)
91
312
  end
92
313
 
93
- # Add end-marker – on a new line if necessary
94
- if (wrapped.last.length + 2) > line_length
95
- wrapped << '~>'
96
- else
97
- wrapped[-1] << '~>'
314
+ private
315
+
316
+ # Copies the given String and forces the encoding of the returned copy to
317
+ # be Encoding::ASCII_8BIT.
318
+ def unfrozen_binary_copy(str)
319
+ str.dup.force_encoding(Encoding::ASCII_8BIT)
98
320
  end
99
321
 
100
- return wrapped.join("\n")
101
- end
322
+ # Buffers an underlying IO object to increase efficiency. You do not need
323
+ # to use this directly.
324
+ #
325
+ # @private
326
+ #
327
+ class BufferedReader
328
+ def initialize(io, buffer_size)
329
+ @io = io
330
+ @buffer_size = buffer_size
331
+ end
102
332
 
103
- #
104
- # Searches through +str+ and decodes the _first_ Ascii85-String found.
105
- #
106
- # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it will
107
- # ignore all characters outside these markers. The returned strings are always
108
- # encoded as ASCII-8BIT.
109
- #
110
- # Ascii85.decode("<~;KZGo~>")
111
- # => "Ruby"
112
- #
113
- # Ascii85.decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz")
114
- # => "Ruby"
115
- #
116
- # Ascii85.decode("No markers")
117
- # => ""
118
- #
119
- # #decode will raise Ascii85::DecodingError when malformed input is
120
- # encountered.
121
- #
122
- def self.decode(str)
123
- input = str.to_s
333
+ def each_chunk
334
+ return enum_for(:each_chunk) unless block_given?
124
335
 
125
- opening_delim = '<~'
126
- closing_delim = '~>'
336
+ until @io.eof?
337
+ chunk = @io.read(@buffer_size)
338
+ yield chunk if chunk
339
+ end
340
+ end
341
+ end
127
342
 
128
- # Make sure the delimiter strings have the correct encoding.
343
+ # Buffers an underlying IO object to increase efficiency. You do not need
344
+ # to use this directly.
129
345
  #
130
- # Although I don't think it likely, this may raise encoding
131
- # errors if an especially exotic input encoding is introduced.
132
- # As of Ruby 1.9.2 all non-dummy encodings work fine though.
346
+ # @private
133
347
  #
134
- if opening_delim.respond_to?(:encode)
135
- opening_delim = opening_delim.encode(input.encoding)
136
- closing_delim = closing_delim.encode(input.encoding)
137
- end
348
+ class BufferedWriter
349
+ attr_accessor :io
138
350
 
139
- # Get the positions of the opening/closing delimiters. If there is
140
- # no pair of opening/closing delimiters, return the empty string.
141
- (start_pos = input.index(opening_delim)) or return ''
142
- (end_pos = input.index(closing_delim, start_pos + 2)) or return ''
143
-
144
- # Get the string inside the delimiter-pair
145
- input = input[(start_pos + 2)...end_pos]
146
-
147
- # Decode
148
- word = 0
149
- count = 0
150
- result = []
151
-
152
- input.each_byte do |c|
153
- case c.chr
154
- when " ", "\t", "\r", "\n", "\f", "\0"
155
- # Ignore whitespace
156
- next
157
-
158
- when 'z'
159
- if count == 0
160
- # Expand z to 0-word
161
- result << 0
162
- else
163
- raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple")
164
- end
351
+ def initialize(io, buffer_size)
352
+ @io = io
353
+ @buffer_size = buffer_size
354
+ @buffer = String.new(capacity: buffer_size, encoding: Encoding::ASCII_8BIT)
355
+ end
165
356
 
166
- when '!'..'u'
167
- # Decode 5 characters into a 4-byte word
168
- word += (c - 33) * 85**(4 - count)
169
- count += 1
357
+ def write(tuple)
358
+ flush if @buffer.bytesize + tuple.bytesize > @buffer_size
359
+ @buffer << tuple
360
+ end
170
361
 
171
- if count == 5
362
+ def flush
363
+ @io.write(@buffer)
364
+ @buffer.clear
365
+ end
366
+ end
172
367
 
173
- if word > 0xffffffff
174
- raise(Ascii85::DecodingError,
175
- "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
176
- end
368
+ # Wraps the input in '<~' and '~>' delimiters but otherwise passes it
369
+ # through unmodified to the underlying IO object. You do not need to
370
+ # use this directly.
371
+ #
372
+ # @private
373
+ #
374
+ class DummyWrapper
375
+ def initialize(out)
376
+ @out = out
377
+ @out.write(START_MARKER)
378
+ end
177
379
 
178
- result << word
380
+ def write(buffer)
381
+ @out.write(buffer)
382
+ end
179
383
 
180
- word = 0
181
- count = 0
182
- end
384
+ def finish
385
+ @out.write(ENDING_MARKER)
386
+ @out.flush
183
387
 
184
- else
185
- raise(Ascii85::DecodingError,
186
- "Illegal character inside Ascii85: #{c.chr.dump}")
388
+ @out
187
389
  end
188
390
  end
189
391
 
190
- # Convert result into a String
191
- result = result.pack('N*')
392
+ # Wraps the input in '<~' and '~>' delimiters and ensures that no line is
393
+ # longer than the specified length. You do not need to use this directly.
394
+ #
395
+ # @private
396
+ #
397
+ class Wrapper
398
+ def initialize(out, wrap_lines)
399
+ @line_length = [2, wrap_lines.to_i].max
192
400
 
193
- if count > 0
194
- # Finish last, partially decoded 32-bit-word
401
+ @out = out
402
+ @out.write(START_MARKER)
195
403
 
196
- if count == 1
197
- raise(Ascii85::DecodingError,
198
- "Last 5-tuple consists of single character")
404
+ @cur_len = 2
199
405
  end
200
406
 
201
- count -= 1
202
- word += 85**(4 - count)
407
+ def write(buffer)
408
+ loop do
409
+ s = buffer.bytesize
203
410
 
204
- result << ((word >> 24) & 255).chr if count >= 1
205
- result << ((word >> 16) & 255).chr if count >= 2
206
- result << ((word >> 8) & 255).chr if count == 3
411
+ if @cur_len + s < @line_length
412
+ @out.write(buffer)
413
+ @cur_len += s
414
+ return
415
+ end
416
+
417
+ remaining = @line_length - @cur_len
418
+ @out.write(buffer[0...remaining])
419
+ @out.write(LINE_BREAK)
420
+ @cur_len = 0
421
+ buffer = buffer[remaining..]
422
+ return if buffer.empty?
423
+ end
424
+ end
425
+
426
+ def finish
427
+ # Add the closing delimiter (may need to be pushed to the next line)
428
+ @out.write(LINE_BREAK) if @cur_len + 2 > @line_length
429
+ @out.write(ENDING_MARKER)
430
+
431
+ @out.flush
432
+ @out
433
+ end
207
434
  end
208
435
 
209
- return result
436
+ # Check if an object is IO-like
437
+ #
438
+ # @private
439
+ #
440
+ def io_like?(obj)
441
+ obj.respond_to?(:read) &&
442
+ obj.respond_to?(:eof?)
443
+ end
444
+
445
+ # @return [Integer] Buffer size for to-be-encoded input
446
+ #
447
+ def unencoded_chunk_size
448
+ 4 * 2048
449
+ end
450
+
451
+ # @return [Integer] Buffer size for encoded output
452
+ #
453
+ def encoded_chunk_size
454
+ 5 * 2048
455
+ end
210
456
  end
211
457
 
212
458
  #
213
- # This error is raised when Ascii85.decode encounters one of the following
214
- # problems in the input:
459
+ # Error raised when Ascii85 encounters problems while decoding the input.
215
460
  #
216
- # * An invalid character. Valid characters are '!'..'u' and 'z'.
217
- # * A 'z' character inside a 5-tuple. 'z's are only valid on their own.
461
+ # This error is raised for the following issues:
462
+ # * An invalid character (valid characters are '!'..'u' and 'z')
463
+ # * A 'z' character inside a 5-tuple ('z' is only valid on its own)
218
464
  # * An invalid 5-tuple that decodes to >= 2**32
219
465
  # * The last tuple consisting of a single character. Valid tuples always have
220
466
  # at least two characters.