Ascii85 1.1.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ascii85.rb CHANGED
@@ -1,220 +1,452 @@
1
- # encoding: utf-8
2
1
  # frozen_string_literal: true
3
2
 
3
+ require 'stringio'
4
4
 
5
5
  #
6
6
  # Ascii85 is an implementation of Adobe's binary-to-text encoding of the
7
7
  # same name in pure Ruby.
8
8
  #
9
- # See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131
10
- # and http://en.wikipedia.org/wiki/Ascii85 for more information about
11
- # the format.
9
+ # See http://en.wikipedia.org/wiki/Ascii85 for more information about the
10
+ # format.
12
11
  #
13
12
  # Author:: Johannes Holzfuß (johannes@holzfuss.name)
14
13
  # License:: Distributed under the MIT License (see LICENSE file)
15
14
  #
15
+ module Ascii85
16
+ class << self
17
+ #
18
+ # Encodes the bytes of the given String or IO-like object as Ascii85.
19
+ #
20
+ # @param str_or_io [String, IO] The input to encode
21
+ # @param wrap_lines [Integer, false] The line length for wrapping, or +false+ for no wrapping
22
+ # @param out [IO, nil] An optional IO-like object to write the output to
23
+ #
24
+ # @return [String, IO] The encoded String or the output IO object that was passed in
25
+ #
26
+ # @example Encoding a simple String
27
+ # Ascii85.encode("Ruby")
28
+ # # => <~;KZGo~>
29
+ #
30
+ # @example Encoding with line wrapping
31
+ # Ascii85.encode("Supercalifragilisticexpialidocious", 15)
32
+ # # => <~;g!%jEarNoBkD
33
+ # # BoB5)0rF*),+AU&
34
+ # # 0.@;KXgDe!L"F`R
35
+ # # ~>
36
+ #
37
+ # @example Encoding without line wrapping
38
+ # Ascii85.encode("Supercalifragilisticexpialidocious", false)
39
+ # # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
40
+ #
41
+ # @example Encoding from an IO-like object
42
+ # input = StringIO.new("Ruby")
43
+ # Ascii85.encode(input)
44
+ # # => "<~;KZGo~>"
45
+ #
46
+ # @example Encoding to an IO object
47
+ # output = StringIO.new
48
+ # Ascii85.encode("Ruby", out: output)
49
+ # # => output (with "<~;KZGo~>" written to it)
50
+ #
51
+ def encode(str_or_io, wrap_lines = 80, out: nil)
52
+ reader = if io_like?(str_or_io)
53
+ str_or_io
54
+ else
55
+ StringIO.new(str_or_io.to_s, 'rb')
56
+ end
57
+
58
+ return ''.dup if reader.eof?
59
+
60
+ # Set up the buffered reader and writers
61
+ bufreader = BufferedReader.new(reader, unencoded_chunk_size)
62
+ bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), encoded_chunk_size)
63
+ writer = wrap_lines ? Wrapper.new(bufwriter, wrap_lines) : DummyWrapper.new(bufwriter)
64
+
65
+ padding = "\0\0\0\0"
66
+ tuplebuf = '!!!!!'.dup
67
+
68
+ bufreader.each_chunk do |chunk|
69
+ chunk.unpack('N*').each do |word|
70
+ # Encode each big-endian 32-bit word into a 5-character tuple (except
71
+ # for 0, which encodes to 'z')
72
+ if word.zero?
73
+ writer.write('z')
74
+ else
75
+ word, b0 = word.divmod(85)
76
+ word, b1 = word.divmod(85)
77
+ word, b2 = word.divmod(85)
78
+ word, b3 = word.divmod(85)
79
+ b4 = word
80
+
81
+ tuplebuf.setbyte(0, b4 + 33)
82
+ tuplebuf.setbyte(1, b3 + 33)
83
+ tuplebuf.setbyte(2, b2 + 33)
84
+ tuplebuf.setbyte(3, b1 + 33)
85
+ tuplebuf.setbyte(4, b0 + 33)
86
+
87
+ writer.write(tuplebuf)
88
+ end
89
+ end
16
90
 
91
+ next if (chunk.bytesize & 0b11).zero?
17
92
 
18
- module Ascii85
19
- #
20
- # Encodes the bytes of the given String as Ascii85.
21
- #
22
- # If +wrap_lines+ evaluates to +false+, the output will be returned as
23
- # a single long line. Otherwise #encode formats the output into lines
24
- # of length +wrap_lines+ (minimum is 2).
25
- #
26
- # Ascii85.encode("Ruby")
27
- # => <~;KZGo~>
28
- #
29
- # Ascii85.encode("Supercalifragilisticexpialidocious", 15)
30
- # => <~;g!%jEarNoBkD
31
- # BoB5)0rF*),+AU&
32
- # 0.@;KXgDe!L"F`R
33
- # ~>
34
- #
35
- # Ascii85.encode("Supercalifragilisticexpialidocious", false)
36
- # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
37
- #
38
- #
39
- def self.encode(str, wrap_lines = 80)
40
- to_encode = str.to_s
41
- return '' if to_encode.empty?
42
-
43
- # Deal with multi-byte encodings
44
- if to_encode.respond_to?(:bytesize)
45
- input_size = to_encode.bytesize
46
- else
47
- input_size = to_encode.size
48
- end
93
+ # If we have leftover bytes, we need to zero-pad to a multiple of four
94
+ # before converting to a 32-bit word.
95
+ padding_length = (-chunk.bytesize) % 4
96
+ trailing = chunk[-(4 - padding_length)..]
97
+ word = (trailing + padding[0...padding_length]).unpack1('N')
49
98
 
50
- # Compute number of \0s to pad the message with (0..3)
51
- padding_length = (-input_size) % 4
52
-
53
- # Extract big-endian integers
54
- tuples = (to_encode + ("\0" * padding_length)).unpack('N*')
55
-
56
- # Encode
57
- tuples.map! do |tuple|
58
- if tuple == 0
59
- 'z'
60
- else
61
- tmp = String.new
62
- 5.times do
63
- tmp << ((tuple % 85) + 33).chr
64
- tuple /= 85
99
+ # Encode the last word and cut off any padding
100
+ if word.zero?
101
+ writer.write('!!!!!'[0..(4 - padding_length)])
102
+ else
103
+ word, b0 = word.divmod(85)
104
+ word, b1 = word.divmod(85)
105
+ word, b2 = word.divmod(85)
106
+ word, b3 = word.divmod(85)
107
+ b4 = word
108
+
109
+ tuplebuf.setbyte(0, b4 + 33)
110
+ tuplebuf.setbyte(1, b3 + 33)
111
+ tuplebuf.setbyte(2, b2 + 33)
112
+ tuplebuf.setbyte(3, b1 + 33)
113
+ tuplebuf.setbyte(4, b0 + 33)
114
+
115
+ writer.write(tuplebuf[0..(4 - padding_length)])
65
116
  end
66
- tmp.reverse
67
117
  end
68
- end
69
-
70
- # We can't use the z-abbreviation if we're going to cut off padding
71
- if (padding_length > 0) and (tuples.last == 'z')
72
- tuples[-1] = '!!!!!'
73
- end
74
118
 
75
- # Cut off the padding
76
- tuples[-1] = tuples[-1][0..(4 - padding_length)]
119
+ # If no output IO-object was provided, extract the encoded String from the
120
+ # default StringIO writer. We force the encoding to 'ASCII-8BIT' to work
121
+ # around a TruffleRuby bug.
122
+ return writer.finish.io.string.force_encoding('ASCII-8BIT') if out.nil?
77
123
 
78
- # If we don't need to wrap the lines, add delimiters and return
79
- if (!wrap_lines)
80
- return '<~' + tuples.join + '~>'
124
+ # Otherwise we make sure to flush the output writer, and then return it.
125
+ writer.finish.io
81
126
  end
82
127
 
83
- # Otherwise we wrap the lines
84
- line_length = [2, wrap_lines.to_i].max
128
+ # Searches through a String and extracts the first substring enclosed by '<~' and '~>'.
129
+ #
130
+ # @param str [String] The String to search through
131
+ #
132
+ # @return [String] The extracted substring, or an empty String if no valid delimiters are found
133
+ #
134
+ # @example Extracting Ascii85 content
135
+ # Ascii85.extract("Foo<~;KZGo~>Bar<~z~>Baz")
136
+ # # => ";KZGo"
137
+ #
138
+ # @example When no delimiters are found
139
+ # Ascii85.extract("No delimiters")
140
+ # # => ""
141
+ #
142
+ # @note This method only accepts a String, not an IO-like object, as the entire input
143
+ # needs to be available to ensure validity.
144
+ #
145
+ def extract(str)
146
+ input = str.to_s
147
+
148
+ # Make sure the delimiter Strings have the correct encoding.
149
+ opening_delim = '<~'.encode(input.encoding)
150
+ closing_delim = '~>'.encode(input.encoding)
85
151
 
86
- wrapped = []
87
- to_wrap = '<~' + tuples.join
152
+ # Get the positions of the opening/closing delimiters. If there is no pair
153
+ # of opening/closing delimiters, return an unfrozen empty String.
154
+ (start_pos = input.index(opening_delim)) or return ''.dup
155
+ (end_pos = input.index(closing_delim, start_pos + 2)) or return ''.dup
88
156
 
89
- 0.step(to_wrap.length, line_length) do |index|
90
- wrapped << to_wrap.slice(index, line_length)
157
+ # Get the String inside the delimiter-pair
158
+ input[(start_pos + 2)...end_pos]
91
159
  end
92
160
 
93
- # Add end-marker – on a new line if necessary
94
- if (wrapped.last.length + 2) > line_length
95
- wrapped << '~>'
96
- else
97
- wrapped[-1] << '~>'
161
+ #
162
+ # Searches through a String and decodes the first substring enclosed by '<~' and '~>'.
163
+ #
164
+ # @param str [String] The String containing Ascii85-encoded content
165
+ # @param out [IO, nil] An optional IO-like object to write the output to
166
+ #
167
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
168
+ #
169
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
170
+ #
171
+ # @example Decoding Ascii85 content
172
+ # Ascii85.decode("<~;KZGo~>")
173
+ # # => "Ruby"
174
+ #
175
+ # @example Decoding with multiple Ascii85 blocks present (ignores all but the first)
176
+ # Ascii85.decode("Foo<~;KZGo~>Bar<~87cURDZ~>Baz")
177
+ # # => "Ruby"
178
+ #
179
+ # @example When no delimiters are found
180
+ # Ascii85.decode("No delimiters")
181
+ # # => ""
182
+ #
183
+ # @example Decoding to an IO object
184
+ # output = StringIO.new
185
+ # Ascii85.decode("<~;KZGo~>", out: output)
186
+ # # => output (with "Ruby" written to it)
187
+ #
188
+ # @note This method only accepts a String, not an IO-like object, as the entire input
189
+ # needs to be available to ensure validity.
190
+ #
191
+ def decode(str, out: nil)
192
+ decode_raw(extract(str), out: out)
98
193
  end
99
194
 
100
- return wrapped.join("\n")
101
- end
195
+ #
196
+ # Decodes the given raw Ascii85-encoded String or IO-like object.
197
+ #
198
+ # @param str_or_io [String, IO] The Ascii85-encoded input to decode
199
+ # @param out [IO, nil] An optional IO-like object to write the output to
200
+ #
201
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
202
+ #
203
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
204
+ #
205
+ # @example Decoding a raw Ascii85 String
206
+ # Ascii85.decode_raw(";KZGo")
207
+ # # => "Ruby"
208
+ #
209
+ # @example Decoding from an IO-like object
210
+ # input = StringIO.new(";KZGo")
211
+ # Ascii85.decode_raw(input)
212
+ # # => "Ruby"
213
+ #
214
+ # @example Decoding to an IO object
215
+ # output = StringIO.new
216
+ # Ascii85.decode_raw(";KZGo", out: output)
217
+ # # => output (with "Ruby" written to it)
218
+ #
219
+ # @note The input must not be enclosed in '<~' and '~>' delimiters.
220
+ #
221
+ def decode_raw(str_or_io, out: nil)
222
+ reader = if io_like?(str_or_io)
223
+ str_or_io
224
+ else
225
+ StringIO.new(str_or_io.to_s, 'rb')
226
+ end
227
+
228
+ # Return an unfrozen String on empty input
229
+ return ''.dup if reader.eof?
230
+
231
+ # Set up the buffered reader and writer
232
+ bufreader = BufferedReader.new(reader, encoded_chunk_size)
233
+ bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), unencoded_chunk_size)
234
+
235
+ # Populate the lookup table (caches the exponentiation)
236
+ lut = (0..4).map { |count| 85**(4 - count) }
237
+
238
+ # Decode
239
+ word = 0
240
+ count = 0
241
+ wordbuf = "\0\0\0\0".dup
242
+
243
+ bufreader.each_chunk do |chunk|
244
+ chunk.each_byte do |c|
245
+ case c.chr
246
+ when ' ', "\t", "\r", "\n", "\f", "\0"
247
+ # Ignore whitespace
248
+ next
249
+
250
+ when 'z'
251
+ raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple") unless count.zero?
252
+
253
+ # Expand z to 0-word
254
+ bufwriter.write("\0\0\0\0")
255
+
256
+ when '!'..'u'
257
+ # Decode 5 characters into a 4-byte word
258
+ word += (c - 33) * lut[count]
259
+ count += 1
260
+
261
+ if count == 5 && word > 0xffffffff
262
+ raise(Ascii85::DecodingError, "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
263
+ elsif count == 5
264
+ b3 = word & 0xff; word >>= 8
265
+ b2 = word & 0xff; word >>= 8
266
+ b1 = word & 0xff; word >>= 8
267
+ b0 = word
268
+
269
+ wordbuf.setbyte(0, b0)
270
+ wordbuf.setbyte(1, b1)
271
+ wordbuf.setbyte(2, b2)
272
+ wordbuf.setbyte(3, b3)
273
+
274
+ bufwriter.write(wordbuf)
275
+
276
+ word = 0
277
+ count = 0
278
+ end
279
+
280
+ else
281
+ raise(Ascii85::DecodingError, "Illegal character inside Ascii85: #{c.chr.dump}")
282
+ end
283
+ end
284
+ end
102
285
 
103
- #
104
- # Searches through +str+ and decodes the _first_ Ascii85-String found.
105
- #
106
- # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it will
107
- # ignore all characters outside these markers. The returned strings are always
108
- # encoded as ASCII-8BIT.
109
- #
110
- # Ascii85.decode("<~;KZGo~>")
111
- # => "Ruby"
112
- #
113
- # Ascii85.decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz")
114
- # => "Ruby"
115
- #
116
- # Ascii85.decode("No markers")
117
- # => ""
118
- #
119
- # #decode will raise Ascii85::DecodingError when malformed input is
120
- # encountered.
121
- #
122
- def self.decode(str)
123
- input = str.to_s
286
+ # We're done if all 5-tuples have been consumed
287
+ if count.zero?
288
+ bufwriter.flush
289
+ return out || bufwriter.io.string.force_encoding('ASCII-8BIT')
290
+ end
124
291
 
125
- opening_delim = '<~'
126
- closing_delim = '~>'
292
+ raise(Ascii85::DecodingError, 'Last 5-tuple consists of single character') if count == 1
293
+
294
+ # Finish last, partially decoded 32-bit word
295
+ count -= 1
296
+ word += lut[count]
297
+
298
+ bufwriter.write((word >> 24).chr) if count >= 1
299
+ bufwriter.write(((word >> 16) & 0xff).chr) if count >= 2
300
+ bufwriter.write(((word >> 8) & 0xff).chr) if count == 3
301
+ bufwriter.flush
302
+
303
+ out || bufwriter.io.string.force_encoding('ASCII-8BIT')
304
+ end
127
305
 
128
- # Make sure the delimiter strings have the correct encoding.
306
+ private
307
+
308
+ # Reads from an underlying IO object in chunks to increase efficiency. You do not need
309
+ # to use this directly.
129
310
  #
130
- # Although I don't think it likely, this may raise encoding
131
- # errors if an especially exotic input encoding is introduced.
132
- # As of Ruby 1.9.2 all non-dummy encodings work fine though.
311
+ # @private
133
312
  #
134
- if opening_delim.respond_to?(:encode)
135
- opening_delim = opening_delim.encode(input.encoding)
136
- closing_delim = closing_delim.encode(input.encoding)
137
- end
313
+ class BufferedReader
314
+ def initialize(io, buffer_size)
315
+ @io = io
316
+ @buffer_size = buffer_size
317
+ end
138
318
 
139
- # Get the positions of the opening/closing delimiters. If there is
140
- # no pair of opening/closing delimiters, return the empty string.
141
- (start_pos = input.index(opening_delim)) or return ''
142
- (end_pos = input.index(closing_delim, start_pos + 2)) or return ''
143
-
144
- # Get the string inside the delimiter-pair
145
- input = input[(start_pos + 2)...end_pos]
146
-
147
- # Decode
148
- word = 0
149
- count = 0
150
- result = []
151
-
152
- input.each_byte do |c|
153
- case c.chr
154
- when " ", "\t", "\r", "\n", "\f", "\0"
155
- # Ignore whitespace
156
- next
157
-
158
- when 'z'
159
- if count == 0
160
- # Expand z to 0-word
161
- result << 0
162
- else
163
- raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple")
319
+ def each_chunk
320
+ return enum_for(:each_chunk) unless block_given?
321
+
322
+ until @io.eof?
323
+ chunk = @io.read(@buffer_size)
324
+ yield chunk if chunk
164
325
  end
326
+ end
327
+ end
165
328
 
166
- when '!'..'u'
167
- # Decode 5 characters into a 4-byte word
168
- word += (c - 33) * 85**(4 - count)
169
- count += 1
329
+ # Buffers writes to an underlying IO object to increase efficiency. You do not need
330
+ # to use this directly.
331
+ #
332
+ # @private
333
+ #
334
+ class BufferedWriter
335
+ attr_accessor :io
336
+
337
+ def initialize(io, buffer_size)
338
+ @io = io
339
+ @buffer_size = buffer_size
340
+ @buffer = String.new(capacity: buffer_size)
341
+ end
170
342
 
171
- if count == 5
343
+ def write(tuple)
344
+ flush if @buffer.bytesize + tuple.bytesize > @buffer_size
345
+ @buffer << tuple
346
+ end
172
347
 
173
- if word > 0xffffffff
174
- raise(Ascii85::DecodingError,
175
- "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
176
- end
348
+ def flush
349
+ @io.write(@buffer)
350
+ @buffer.clear
351
+ end
352
+ end
177
353
 
178
- result << word
354
+ # Wraps the input in '<~' and '~>' delimiters but otherwise passes it
355
+ # through unmodified to the underlying IO object. You do not need to
356
+ # use this directly.
357
+ #
358
+ # @private
359
+ #
360
+ class DummyWrapper
361
+ def initialize(out)
362
+ @out = out
363
+ @out.write('<~')
364
+ end
179
365
 
180
- word = 0
181
- count = 0
182
- end
366
+ def write(buffer)
367
+ @out.write(buffer)
368
+ end
183
369
 
184
- else
185
- raise(Ascii85::DecodingError,
186
- "Illegal character inside Ascii85: #{c.chr.dump}")
370
+ def finish
371
+ @out.write('~>')
372
+ @out.flush
373
+
374
+ @out
187
375
  end
188
376
  end
189
377
 
190
- # Convert result into a String
191
- result = result.pack('N*')
378
+ # Wraps the input in '<~' and '~>' delimiters and ensures that no line is
379
+ # longer than the specified length. You do not need to use this directly.
380
+ #
381
+ # @private
382
+ #
383
+ class Wrapper
384
+ def initialize(out, wrap_lines)
385
+ @line_length = [2, wrap_lines.to_i].max
192
386
 
193
- if count > 0
194
- # Finish last, partially decoded 32-bit-word
387
+ @out = out
388
+ @out.write('<~')
195
389
 
196
- if count == 1
197
- raise(Ascii85::DecodingError,
198
- "Last 5-tuple consists of single character")
390
+ @cur_len = 2
199
391
  end
200
392
 
201
- count -= 1
202
- word += 85**(4 - count)
393
+ def write(buffer)
394
+ loop do
395
+ s = buffer.bytesize
203
396
 
204
- result << ((word >> 24) & 255).chr if count >= 1
205
- result << ((word >> 16) & 255).chr if count >= 2
206
- result << ((word >> 8) & 255).chr if count == 3
397
+ if @cur_len + s < @line_length
398
+ @out.write(buffer)
399
+ @cur_len += s
400
+ return
401
+ end
402
+
403
+ remaining = @line_length - @cur_len
404
+ @out.write(buffer[0...remaining])
405
+ @out.write("\n")
406
+ @cur_len = 0
407
+ buffer = buffer[remaining..]
408
+ return if buffer.empty?
409
+ end
410
+ end
411
+
412
+ def finish
413
+ # Add the closing delimiter (may need to be pushed to the next line)
414
+ @out.write("\n") if @cur_len + 2 > @line_length
415
+ @out.write('~>')
416
+
417
+ @out.flush
418
+ @out
419
+ end
207
420
  end
208
421
 
209
- return result
422
+ # Check if an object is IO-like
423
+ #
424
+ # @private
425
+ #
426
+ def io_like?(obj)
427
+ obj.respond_to?(:read) &&
428
+ obj.respond_to?(:eof?)
429
+ end
430
+
431
+ # @return [Integer] Buffer size for to-be-encoded input
432
+ #
433
+ def unencoded_chunk_size
434
+ 4 * 2048
435
+ end
436
+
437
+ # @return [Integer] Buffer size for encoded output
438
+ #
439
+ def encoded_chunk_size
440
+ 5 * 2048
441
+ end
210
442
  end
211
443
 
212
444
  #
213
- # This error is raised when Ascii85.decode encounters one of the following
214
- # problems in the input:
445
+ # Error raised when Ascii85 encounters problems while decoding the input.
215
446
  #
216
- # * An invalid character. Valid characters are '!'..'u' and 'z'.
217
- # * A 'z' character inside a 5-tuple. 'z's are only valid on their own.
447
+ # This error is raised for the following issues:
448
+ # * An invalid character (valid characters are '!'..'u' and 'z')
449
+ # * A 'z' character inside a 5-tuple ('z' is only valid on its own)
218
450
  # * An invalid 5-tuple that decodes to >= 2**32
219
451
  # * The last tuple consisting of a single character. Valid tuples always have
220
452
  # at least two characters.