Ascii85 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ascii85.rb CHANGED
@@ -1,220 +1,452 @@
1
- # encoding: utf-8
2
1
  # frozen_string_literal: true
3
2
 
3
+ require 'stringio'
4
4
 
5
5
  #
6
6
  # Ascii85 is an implementation of Adobe's binary-to-text encoding of the
7
7
  # same name in pure Ruby.
8
8
  #
9
- # See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131
10
- # and http://en.wikipedia.org/wiki/Ascii85 for more information about
11
- # the format.
9
+ # See http://en.wikipedia.org/wiki/Ascii85 for more information about the
10
+ # format.
12
11
  #
13
12
  # Author:: Johannes Holzfuß (johannes@holzfuss.name)
14
13
  # License:: Distributed under the MIT License (see LICENSE file)
15
14
  #
15
+ module Ascii85
16
+ class << self
17
+ #
18
+ # Encodes the bytes of the given String or IO-like object as Ascii85.
19
+ #
20
+ # @param str_or_io [String, IO] The input to encode
21
+ # @param wrap_lines [Integer, false] The line length for wrapping, or +false+ for no wrapping
22
+ # @param out [IO, nil] An optional IO-like object to write the output to
23
+ #
24
+ # @return [String, IO] The encoded String or the output IO object that was passed in
25
+ #
26
+ # @example Encoding a simple String
27
+ # Ascii85.encode("Ruby")
28
+ # # => <~;KZGo~>
29
+ #
30
+ # @example Encoding with line wrapping
31
+ # Ascii85.encode("Supercalifragilisticexpialidocious", 15)
32
+ # # => <~;g!%jEarNoBkD
33
+ # # BoB5)0rF*),+AU&
34
+ # # 0.@;KXgDe!L"F`R
35
+ # # ~>
36
+ #
37
+ # @example Encoding without line wrapping
38
+ # Ascii85.encode("Supercalifragilisticexpialidocious", false)
39
+ # # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
40
+ #
41
+ # @example Encoding from an IO-like object
42
+ # input = StringIO.new("Ruby")
43
+ # Ascii85.encode(input)
44
+ # # => "<~;KZGo~>"
45
+ #
46
+ # @example Encoding to an IO object
47
+ # output = StringIO.new
48
+ # Ascii85.encode("Ruby", out: output)
49
+ # # => output (with "<~;KZGo~>" written to it)
50
+ #
51
def encode(str_or_io, wrap_lines = 80, out: nil)
  # Anything that is not IO-like is stringified and wrapped in a binary,
  # read-only StringIO so both kinds of input are consumed the same way.
  source = io_like?(str_or_io) ? str_or_io : StringIO.new(str_or_io.to_s, 'rb')

  # Empty input yields an unfrozen empty String; no delimiters are written.
  return ''.dup if source.eof?

  # Buffered input/output plus the delimiter/line-wrapping layer on top.
  chunks = BufferedReader.new(source, unencoded_chunk_size)
  buffered = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), encoded_chunk_size)
  sink = if wrap_lines
           Wrapper.new(buffered, wrap_lines)
         else
           DummyWrapper.new(buffered)
         end

  zeros = "\0\0\0\0"
  tuple = '!!!!!'.dup # scratch buffer reused for every 5-character tuple

  chunks.each_chunk do |chunk|
    # Every complete big-endian 32-bit word becomes either 'z' (for zero)
    # or five base-85 digits offset by 33 ('!').
    chunk.unpack('N*').each do |word|
      if word.zero?
        sink.write('z')
        next
      end

      rest, d0 = word.divmod(85)
      rest, d1 = rest.divmod(85)
      rest, d2 = rest.divmod(85)
      d4, d3 = rest.divmod(85)

      tuple.setbyte(0, d4 + 33)
      tuple.setbyte(1, d3 + 33)
      tuple.setbyte(2, d2 + 33)
      tuple.setbyte(3, d1 + 33)
      tuple.setbyte(4, d0 + 33)

      sink.write(tuple)
    end

    # Bytes that did not fill a whole word (1..3 of them) are handled below.
    leftover = chunk.bytesize % 4
    next if leftover.zero?

    # Zero-pad the leftover bytes to a full word, encode it, and keep only
    # the first (leftover + 1) characters of the tuple.
    word = (chunk[-leftover..] + zeros[0...(4 - leftover)]).unpack1('N')
    kept = 0..leftover

    if word.zero?
      # The 'z' shorthand cannot be used for a truncated tuple.
      sink.write('!!!!!'[kept])
    else
      rest, d0 = word.divmod(85)
      rest, d1 = rest.divmod(85)
      rest, d2 = rest.divmod(85)
      d4, d3 = rest.divmod(85)

      tuple.setbyte(0, d4 + 33)
      tuple.setbyte(1, d3 + 33)
      tuple.setbyte(2, d2 + 33)
      tuple.setbyte(3, d1 + 33)
      tuple.setbyte(4, d0 + 33)

      sink.write(tuple[kept])
    end
  end

  # Finishing writes the closing delimiter and flushes the buffered writer.
  target = sink.finish.io

  # Without a caller-supplied IO, hand back the accumulated String. The
  # encoding is forced to 'ASCII-8BIT' to work around a TruffleRuby bug.
  out.nil? ? target.string.force_encoding('ASCII-8BIT') : target
end
82
127
 
83
- # Otherwise we wrap the lines
84
- line_length = [2, wrap_lines.to_i].max
128
+ # Searches through a String and extracts the first substring enclosed by '<~' and '~>'.
129
+ #
130
+ # @param str [String] The String to search through
131
+ #
132
+ # @return [String] The extracted substring, or an empty String if no valid delimiters are found
133
+ #
134
+ # @example Extracting Ascii85 content
135
+ # Ascii85.extract("Foo<~;KZGo~>Bar<~z~>Baz")
136
+ # # => ";KZGo"
137
+ #
138
+ # @example When no delimiters are found
139
+ # Ascii85.extract("No delimiters")
140
+ # # => ""
141
+ #
142
+ # @note This method only accepts a String, not an IO-like object, as the entire input
143
+ # needs to be available to ensure validity.
144
+ #
145
def extract(str)
  haystack = str.to_s

  # The delimiters are transcoded so that searching also works for inputs
  # in encodings that are not ASCII-compatible.
  target_encoding = haystack.encoding
  opener = '<~'.encode(target_encoding)
  closer = '~>'.encode(target_encoding)

  # No opening delimiter: hand back an unfrozen empty String.
  opened_at = haystack.index(opener)
  return ''.dup if opened_at.nil?

  # The closing delimiter must start after the opening one.
  body_start = opened_at + 2
  closed_at = haystack.index(closer, body_start)
  return ''.dup if closed_at.nil?

  # Everything strictly between the first delimiter pair.
  haystack[body_start...closed_at]
end
92
160
 
93
- # Add end-marker – on a new line if necessary
94
- if (wrapped.last.length + 2) > line_length
95
- wrapped << '~>'
96
- else
97
- wrapped[-1] << '~>'
161
+ #
162
+ # Searches through a String and decodes the first substring enclosed by '<~' and '~>'.
163
+ #
164
+ # @param str [String] The String containing Ascii85-encoded content
165
+ # @param out [IO, nil] An optional IO-like object to write the output to
166
+ #
167
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
168
+ #
169
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
170
+ #
171
+ # @example Decoding Ascii85 content
172
+ # Ascii85.decode("<~;KZGo~>")
173
+ # # => "Ruby"
174
+ #
175
+ # @example Decoding with multiple Ascii85 blocks present (ignores all but the first)
176
+ # Ascii85.decode("Foo<~;KZGo~>Bar<~87cURDZ~>Baz")
177
+ # # => "Ruby"
178
+ #
179
+ # @example When no delimiters are found
180
+ # Ascii85.decode("No delimiters")
181
+ # # => ""
182
+ #
183
+ # @example Decoding to an IO object
184
+ # output = StringIO.new
185
+ # Ascii85.decode("<~;KZGo~>", out: output)
186
+ # # => output (with "Ruby" written to it)
187
+ #
188
+ # @note This method only accepts a String, not an IO-like object, as the entire input
189
+ # needs to be available to ensure validity.
190
+ #
191
def decode(str, out: nil)
  # Isolate the payload of the first '<~' ... '~>' pair, then decode it.
  payload = extract(str)
  decode_raw(payload, out: out)
end
99
194
 
100
- return wrapped.join("\n")
101
- end
195
+ #
196
+ # Decodes the given raw Ascii85-encoded String or IO-like object.
197
+ #
198
+ # @param str_or_io [String, IO] The Ascii85-encoded input to decode
199
+ # @param out [IO, nil] An optional IO-like object to write the output to
200
+ #
201
+ # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided)
202
+ #
203
+ # @raise [Ascii85::DecodingError] When malformed input is encountered
204
+ #
205
+ # @example Decoding a raw Ascii85 String
206
+ # Ascii85.decode_raw(";KZGo")
207
+ # # => "Ruby"
208
+ #
209
+ # @example Decoding from an IO-like object
210
+ # input = StringIO.new(";KZGo")
211
+ # Ascii85.decode_raw(input)
212
+ # # => "Ruby"
213
+ #
214
+ # @example Decoding to an IO object
215
+ # output = StringIO.new
216
+ # Ascii85.decode_raw(";KZGo", out: output)
217
+ # # => output (with "Ruby" written to it)
218
+ #
219
+ # @note The input must not be enclosed in '<~' and '~>' delimiters.
220
+ #
221
def decode_raw(str_or_io, out: nil)
  # Non-IO input is stringified and wrapped in a binary, read-only StringIO.
  reader = if io_like?(str_or_io)
             str_or_io
           else
             StringIO.new(str_or_io.to_s, 'rb')
           end

  # Return an unfrozen String on empty input
  return ''.dup if reader.eof?

  # Set up the buffered reader and writer
  bufreader = BufferedReader.new(reader, encoded_chunk_size)
  bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), unencoded_chunk_size)

  # Lookup table of the positional weights 85**4 .. 85**0 (caches the
  # exponentiation)
  lut = (0..4).map { |count| 85**(4 - count) }

  word = 0                    # value of the tuple decoded so far
  count = 0                   # number of characters consumed for this tuple
  wordbuf = "\0\0\0\0".dup    # scratch buffer reused for every decoded word

  bufreader.each_chunk do |chunk|
    chunk.each_byte do |c|
      # Bytes are compared as Integers to avoid allocating a String per byte.
      case c
      when 32, 9, 13, 10, 12, 0
        # Ignore whitespace (' ', "\t", "\r", "\n", "\f") and NUL bytes
        next

      when 122
        # 'z' is shorthand for an all-zero word and is only valid between tuples
        raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple") unless count.zero?

        bufwriter.write("\0\0\0\0")

      when 33..117
        # '!'..'u': accumulate one base-85 digit of the current tuple
        word += (c - 33) * lut[count]
        count += 1
        next if count < 5

        if word > 0xffffffff
          raise(Ascii85::DecodingError, "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
        end

        # Emit the completed word as four big-endian bytes
        wordbuf.setbyte(0, word >> 24)
        wordbuf.setbyte(1, (word >> 16) & 0xff)
        wordbuf.setbyte(2, (word >> 8) & 0xff)
        wordbuf.setbyte(3, word & 0xff)
        bufwriter.write(wordbuf)

        word = 0
        count = 0

      else
        raise(Ascii85::DecodingError, "Illegal character inside Ascii85: #{c.chr.dump}")
      end
    end
  end

  raise(Ascii85::DecodingError, 'Last 5-tuple consists of single character') if count == 1

  unless count.zero?
    # Finish the last, partially decoded 32-bit word: n characters carry
    # n - 1 bytes. Adding the weight of the last seen position rounds the
    # truncated value up so the leading bytes come out right.
    count -= 1
    word += lut[count]

    # A malformed trailing tuple (e.g. "uu") can exceed 32 bits. Without this
    # check the Integer#chr calls below would raise a RangeError instead of
    # the documented DecodingError.
    if word > 0xffffffff
      raise(Ascii85::DecodingError, "Invalid Ascii85 partial tuple (#{word} >= 2**32)")
    end

    bufwriter.write((word >> 24).chr) if count >= 1
    bufwriter.write(((word >> 16) & 0xff).chr) if count >= 2
    bufwriter.write(((word >> 8) & 0xff).chr) if count == 3
  end

  bufwriter.flush

  # Return the caller's IO if one was given, otherwise the decoded String
  # (forced to ASCII-8BIT).
  out || bufwriter.io.string.force_encoding('ASCII-8BIT')
end
127
305
 
128
- # Make sure the delimiter strings have the correct encoding.
306
+ private
307
+
308
+ # Buffers an underlying IO object to increase efficiency. You do not need
309
+ # to use this directly.
129
310
  #
130
- # Although I don't think it likely, this may raise encoding
131
- # errors if an especially exotic input encoding is introduced.
132
- # As of Ruby 1.9.2 all non-dummy encodings work fine though.
311
+ # @private
133
312
  #
134
- if opening_delim.respond_to?(:encode)
135
- opening_delim = opening_delim.encode(input.encoding)
136
- closing_delim = closing_delim.encode(input.encoding)
137
- end
313
class BufferedReader
  # @param io [#read, #eof?] the underlying IO-like object to read from
  # @param buffer_size [Integer] number of bytes requested per chunk
  def initialize(io, buffer_size)
    @io = io
    @buffer_size = buffer_size
  end

  # Yields the contents of the underlying IO in chunks of +buffer_size+
  # bytes. Only the last chunk may be shorter.
  #
  # IO-like objects are allowed to return fewer bytes than requested before
  # reaching EOF. Such short reads are topped up here, so that consumers
  # which treat a short chunk as the end of the data (e.g. to apply padding)
  # never see one in the middle of the stream.
  #
  # @yieldparam chunk [String] the next chunk of input
  # @return [Enumerator] if no block is given
  def each_chunk
    return enum_for(:each_chunk) unless block_given?

    until @io.eof?
      chunk = @io.read(@buffer_size)
      next unless chunk

      # Top up short reads until the chunk is full or the input is exhausted.
      while chunk.bytesize < @buffer_size && !@io.eof?
        extra = @io.read(@buffer_size - chunk.bytesize)
        # Bail out instead of spinning if the IO returns nothing despite
        # not reporting EOF.
        break if extra.nil? || extra.empty?

        # Join as binary so differing String encodings cannot clash; this
        # also leaves the String returned by the IO untouched.
        chunk = chunk.b << extra.b
      end

      yield chunk
    end
  end
end
165
328
 
166
- when '!'..'u'
167
- # Decode 5 characters into a 4-byte word
168
- word += (c - 33) * 85**(4 - count)
169
- count += 1
329
+ # Buffers an underlying IO object to increase efficiency. You do not need
330
+ # to use this directly.
331
+ #
332
+ # @private
333
+ #
334
class BufferedWriter
  # The underlying IO-like object that receives the flushed data.
  attr_accessor :io

  # @param io [#write] the underlying IO-like object
  # @param buffer_size [Integer] number of bytes to collect before writing
  def initialize(io, buffer_size)
    @buffer = String.new(capacity: buffer_size)
    @buffer_size = buffer_size
    @io = io
  end

  # Appends +tuple+ to the internal buffer. The buffer is flushed first if
  # appending would push it past the configured size.
  def write(tuple)
    would_overflow = (@buffer.bytesize + tuple.bytesize) > @buffer_size
    flush if would_overflow

    @buffer.concat(tuple)
  end

  # Writes the buffered data to the underlying IO and empties the buffer.
  def flush
    @io.write(@buffer)
    @buffer.clear
  end
end
177
353
 
178
- result << word
354
+ # Wraps the input in '<~' and '~>' delimiters and otherwise passes it
355
+ # through unmodified to the underlying IO object. You do not need to
356
+ # use this directly.
357
+ #
358
+ # @private
359
+ #
360
class DummyWrapper
  # Remembers the output object and immediately emits the opening delimiter.
  def initialize(out)
    @out = out
    out.write('<~')
  end

  # Passes +buffer+ straight through to the output object.
  def write(buffer)
    @out.write(buffer)
  end

  # Emits the closing delimiter, flushes the output object and returns it.
  def finish
    @out.tap do |target|
      target.write('~>')
      target.flush
    end
  end
end
189
377
 
190
- # Convert result into a String
191
- result = result.pack('N*')
378
+ # Wraps the input in '<~' and '~>' delimiters and ensures that no line is
379
+ # longer than the specified length. You do not need to use this directly.
380
+ #
381
+ # @private
382
+ #
383
class Wrapper
  # @param out [#write, #flush] the object receiving the wrapped output
  # @param wrap_lines [#to_i] maximum line length (values below 2 become 2)
  def initialize(out, wrap_lines)
    @line_length = [2, wrap_lines.to_i].max

    @out = out
    @out.write('<~')

    # The opening delimiter already occupies two columns of the first line.
    @cur_len = 2
  end

  # Writes +buffer+, inserting a newline whenever the current line is full.
  def write(buffer)
    pending = buffer

    loop do
      size = pending.bytesize

      # Everything fits on the current line with room to spare.
      if @cur_len + size < @line_length
        @out.write(pending)
        @cur_len += size
        break
      end

      # Fill the current line up to the limit, break it, and carry on with
      # whatever is left over.
      room = @line_length - @cur_len
      @out.write(pending[0...room])
      @out.write("\n")
      @cur_len = 0

      pending = pending[room..]
      break if pending.empty?
    end
  end

  # Emits the closing delimiter (on a new line if it no longer fits on the
  # current one), flushes the output object and returns it.
  def finish
    needs_break = (@cur_len + 2) > @line_length
    @out.write("\n") if needs_break
    @out.write('~>')

    @out.flush
    @out
  end
end
208
421
 
209
- return result
422
+ # Check if an object is IO-like
423
+ #
424
+ # @private
425
+ #
426
def io_like?(obj)
  # An object counts as IO-like when it answers to both #read and #eof?.
  %i[read eof?].all? { |message| obj.respond_to?(message) }
end
430
+
431
+ # @return [Integer] Buffer size for to-be-encoded input
432
+ #
433
def unencoded_chunk_size
  # 2048 words of 4 bytes each
  2048 * 4
end
436
+
437
+ # @return [Integer] Buffer size for encoded output
438
+ #
439
def encoded_chunk_size
  # 2048 tuples of 5 characters each
  2048 * 5
end
210
442
  end
211
443
 
212
444
  #
213
- # This error is raised when Ascii85.decode encounters one of the following
214
- # problems in the input:
445
+ # Error raised when Ascii85 encounters problems while decoding the input.
215
446
  #
216
- # * An invalid character. Valid characters are '!'..'u' and 'z'.
217
- # * A 'z' character inside a 5-tuple. 'z's are only valid on their own.
447
+ # This error is raised for the following issues:
448
+ # * An invalid character (valid characters are '!'..'u' and 'z')
449
+ # * A 'z' character inside a 5-tuple ('z' is only valid on its own)
218
450
  # * An invalid 5-tuple that decodes to >= 2**32
219
451
  # * The last tuple consisting of a single character. Valid tuples always have
220
452
  # at least two characters.