Ascii85 2.0.0 → 2.0.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e134217a95580db89dd446ac40511345e8c0a317be617b9b04e2ac8ca5db9670
4
- data.tar.gz: bcc1f3f71f2f5748958825602ef58c7365ef52f7fe3265a9409fd1b84b5c8d4f
3
+ metadata.gz: 32c27ed5387adae778bb5f1b5ef82da6ca878d6cd3128dd9a6d65c182ad89296
4
+ data.tar.gz: 81ba054e37db894fcb7411c143c6cfade2437bc91fd00956eeaf16233e96c25e
5
5
  SHA512:
6
- metadata.gz: 4c15434410e46485ada5dcc04929092f77da1533861d22ffe65ab0e2f1e8300a821b16524fbeb5ad137cc6a139232a5684dfe6fd34c5ad33e19d68a95d3f604e
7
- data.tar.gz: a397048e6009d3adf0c2582cfe9dbd913f6b8888c19fbf48939b1a7ab48de704fa7fb76253d5d3a05d105215fb9e9f2da299d91c12cf5e6c2a3cb2636a2b6f8a
6
+ metadata.gz: da7757b98444ed6e745c0c9aa7930aa20346a273f94d316cb6eae473a6be293fd592320718f3d1eaef0ebc7d877deb4cb8ec0a43b13f4595835f56053abf3e5d
7
+ data.tar.gz: e881585d8b1f1cbdba501e7891daa2ed19b268c0b67d0cc8e424d944ce84d16c685b563bc18f34cd92db45b9a8e306e99e7b62b6d41908443eecdc15ee8bcf30
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Ascii85 Changelog
2
2
 
3
+ ## [2.0.1] - 2024-09-15
4
+
5
+ ### Fixed
6
+
7
+ - Decoding binary data could lead to Encoding errors (Issue #8)
8
+
3
9
  ## [2.0.0] - 2024-08-20
4
10
 
5
11
  ### BREAKING CHANGES
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ascii85
4
- VERSION = '2.0.0'
4
+ VERSION = '2.0.1'
5
5
  end
data/lib/ascii85.rb CHANGED
@@ -14,6 +14,11 @@ require 'stringio'
14
14
  #
15
15
  module Ascii85
16
16
  class << self
17
+ EMPTY_STRING = ''.dup.force_encoding(Encoding::ASCII_8BIT)
18
+ START_MARKER = '<~'.dup.force_encoding(Encoding::ASCII_8BIT)
19
+ ENDING_MARKER = '~>'.dup.force_encoding(Encoding::ASCII_8BIT)
20
+ LINE_BREAK = "\n".dup.force_encoding(Encoding::ASCII_8BIT)
21
+
17
22
  #
18
23
  # Encodes the bytes of the given String or IO-like object as Ascii85.
19
24
  #
@@ -55,22 +60,24 @@ module Ascii85
55
60
  StringIO.new(str_or_io.to_s, 'rb')
56
61
  end
57
62
 
58
- return ''.dup if reader.eof?
63
+ return EMPTY_STRING.dup if reader.eof?
59
64
 
60
65
  # Setup buffered Reader and Writers
61
66
  bufreader = BufferedReader.new(reader, unencoded_chunk_size)
62
67
  bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), encoded_chunk_size)
63
68
  writer = wrap_lines ? Wrapper.new(bufwriter, wrap_lines) : DummyWrapper.new(bufwriter)
64
69
 
65
- padding = "\0\0\0\0"
66
- tuplebuf = '!!!!!'.dup
70
+ padding = unfrozen_binary_copy("\0\0\0\0")
71
+ tuplebuf = unfrozen_binary_copy('!!!!!')
72
+ exclamations = unfrozen_binary_copy('!!!!!')
73
+ z = unfrozen_binary_copy('z')
67
74
 
68
75
  bufreader.each_chunk do |chunk|
69
76
  chunk.unpack('N*').each do |word|
70
77
  # Encode each big-endian 32-bit word into a 5-character tuple (except
71
78
  # for 0, which encodes to 'z')
72
79
  if word.zero?
73
- writer.write('z')
80
+ writer.write(z)
74
81
  else
75
82
  word, b0 = word.divmod(85)
76
83
  word, b1 = word.divmod(85)
@@ -98,7 +105,7 @@ module Ascii85
98
105
 
99
106
  # Encode the last word and cut off any padding
100
107
  if word.zero?
101
- writer.write('!!!!!'[0..(4 - padding_length)])
108
+ writer.write(exclamations[0..(4 - padding_length)])
102
109
  else
103
110
  word, b0 = word.divmod(85)
104
111
  word, b1 = word.divmod(85)
@@ -119,7 +126,7 @@ module Ascii85
119
126
  # If no output IO-object was provided, extract the encoded String from the
120
127
  # default StringIO writer. We force the encoding to 'ASCII-8BIT' to work
121
128
  # around a TruffleRuby bug.
122
- return writer.finish.io.string.force_encoding('ASCII-8BIT') if out.nil?
129
+ return writer.finish.io.string.force_encoding(Encoding::ASCII_8BIT) if out.nil?
123
130
 
124
131
  # Otherwise we make sure to flush the output writer, and then return it.
125
132
  writer.finish.io
@@ -151,8 +158,8 @@ module Ascii85
151
158
 
152
159
  # Get the positions of the opening/closing delimiters. If there is no pair
153
160
  # of opening/closing delimiters, return an unfrozen empty String.
154
- (start_pos = input.index(opening_delim)) or return ''.dup
155
- (end_pos = input.index(closing_delim, start_pos + 2)) or return ''.dup
161
+ (start_pos = input.index(opening_delim)) or return EMPTY_STRING.dup
162
+ (end_pos = input.index(closing_delim, start_pos + 2)) or return EMPTY_STRING.dup
156
163
 
157
164
  # Get the String inside the delimiter-pair
158
165
  input[(start_pos + 2)...end_pos]
@@ -226,7 +233,7 @@ module Ascii85
226
233
  end
227
234
 
228
235
  # Return an unfrozen String on empty input
229
- return ''.dup if reader.eof?
236
+ return EMPTY_STRING.dup if reader.eof?
230
237
 
231
238
  # Setup buffered Reader and Writers
232
239
  bufreader = BufferedReader.new(reader, encoded_chunk_size)
@@ -238,7 +245,8 @@ module Ascii85
238
245
  # Decode
239
246
  word = 0
240
247
  count = 0
241
- wordbuf = "\0\0\0\0".dup
248
+ zeroes = unfrozen_binary_copy("\0\0\0\0")
249
+ wordbuf = zeroes.dup
242
250
 
243
251
  bufreader.each_chunk do |chunk|
244
252
  chunk.each_byte do |c|
@@ -251,7 +259,7 @@ module Ascii85
251
259
  raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple") unless count.zero?
252
260
 
253
261
  # Expand z to 0-word
254
- bufwriter.write("\0\0\0\0")
262
+ bufwriter.write(zeroes)
255
263
 
256
264
  when '!'..'u'
257
265
  # Decode 5 characters into a 4-byte word
@@ -286,7 +294,7 @@ module Ascii85
286
294
  # We're done if all 5-tuples have been consumed
287
295
  if count.zero?
288
296
  bufwriter.flush
289
- return out || bufwriter.io.string.force_encoding('ASCII-8BIT')
297
+ return out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT)
290
298
  end
291
299
 
292
300
  raise(Ascii85::DecodingError, 'Last 5-tuple consists of single character') if count == 1
@@ -300,11 +308,17 @@ module Ascii85
300
308
  bufwriter.write(((word >> 8) & 0xff).chr) if count == 3
301
309
  bufwriter.flush
302
310
 
303
- out || bufwriter.io.string.force_encoding('ASCII-8BIT')
311
+ out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT)
304
312
  end
305
313
 
306
314
  private
307
315
 
316
+ # Copies the given String and forces the encoding of the returned copy to
317
+ # be Encoding::ASCII_8BIT.
318
+ def unfrozen_binary_copy(str)
319
+ str.dup.force_encoding(Encoding::ASCII_8BIT)
320
+ end
321
+
308
322
  # Buffers an underlying IO object to increase efficiency. You do not need
309
323
  # to use this directly.
310
324
  #
@@ -337,7 +351,7 @@ module Ascii85
337
351
  def initialize(io, buffer_size)
338
352
  @io = io
339
353
  @buffer_size = buffer_size
340
- @buffer = String.new(capacity: buffer_size)
354
+ @buffer = String.new(capacity: buffer_size, encoding: Encoding::ASCII_8BIT)
341
355
  end
342
356
 
343
357
  def write(tuple)
@@ -360,7 +374,7 @@ module Ascii85
360
374
  class DummyWrapper
361
375
  def initialize(out)
362
376
  @out = out
363
- @out.write('<~')
377
+ @out.write(START_MARKER)
364
378
  end
365
379
 
366
380
  def write(buffer)
@@ -368,7 +382,7 @@ module Ascii85
368
382
  end
369
383
 
370
384
  def finish
371
- @out.write('~>')
385
+ @out.write(ENDING_MARKER)
372
386
  @out.flush
373
387
 
374
388
  @out
@@ -385,7 +399,7 @@ module Ascii85
385
399
  @line_length = [2, wrap_lines.to_i].max
386
400
 
387
401
  @out = out
388
- @out.write('<~')
402
+ @out.write(START_MARKER)
389
403
 
390
404
  @cur_len = 2
391
405
  end
@@ -402,7 +416,7 @@ module Ascii85
402
416
 
403
417
  remaining = @line_length - @cur_len
404
418
  @out.write(buffer[0...remaining])
405
- @out.write("\n")
419
+ @out.write(LINE_BREAK)
406
420
  @cur_len = 0
407
421
  buffer = buffer[remaining..]
408
422
  return if buffer.empty?
@@ -411,8 +425,8 @@ module Ascii85
411
425
 
412
426
  def finish
413
427
  # Add the closing delimiter (may need to be pushed to the next line)
414
- @out.write("\n") if @cur_len + 2 > @line_length
415
- @out.write('~>')
428
+ @out.write(LINE_BREAK) if @cur_len + 2 > @line_length
429
+ @out.write(ENDING_MARKER)
416
430
 
417
431
  @out.flush
418
432
  @out
@@ -35,7 +35,11 @@ TEST_CASES = {
35
35
  '<~j+42iJVN3:K&_E6j+<0KJW/W?W8iG`j+EuaK"9on^Z0sZj+FJoK:LtSKB%T?~>',
36
36
 
37
37
  [Math::PI].pack('G') => '<~5RAV2<(&;T~>',
38
- [Math::E].pack('G') => '<~5R"n0M\\K6,~>'
38
+ [Math::E].pack('G') => '<~5R"n0M\\K6,~>',
39
+
40
+ # Minified example from Github issue 8.
41
+ # Note that OT and OU as the trailing characters are equivalent.
42
+ "\x9B\xB6\xB9+\x91" => '<~S$ojXOT~>'
39
43
  }.freeze
40
44
 
41
45
  describe Ascii85 do
@@ -59,6 +63,12 @@ describe Ascii85 do
59
63
  end
60
64
  end
61
65
 
66
+ it 'should always return unfrozen Strings' do
67
+ TEST_CASES.each_pair do |input, encoded|
68
+ assert_equal false, Ascii85.encode(input).frozen?
69
+ end
70
+ end
71
+
62
72
  it 'should encode Strings in different encodings correctly' do
63
73
  input_euc_jp = 'どうもありがとうミスターロボット'.encode('EUC-JP')
64
74
  input_binary = input_euc_jp.force_encoding('ASCII-8BIT')
@@ -139,6 +149,12 @@ describe Ascii85 do
139
149
  end
140
150
  end
141
151
 
152
+ it 'should always return unfrozen Strings' do
153
+ TEST_CASES.each_pair do |input, encoded|
154
+ assert_equal false, Ascii85.decode(encoded).frozen?
155
+ end
156
+ end
157
+
142
158
  it 'should accept valid input in encodings other than the default' do
143
159
  input = 'Ragnarök τέχνη русский язык I ♥ Ruby'
144
160
  input_ascii85 = Ascii85.encode(input)
@@ -217,6 +233,13 @@ describe Ascii85 do
217
233
  end
218
234
  end
219
235
 
236
+ it 'should always return unfrozen Strings' do
237
+ TEST_CASES.each_pair do |decoded, input|
238
+ raw_input = input[2...-2] # Remove '<~' and '~>'
239
+ assert_equal false, Ascii85.decode_raw(raw_input).frozen?
240
+ end
241
+ end
242
+
220
243
  it 'should decode from an IO object' do
221
244
  input = StringIO.new(';KZGo')
222
245
  result = Ascii85.decode_raw(input)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: Ascii85
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Johannes Holzfuß
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-20 00:00:00.000000000 Z
11
+ date: 2024-09-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: minitest