iostreams 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/lib/io_streams/bzip2/reader.rb +1 -1
  3. data/lib/io_streams/bzip2/writer.rb +1 -1
  4. data/lib/io_streams/encode/reader.rb +102 -0
  5. data/lib/io_streams/encode/writer.rb +78 -0
  6. data/lib/io_streams/errors.rb +19 -0
  7. data/lib/io_streams/file/reader.rb +1 -1
  8. data/lib/io_streams/file/writer.rb +1 -3
  9. data/lib/io_streams/gzip/reader.rb +1 -1
  10. data/lib/io_streams/gzip/writer.rb +1 -1
  11. data/lib/io_streams/io_streams.rb +57 -38
  12. data/lib/io_streams/line/reader.rb +125 -69
  13. data/lib/io_streams/line/writer.rb +11 -35
  14. data/lib/io_streams/pgp.rb +1 -1
  15. data/lib/io_streams/record/reader.rb +12 -14
  16. data/lib/io_streams/record/writer.rb +12 -14
  17. data/lib/io_streams/row/reader.rb +15 -16
  18. data/lib/io_streams/row/writer.rb +14 -12
  19. data/lib/io_streams/tabular.rb +50 -30
  20. data/lib/io_streams/tabular/header.rb +6 -6
  21. data/lib/io_streams/tabular/parser/array.rb +2 -2
  22. data/lib/io_streams/tabular/parser/csv.rb +6 -2
  23. data/lib/io_streams/tabular/parser/fixed.rb +18 -37
  24. data/lib/io_streams/tabular/parser/hash.rb +1 -1
  25. data/lib/io_streams/tabular/parser/json.rb +3 -1
  26. data/lib/io_streams/tabular/parser/psv.rb +6 -2
  27. data/lib/io_streams/version.rb +1 -1
  28. data/lib/io_streams/xlsx/reader.rb +22 -32
  29. data/lib/iostreams.rb +6 -0
  30. data/test/encode_reader_test.rb +54 -0
  31. data/test/encode_writer_test.rb +82 -0
  32. data/test/io_streams_test.rb +0 -65
  33. data/test/line_reader_test.rb +180 -37
  34. data/test/tabular_test.rb +79 -3
  35. data/test/test_helper.rb +1 -1
  36. data/test/xlsx_reader_test.rb +7 -10
  37. metadata +10 -4
  38. data/lib/io_streams/tabular/errors.rb +0 -14
@@ -1,6 +1,8 @@
1
1
  require 'io_streams/version'
2
2
  #@formatter:off
3
3
  module IOStreams
4
+ autoload :Errors, 'io_streams/errors'
5
+
4
6
  module Bzip2
5
7
  autoload :Reader, 'io_streams/bzip2/reader'
6
8
  autoload :Writer, 'io_streams/bzip2/writer'
@@ -27,6 +29,10 @@ module IOStreams
27
29
  autoload :Writer, 'io_streams/zip/writer'
28
30
  end
29
31
 
32
+ module Encode
33
+ autoload :Reader, 'io_streams/encode/reader'
34
+ autoload :Writer, 'io_streams/encode/writer'
35
+ end
30
36
  module Line
31
37
  autoload :Reader, 'io_streams/line/reader'
32
38
  autoload :Writer, 'io_streams/line/writer'
@@ -0,0 +1,54 @@
1
+ require_relative 'test_helper'
2
+
3
+ class EncodeReaderTest < Minitest::Test
4
+ describe IOStreams::Encode::Reader do
5
+ let :bad_data do
6
+ [
7
+ "New M\xE9xico,NE".b,
8
+ 'good line',
9
+ "New M\xE9xico,\x07SF".b
10
+ ].join("\n").encode('BINARY')
11
+ end
12
+
13
+ let :cleansed_data do
14
+ bad_data.gsub("\xE9".b, '?')
15
+ end
16
+
17
+ let :stripped_data do
18
+ cleansed_data.gsub("\x07", '')
19
+ end
20
+
21
+ describe '#read' do
22
+ describe 'replacement' do
23
+ it 'does not strip invalid characters' do
24
+ input = StringIO.new(bad_data)
25
+ IOStreams::Encode::Reader.open(input, encoding: 'UTF-8') do |io|
26
+ assert_raises ::Encoding::UndefinedConversionError do
27
+ io.read.encoding
28
+ end
29
+ end
30
+ end
31
+
32
+ it 'strips invalid characters' do
33
+ input = StringIO.new(bad_data)
34
+ data =
35
+ IOStreams::Encode::Reader.open(input, encoding: 'UTF-8', encode_replace: '?') do |io|
36
+ io.read
37
+ end
38
+ assert_equal cleansed_data, data
39
+ end
40
+ end
41
+
42
+ describe 'printable' do
43
+ it 'strips non-printable characters' do
44
+ input = StringIO.new(bad_data)
45
+ data =
46
+ IOStreams::Encode::Reader.open(input, encoding: 'UTF-8', encode_cleaner: :printable, encode_replace: '?') do |io|
47
+ io.read
48
+ end
49
+ assert_equal stripped_data, data
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,82 @@
1
+ require_relative 'test_helper'
2
+
3
+ class EncodeWriterTest < Minitest::Test
4
+ describe IOStreams::Encode::Writer do
5
+ let :bad_data do
6
+ [
7
+ "New M\xE9xico,NE".b,
8
+ 'good line',
9
+ "New M\xE9xico,\x07SF".b
10
+ ].join("\n").encode('BINARY')
11
+ end
12
+
13
+ let :cleansed_data do
14
+ bad_data.gsub("\xE9".b, '?')
15
+ end
16
+
17
+ let :stripped_data do
18
+ cleansed_data.gsub("\x07", '')
19
+ end
20
+
21
+ describe '#<<' do
22
+ it 'file' do
23
+ temp_file = Tempfile.new('rocket_job')
24
+ file_name = temp_file.to_path
25
+ IOStreams::Encode::Writer.open(file_name, encoding: 'ASCII-8BIT') do |io|
26
+ io << bad_data
27
+ end
28
+ result = File.read(file_name, mode: 'rb')
29
+ assert_equal bad_data, result
30
+ end
31
+
32
+ it 'stream' do
33
+ io = StringIO.new(''.b)
34
+ IOStreams::Encode::Writer.open(io, encoding: 'ASCII-8BIT') do |encoded|
35
+ encoded << bad_data
36
+ end
37
+ assert_equal 'ASCII-8BIT', io.string.encoding.to_s
38
+ assert_equal bad_data, io.string
39
+ end
40
+
41
+ it 'stream as utf-8' do
42
+ io = StringIO.new('')
43
+ assert_raises Encoding::UndefinedConversionError do
44
+ IOStreams::Encode::Writer.open(io, encoding: 'UTF-8') do |encoded|
45
+ encoded << bad_data
46
+ end
47
+ end
48
+ end
49
+
50
+ it 'stream as utf-8 with replacement' do
51
+ io = StringIO.new('')
52
+ IOStreams::Encode::Writer.open(io, encoding: 'UTF-8', encode_replace: '?') do |encoded|
53
+ encoded << bad_data
54
+ end
55
+ assert_equal 'UTF-8', io.string.encoding.to_s
56
+ assert_equal cleansed_data, io.string
57
+ end
58
+
59
+ it 'stream as utf-8 with replacement and printable cleansing' do
60
+ io = StringIO.new('')
61
+ IOStreams::Encode::Writer.open(io, encoding: 'UTF-8', encode_replace: '?', encode_cleaner: :printable) do |encoded|
62
+ encoded << bad_data
63
+ end
64
+ assert_equal 'UTF-8', io.string.encoding.to_s
65
+ assert_equal stripped_data, io.string
66
+ end
67
+ end
68
+
69
+ describe '.write' do
70
+ it 'returns byte count' do
71
+ io_string = StringIO.new(''.b)
72
+ count = 0
73
+ IOStreams::Encode::Writer.open(io_string, encoding: 'ASCII-8BIT') do |io|
74
+ count += io.write(bad_data)
75
+ end
76
+ assert_equal bad_data, io_string.string
77
+ assert_equal bad_data.size, count
78
+ end
79
+ end
80
+
81
+ end
82
+ end
@@ -91,70 +91,5 @@ class IOStreamsTest < Minitest::Test
91
91
  end
92
92
  end
93
93
 
94
- describe '.reader' do
95
- # IOStreams.reader('abc.csv') do |io|
96
- # p data while (data = io.read(128))
97
- # end
98
- end
99
-
100
- describe '.each_line' do
101
- # IOStreams.each_line('abc.csv') do |line|
102
- # puts line
103
- # end
104
- end
105
-
106
- describe '.each_row' do
107
- # IOStreams.each_row('abc.csv') do |array|
108
- # p array
109
- # end
110
- end
111
-
112
- describe '.each_record' do
113
- # IOStreams.each_record('abc.csv') do |hash|
114
- # p hash
115
- # end
116
-
117
- # array = [
118
- # 'name, address, zip_code',
119
- # 'Jack, Down Under, 12345'
120
- # ]
121
- # IOStreams.each_record(array) do |hash|
122
- # p hash
123
- # end
124
- end
125
-
126
- describe '.writer' do
127
- # IOStreams.writer('abc.csv') do |io|
128
- # io.write('This')
129
- # io.write(' is ')
130
- # io.write(" one line\n")
131
- # end
132
- end
133
-
134
- describe '.line_writer' do
135
- # IOStreams.line_writer('abc.csv') do |file|
136
- # file << 'these'
137
- # file << 'are'
138
- # file << 'all'
139
- # file << 'separate'
140
- # file << 'lines'
141
- # end
142
- end
143
-
144
- describe '.row_writer' do
145
- # IOStreams.row_writer('abc.csv') do |io|
146
- # io << %w[name address zip_code]
147
- # io << %w[Jack There 1234]
148
- # io << ['Joe', 'Over There somewhere', 1234]
149
- # end
150
- end
151
-
152
- describe '.record_writer' do
153
- # IOStreams.record_writer('abc.csv') do |stream|
154
- # stream << {name: 'Jack', address: 'There', zip_code: 1234}
155
- # stream << {name: 'Joe', address: 'Over There somewhere', zip_code: 1234}
156
- # end
157
- end
158
-
159
94
  end
160
95
  end
@@ -16,40 +16,6 @@ class LineReaderTest < Minitest::Test
16
16
  data
17
17
  end
18
18
 
19
- describe '#initialize' do
20
- it 'does not strip invalid characters' do
21
- bad_lines = [
22
- "New M\xE9xico,NE",
23
- 'good line',
24
- "New M\xE9xico,SF"
25
- ]
26
- input = StringIO.new(bad_lines.join("\n"))
27
- lines = []
28
- IOStreams::Line::Reader.open(input) do |io|
29
- assert_equal false, io.strip_non_printable
30
- assert_raises ArgumentError do
31
- io.each { |line| lines << line }
32
- end
33
- end
34
- end
35
-
36
- it 'strips invalid characters' do
37
- bad_lines = [
38
- "New M\xE9xico,NE",
39
- 'good line',
40
- "New M\xE9xico,SF"
41
- ]
42
- fixed_lines = bad_lines.collect { |line| line.force_encoding('BINARY').gsub(/[^[:print:]|\r|\n]/, '') }
43
- input = StringIO.new(bad_lines.join("\n"))
44
- lines = []
45
- IOStreams::Line::Reader.open(input, strip_non_printable: true) do |io|
46
- assert_equal true, io.strip_non_printable
47
- io.each { |line| lines << line }
48
- end
49
- assert_equal fixed_lines, lines
50
- end
51
- end
52
-
53
19
  describe '#each' do
54
20
  it 'each_line file' do
55
21
  lines = []
@@ -69,7 +35,7 @@ class LineReaderTest < Minitest::Test
69
35
  assert_equal data, lines
70
36
  end
71
37
 
72
- ["\r\n", "\n\r", "\n", "\r"].each do |delimiter|
38
+ ["\r\n", "\n", "\r"].each do |delimiter|
73
39
  it "autodetect delimiter: #{delimiter.inspect}" do
74
40
  lines = []
75
41
  stream = StringIO.new(data.join(delimiter))
@@ -78,6 +44,15 @@ class LineReaderTest < Minitest::Test
78
44
  end
79
45
  assert_equal data, lines
80
46
  end
47
+
48
+ it "single read autodetect delimiter: #{delimiter.inspect}" do
49
+ lines = []
50
+ stream = StringIO.new(data.join(delimiter))
51
+ IOStreams::Line::Reader.open(stream) do |io|
52
+ io.each { |line| lines << line }
53
+ end
54
+ assert_equal data, lines
55
+ end
81
56
  end
82
57
 
83
58
  ['@', 'BLAH'].each do |delimiter|
@@ -94,12 +69,180 @@ class LineReaderTest < Minitest::Test
94
69
  it 'reads binary delimited' do
95
70
  delimiter = "\x01"
96
71
  lines = []
97
- stream = StringIO.new(data.join(delimiter))
98
- IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter, encoding: IOStreams::BINARY_ENCODING) do |io|
72
+ stream = StringIO.new(data.join(delimiter).encode('ASCII-8BIT'))
73
+ IOStreams::Line::Reader.open(stream, buffer_size: 15, delimiter: delimiter) do |io|
99
74
  io.each { |line| lines << line }
100
75
  end
101
76
  assert_equal data, lines
102
77
  end
78
+
79
+ describe '#readline' do
80
+ let(:short_line) { '0123456789' }
81
+ let(:longer_line) { 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' }
82
+ let(:delimiter) { "\r\n" }
83
+
84
+ it 'reads delimiter in first block, no delimiter at end' do
85
+ data = [short_line, longer_line].join(delimiter)
86
+ buffer_size = short_line.length + delimiter.size + (longer_line.size / 2)
87
+
88
+ stream = StringIO.new(data)
89
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
90
+ refute io.eof?
91
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
92
+
93
+ assert_equal short_line, io.readline
94
+ assert_equal longer_line, io.readline
95
+
96
+ assert io.eof?
97
+ assert_nil io.readline
98
+ end
99
+ end
100
+
101
+ it 'reads delimiter in second block, no delimiter at end' do
102
+ data = [longer_line, short_line, short_line].join(delimiter)
103
+ buffer_size = (longer_line.length + delimiter.size + 5) / 2
104
+
105
+ stream = StringIO.new(data)
106
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
107
+ refute io.eof?
108
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
109
+ assert_equal longer_line, io.readline
110
+ assert_equal short_line, io.readline
111
+ assert_equal short_line, io.readline
112
+ assert io.eof?
113
+ assert_nil io.readline
114
+ end
115
+ end
116
+
117
+ it 'reads delimiter split across first and second blocks' do
118
+ data = [longer_line, short_line, short_line].join(delimiter)
119
+ buffer_size = longer_line.length + 1
120
+
121
+ stream = StringIO.new(data)
122
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
123
+ refute io.eof?
124
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
125
+ assert_equal longer_line, io.readline
126
+ assert_equal short_line, io.readline
127
+ assert_equal short_line, io.readline
128
+ assert io.eof?
129
+ assert_nil io.readline
130
+ end
131
+ end
132
+
133
+ it 'reads file with no matching delimiter' do
134
+ delimiter = '@'
135
+ data = [longer_line, short_line, longer_line].join(delimiter) + delimiter
136
+ buffer_size = longer_line.length + 1
137
+
138
+ stream = StringIO.new(data)
139
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
140
+ refute io.eof?
141
+ assert_equal "\n", io.delimiter, -> { io.delimiter.ai }
142
+ assert_equal data, io.readline
143
+ assert io.eof?
144
+ assert_nil io.readline
145
+ end
146
+ end
147
+
148
+ it 'reads small file with no matching delimiter' do
149
+ data = short_line
150
+ buffer_size = short_line.length + 100
151
+
152
+ stream = StringIO.new(data)
153
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
154
+ refute io.eof?
155
+ assert_equal "\n", io.delimiter, -> { io.delimiter.ai }
156
+ assert_equal short_line, io.readline
157
+ assert io.eof?
158
+ assert_nil io.readline
159
+ end
160
+ end
161
+
162
+ it 'reads last line with the delimiter as the last character' do
163
+ delimiter = '@'
164
+ data = [longer_line, short_line, longer_line].join(delimiter) + delimiter
165
+ buffer_size = longer_line.length + 1
166
+
167
+ stream = StringIO.new(data)
168
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size, delimiter: delimiter) do |io|
169
+ refute io.eof?
170
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
171
+ assert_equal longer_line, io.readline
172
+ assert_equal short_line, io.readline
173
+ assert_equal longer_line, io.readline
174
+ assert_nil io.readline
175
+ assert io.eof?
176
+ end
177
+ end
178
+
179
+ it 'reads last line with the multi-byte delimiter as the last bytes' do
180
+ data = [longer_line, short_line, longer_line].join(delimiter) + delimiter
181
+ buffer_size = longer_line.length + 1
182
+
183
+ stream = StringIO.new(data)
184
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
185
+ refute io.eof?
186
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
187
+ assert_equal longer_line, io.readline
188
+ assert_equal short_line, io.readline
189
+ assert_equal longer_line, io.readline
190
+ assert_nil io.readline
191
+ assert io.eof?
192
+ end
193
+ end
194
+
195
+ describe 'read 1 char at a time' do
196
+ let(:buffer_size) { 1 }
197
+
198
+ it 'delimiter at the end' do
199
+ data = [longer_line, short_line, longer_line].join(delimiter) + delimiter
200
+
201
+ stream = StringIO.new(data)
202
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
203
+ refute io.eof?
204
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
205
+ assert_equal longer_line, io.readline
206
+ assert_equal short_line, io.readline
207
+ assert_equal longer_line, io.readline
208
+ assert_nil io.readline
209
+ assert io.eof?
210
+ end
211
+ end
212
+
213
+ it 'no delimiter at the end' do
214
+ data = [longer_line, short_line, longer_line].join(delimiter)
215
+
216
+ stream = StringIO.new(data)
217
+ IOStreams::Line::Reader.open(stream, buffer_size: buffer_size) do |io|
218
+ refute io.eof?
219
+ assert_equal delimiter, io.delimiter, -> { io.delimiter.ai }
220
+ assert_equal longer_line, io.readline
221
+ assert_equal short_line, io.readline
222
+ assert_equal longer_line, io.readline
223
+ assert_nil io.readline
224
+ assert io.eof?
225
+ end
226
+ end
227
+ end
228
+
229
+ it 'reads empty file' do
230
+ stream = StringIO.new
231
+ IOStreams::Line::Reader.open(stream) do |io|
232
+ assert io.eof?
233
+ end
234
+ end
235
+
236
+ it 'prevents denial of service' do
237
+ data = 'a' * IOStreams::Line::Reader::MAX_BLOCKS_MULTIPLIER + 'a'
238
+ stream = StringIO.new(data)
239
+ assert_raises IOStreams::Errors::DelimiterNotFound do
240
+ IOStreams::Line::Reader.open(stream, buffer_size: 1) do |io|
241
+ end
242
+ end
243
+
244
+ end
245
+ end
103
246
  end
104
247
  end
105
248
  end