iostreams 1.3.3 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -1
- data/lib/io_streams/builder.rb +19 -5
- data/lib/io_streams/bzip2/reader.rb +3 -3
- data/lib/io_streams/bzip2/writer.rb +3 -3
- data/lib/io_streams/encode/reader.rb +0 -2
- data/lib/io_streams/errors.rb +12 -0
- data/lib/io_streams/io_streams.rb +0 -4
- data/lib/io_streams/line/reader.rb +28 -16
- data/lib/io_streams/path.rb +3 -1
- data/lib/io_streams/paths/s3.rb +5 -2
- data/lib/io_streams/paths/sftp.rb +5 -2
- data/lib/io_streams/pgp.rb +7 -13
- data/lib/io_streams/stream.rb +60 -5
- data/lib/io_streams/tabular.rb +23 -25
- data/lib/io_streams/tabular/parser/csv.rb +4 -2
- data/lib/io_streams/tabular/parser/fixed.rb +60 -30
- data/lib/io_streams/tabular/utility/csv_row.rb +1 -4
- data/lib/io_streams/version.rb +1 -1
- data/test/builder_test.rb +29 -0
- data/test/bzip2_writer_test.rb +6 -4
- data/test/deprecated_test.rb +2 -0
- data/test/files/test.psv +4 -0
- data/test/files/unclosed_quote_large_test.csv +1658 -0
- data/test/files/unclosed_quote_test2.csv +3 -0
- data/test/line_reader_test.rb +30 -4
- data/test/stream_test.rb +174 -8
- data/test/tabular_test.rb +71 -40
- metadata +28 -23
data/test/line_reader_test.rb
CHANGED
@@ -14,6 +14,14 @@ class LineReaderTest < Minitest::Test
|
|
14
14
|
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test.csv")
|
15
15
|
end
|
16
16
|
|
17
|
+
let :unclosed_quote_file2 do
|
18
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test2.csv")
|
19
|
+
end
|
20
|
+
|
21
|
+
let :unclosed_quote_large_file do
|
22
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_large_test.csv")
|
23
|
+
end
|
24
|
+
|
17
25
|
let :data do
|
18
26
|
data = []
|
19
27
|
File.open(file_name, "rt") do |file|
|
@@ -51,13 +59,31 @@ class LineReaderTest < Minitest::Test
|
|
51
59
|
assert_equal 4, lines.count
|
52
60
|
end
|
53
61
|
|
54
|
-
it "raises error for
|
55
|
-
assert_raises(
|
62
|
+
it "raises error for unbalanced quotes" do
|
63
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
56
64
|
IOStreams::Line::Reader.file(unclosed_quote_file, embedded_within: '"') do |io|
|
57
|
-
io.each
|
58
|
-
|
65
|
+
io.each { |line| }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
69
|
+
end
|
70
|
+
|
71
|
+
it "raises error for unclosed quote" do
|
72
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
73
|
+
IOStreams::Line::Reader.file(unclosed_quote_file2, embedded_within: '"') do |io|
|
74
|
+
io.each { |line| }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "raises error for unclosed quote before eof" do
|
81
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
82
|
+
IOStreams::Line::Reader.file(unclosed_quote_large_file, embedded_within: '"', buffer_size: 20) do |io|
|
83
|
+
io.each { |line| }
|
59
84
|
end
|
60
85
|
end
|
86
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
61
87
|
end
|
62
88
|
end
|
63
89
|
end
|
data/test/stream_test.rb
CHANGED
@@ -45,9 +45,9 @@ class StreamTest < Minitest::Test
|
|
45
45
|
it "reads a zip file" do
|
46
46
|
File.open(multiple_zip_file_name, "rb") do |io|
|
47
47
|
result = IOStreams::Stream.new(io).
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
file_name(multiple_zip_file_name).
|
49
|
+
option(:zip, entry_file_name: "test.json").
|
50
|
+
read
|
51
51
|
assert_equal contents_test_json, result
|
52
52
|
end
|
53
53
|
end
|
@@ -55,8 +55,8 @@ class StreamTest < Minitest::Test
|
|
55
55
|
it "reads a zip file from within a gz file" do
|
56
56
|
File.open(zip_gz_file_name, "rb") do |io|
|
57
57
|
result = IOStreams::Stream.new(io).
|
58
|
-
|
59
|
-
|
58
|
+
file_name(zip_gz_file_name).
|
59
|
+
read
|
60
60
|
assert_equal contents_test_txt, result
|
61
61
|
end
|
62
62
|
end
|
@@ -71,7 +71,7 @@ class StreamTest < Minitest::Test
|
|
71
71
|
describe ".record_reader" do
|
72
72
|
end
|
73
73
|
|
74
|
-
describe "
|
74
|
+
describe "#each(:line)" do
|
75
75
|
it "returns a line at a time" do
|
76
76
|
lines = []
|
77
77
|
stream.stream(:none)
|
@@ -91,10 +91,114 @@ class StreamTest < Minitest::Test
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
-
describe "
|
94
|
+
describe "#each(:array)" do
|
95
|
+
describe "csv" do
|
96
|
+
let :source_file_name do
|
97
|
+
File.join(__dir__, "files", "test.csv")
|
98
|
+
end
|
99
|
+
|
100
|
+
let :expected_rows do
|
101
|
+
rows = []
|
102
|
+
CSV.open(source_file_name).each { |row| rows << row }
|
103
|
+
rows
|
104
|
+
end
|
105
|
+
|
106
|
+
it "detects format from file_name" do
|
107
|
+
output = []
|
108
|
+
stream.file_name = source_file_name
|
109
|
+
stream.each(:array) { |record| output << record }
|
110
|
+
assert_equal expected_rows, output
|
111
|
+
end
|
112
|
+
|
113
|
+
it "honors format" do
|
114
|
+
output = []
|
115
|
+
stream.file_name = "blah"
|
116
|
+
stream.format = :csv
|
117
|
+
stream.each(:array) { |record| output << record }
|
118
|
+
assert_equal expected_rows, output
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
describe "psv" do
|
123
|
+
let :source_file_name do
|
124
|
+
File.join(__dir__, "files", "test.psv")
|
125
|
+
end
|
126
|
+
|
127
|
+
let :expected_rows do
|
128
|
+
File.readlines(source_file_name).collect { |line| line.chomp.split("|") }
|
129
|
+
end
|
130
|
+
|
131
|
+
it "detects format from file_name" do
|
132
|
+
output = []
|
133
|
+
stream.file_name = source_file_name
|
134
|
+
stream.each(:array) { |record| output << record }
|
135
|
+
assert_equal expected_rows, output
|
136
|
+
end
|
137
|
+
|
138
|
+
it "honors format" do
|
139
|
+
output = []
|
140
|
+
stream.file_name = "blah"
|
141
|
+
stream.format = :psv
|
142
|
+
stream.each(:array) { |record| output << record }
|
143
|
+
assert_equal expected_rows, output
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "json" do
|
148
|
+
let :source_file_name do
|
149
|
+
File.join(__dir__, "files", "test.json")
|
150
|
+
end
|
151
|
+
|
152
|
+
let :expected_rows do
|
153
|
+
hash_rows = File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
154
|
+
rows = []
|
155
|
+
rows << hash_rows.first.keys
|
156
|
+
hash_rows.each { |hash| rows << hash.values }
|
157
|
+
rows
|
158
|
+
end
|
159
|
+
|
160
|
+
it "detects format from file_name" do
|
161
|
+
skip "TODO: Support reading json files as arrays"
|
162
|
+
output = []
|
163
|
+
stream.file_name = source_file_name
|
164
|
+
stream.each(:array) { |record| output << record }
|
165
|
+
assert_equal expected_rows, output
|
166
|
+
end
|
167
|
+
|
168
|
+
it "honors format" do
|
169
|
+
skip "TODO: Support reading json files as arrays"
|
170
|
+
output = []
|
171
|
+
stream.file_name = "blah"
|
172
|
+
stream.format = :json
|
173
|
+
stream.each(:array) { |record| output << record }
|
174
|
+
assert_equal expected_rows, output
|
175
|
+
end
|
176
|
+
end
|
95
177
|
end
|
96
178
|
|
97
|
-
describe ".each
|
179
|
+
describe ".each hash" do
|
180
|
+
let :source_file_name do
|
181
|
+
File.join(__dir__, "files", "test.json")
|
182
|
+
end
|
183
|
+
|
184
|
+
let :expected_json do
|
185
|
+
File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
186
|
+
end
|
187
|
+
|
188
|
+
it "detects format from file_name" do
|
189
|
+
output = []
|
190
|
+
stream.file_name = source_file_name
|
191
|
+
stream.each(:hash) { |record| output << record }
|
192
|
+
assert_equal expected_json, output
|
193
|
+
end
|
194
|
+
|
195
|
+
it "honors format" do
|
196
|
+
output = []
|
197
|
+
stream.file_name = "blah"
|
198
|
+
stream.format = :json
|
199
|
+
stream.each(:hash) { |record| output << record }
|
200
|
+
assert_equal expected_json, output
|
201
|
+
end
|
98
202
|
end
|
99
203
|
|
100
204
|
describe "#writer" do
|
@@ -359,6 +463,24 @@ class StreamTest < Minitest::Test
|
|
359
463
|
end
|
360
464
|
assert_equal "\nHe\n\nl\n\nlo \nWorld\n\n", io.string, io.string.inspect
|
361
465
|
end
|
466
|
+
|
467
|
+
it "honors format" do
|
468
|
+
io = StringIO.new
|
469
|
+
IOStreams::Stream.new(io).format(:psv).writer(:array) do |stream|
|
470
|
+
stream << %w[first_name last_name]
|
471
|
+
stream << %w[Jack Johnson]
|
472
|
+
end
|
473
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
474
|
+
end
|
475
|
+
|
476
|
+
it "auto detects format" do
|
477
|
+
io = StringIO.new
|
478
|
+
IOStreams::Stream.new(io).file_name("abc.psv").writer(:array) do |stream|
|
479
|
+
stream << %w[first_name last_name]
|
480
|
+
stream << %w[Jack Johnson]
|
481
|
+
end
|
482
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
483
|
+
end
|
362
484
|
end
|
363
485
|
end
|
364
486
|
|
@@ -402,6 +524,50 @@ class StreamTest < Minitest::Test
|
|
402
524
|
end
|
403
525
|
assert_equal "first_name,last_name\nJack,Johnson\n\n{:first_name=>\"Able\", :last_name=>\"Smith\"}\n\n", io.string, io.string.inspect
|
404
526
|
end
|
527
|
+
|
528
|
+
it "honors format" do
|
529
|
+
io = StringIO.new
|
530
|
+
IOStreams::Stream.new(io).format(:json).writer(:hash) do |stream|
|
531
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
532
|
+
end
|
533
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
534
|
+
end
|
535
|
+
|
536
|
+
it "auto detects format" do
|
537
|
+
io = StringIO.new
|
538
|
+
IOStreams::Stream.new(io).file_name("abc.json").writer(:hash) do |stream|
|
539
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
540
|
+
end
|
541
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
542
|
+
end
|
543
|
+
end
|
544
|
+
end
|
545
|
+
|
546
|
+
describe "#format" do
|
547
|
+
it "detects the format from the file name" do
|
548
|
+
stream.file_name = "abc.json"
|
549
|
+
assert_equal :json, stream.format
|
550
|
+
end
|
551
|
+
|
552
|
+
it "is nil if the file name has no meaningful format" do
|
553
|
+
assert_nil stream.format
|
554
|
+
end
|
555
|
+
|
556
|
+
it "returns set format with no file_name" do
|
557
|
+
stream.format = :csv
|
558
|
+
assert_equal :csv, stream.format
|
559
|
+
end
|
560
|
+
|
561
|
+
it "returns set format with file_name" do
|
562
|
+
stream.file_name = "abc.json"
|
563
|
+
stream.format = :csv
|
564
|
+
assert_equal :csv, stream.format
|
565
|
+
end
|
566
|
+
|
567
|
+
it "validates bad format" do
|
568
|
+
assert_raises ArgumentError do
|
569
|
+
stream.format = :blah
|
570
|
+
end
|
405
571
|
end
|
406
572
|
end
|
407
573
|
end
|
data/test/tabular_test.rb
CHANGED
@@ -10,6 +10,36 @@ class TabularTest < Minitest::Test
|
|
10
10
|
IOStreams::Tabular.new(columns: %w[first_field second third], format: format)
|
11
11
|
end
|
12
12
|
|
13
|
+
let :fixed do
|
14
|
+
layout = [
|
15
|
+
{size: 23, key: :name},
|
16
|
+
{size: 40, key: :address},
|
17
|
+
{size: 2},
|
18
|
+
{size: 5, key: :zip, type: :integer},
|
19
|
+
{size: 8, key: :age, type: :integer},
|
20
|
+
{size: 10, key: :weight, type: :float, decimals: 2}
|
21
|
+
]
|
22
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
23
|
+
end
|
24
|
+
|
25
|
+
let :fixed_with_remainder do
|
26
|
+
layout = [
|
27
|
+
{size: 23, key: :name},
|
28
|
+
{size: 40, key: :address},
|
29
|
+
{size: :remainder, key: :remainder}
|
30
|
+
]
|
31
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
32
|
+
end
|
33
|
+
|
34
|
+
let :fixed_discard_remainder do
|
35
|
+
layout = [
|
36
|
+
{size: 23, key: :name},
|
37
|
+
{size: 40, key: :address},
|
38
|
+
{size: :remainder}
|
39
|
+
]
|
40
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
41
|
+
end
|
42
|
+
|
13
43
|
describe "#parse_header" do
|
14
44
|
it "parses and sets the csv header" do
|
15
45
|
tabular = IOStreams::Tabular.new(format: :csv)
|
@@ -136,57 +166,55 @@ class TabularTest < Minitest::Test
|
|
136
166
|
end
|
137
167
|
|
138
168
|
describe ":fixed format" do
|
139
|
-
let :tabular do
|
140
|
-
layout = [
|
141
|
-
{size: 23, key: :name},
|
142
|
-
{size: 40, key: :address},
|
143
|
-
{size: 2},
|
144
|
-
{size: 5, key: :zip, type: :integer},
|
145
|
-
{size: 8, key: :age, type: :integer},
|
146
|
-
{size: 10, key: :weight, type: :float, decimals: 2}
|
147
|
-
]
|
148
|
-
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
149
|
-
end
|
150
|
-
|
151
169
|
it "parses to hash" do
|
152
|
-
assert hash =
|
170
|
+
assert hash = fixed.record_parse("Jack over there XX34618012345670012345.01")
|
153
171
|
assert_equal({name: "Jack", address: "over there", zip: 34_618, age: 1_234_567, weight: 12_345.01}, hash)
|
154
172
|
end
|
155
173
|
|
156
174
|
it "parses short string" do
|
157
175
|
assert_raises IOStreams::Errors::InvalidLineLength do
|
158
|
-
|
176
|
+
fixed.record_parse("Jack over th")
|
159
177
|
end
|
160
178
|
end
|
161
179
|
|
162
180
|
it "parses longer string" do
|
163
181
|
assert_raises IOStreams::Errors::InvalidLineLength do
|
164
|
-
|
182
|
+
fixed.record_parse("Jack over there XX34618012345670012345.01............")
|
165
183
|
end
|
166
184
|
end
|
167
185
|
|
168
186
|
it "parses zero values" do
|
169
|
-
assert hash =
|
187
|
+
assert hash = fixed.record_parse(" 00000000000000000000000")
|
170
188
|
assert_equal({name: "", address: "", zip: 0, age: 0, weight: 0.0}, hash)
|
171
189
|
end
|
172
190
|
|
173
191
|
it "parses empty values" do
|
174
|
-
assert hash =
|
192
|
+
assert hash = fixed.record_parse(" XX ")
|
175
193
|
assert_equal({name: "", address: "", zip: nil, age: nil, weight: nil}, hash)
|
176
194
|
end
|
177
195
|
|
178
196
|
it "parses blank strings" do
|
179
|
-
skip "TODO: Part of
|
180
|
-
assert hash =
|
197
|
+
skip "TODO: Part of fixed refactor to get this working"
|
198
|
+
assert hash = fixed.record_parse(" ")
|
181
199
|
assert_equal({name: "", address: "", zip: nil, age: nil, weight: nil}, hash)
|
182
200
|
end
|
183
201
|
|
184
202
|
it "parses nil data as nil" do
|
185
|
-
refute
|
203
|
+
refute fixed.record_parse(nil)
|
186
204
|
end
|
187
205
|
|
188
206
|
it "parses empty string as nil" do
|
189
|
-
refute
|
207
|
+
refute fixed.record_parse("")
|
208
|
+
end
|
209
|
+
|
210
|
+
it "parses remainder" do
|
211
|
+
hash = fixed_with_remainder.record_parse("Jack over there XX34618012345670012345.01............")
|
212
|
+
assert_equal({name: "Jack", address: "over there", remainder: "XX34618012345670012345.01............"}, hash)
|
213
|
+
end
|
214
|
+
|
215
|
+
it "discards remainder" do
|
216
|
+
hash = fixed_discard_remainder.record_parse("Jack over there XX34618012345670012345.01............")
|
217
|
+
assert_equal({name: "Jack", address: "over there"}, hash)
|
190
218
|
end
|
191
219
|
end
|
192
220
|
|
@@ -236,52 +264,55 @@ class TabularTest < Minitest::Test
|
|
236
264
|
end
|
237
265
|
|
238
266
|
describe ":fixed format" do
|
239
|
-
let :tabular do
|
240
|
-
layout = [
|
241
|
-
{size: 23, key: :name},
|
242
|
-
{size: 40, key: :address},
|
243
|
-
{size: 2},
|
244
|
-
{size: 5, key: :zip, type: :integer},
|
245
|
-
{size: 8, key: :age, type: :integer},
|
246
|
-
{size: 10, key: :weight, type: :float, decimals: 2}
|
247
|
-
]
|
248
|
-
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
249
|
-
end
|
250
|
-
|
251
267
|
it "renders fixed data" do
|
252
|
-
assert string =
|
268
|
+
assert string = fixed.render(name: "Jack", address: "over there", zip: 34_618, weight: 123_456.789123, age: 21)
|
253
269
|
assert_equal "Jack over there 34618000000210123456.79", string
|
254
270
|
end
|
255
271
|
|
256
272
|
it "truncates long strings" do
|
257
|
-
assert string =
|
273
|
+
assert string = fixed.render(name: "Jack ran up the beanstalk and when jack reached the top it was truncated", address: "over there", zip: 34_618)
|
258
274
|
assert_equal "Jack ran up the beanstaover there 34618000000000000000.00", string
|
259
275
|
end
|
260
276
|
|
261
277
|
it "when integer is too large" do
|
262
278
|
assert_raises IOStreams::Errors::ValueTooLong do
|
263
|
-
|
279
|
+
fixed.render(zip: 3_461_832_653_653_265)
|
264
280
|
end
|
265
281
|
end
|
266
282
|
|
267
283
|
it "when float is too large" do
|
268
284
|
assert_raises IOStreams::Errors::ValueTooLong do
|
269
|
-
|
285
|
+
fixed.render(weight: 3_461_832_653_653_265.234)
|
270
286
|
end
|
271
287
|
end
|
272
288
|
|
273
289
|
it "renders nil as empty string" do
|
274
|
-
assert string =
|
290
|
+
assert string = fixed.render(zip: 34_618)
|
275
291
|
assert_equal " 34618000000000000000.00", string
|
276
292
|
end
|
277
293
|
|
278
294
|
it "renders boolean" do
|
279
|
-
assert string =
|
295
|
+
assert string = fixed.render(name: true, address: false)
|
280
296
|
assert_equal "true false 00000000000000000000.00", string
|
281
297
|
end
|
282
298
|
|
283
299
|
it "renders no data as nil" do
|
284
|
-
refute
|
300
|
+
refute fixed.render({})
|
301
|
+
end
|
302
|
+
|
303
|
+
it "any size last string" do
|
304
|
+
assert string = fixed_with_remainder.render(name: "Jack", address: "over there", remainder: "XX34618012345670012345.01............")
|
305
|
+
assert_equal "Jack over there XX34618012345670012345.01............", string
|
306
|
+
end
|
307
|
+
|
308
|
+
it "nil last string" do
|
309
|
+
assert string = fixed_with_remainder.render(name: "Jack", address: "over there", remainder: nil)
|
310
|
+
assert_equal "Jack over there ", string
|
311
|
+
end
|
312
|
+
|
313
|
+
it "skips last filler" do
|
314
|
+
assert string = fixed_discard_remainder.render(name: "Jack", address: "over there")
|
315
|
+
assert_equal "Jack over there ", string
|
285
316
|
end
|
286
317
|
end
|
287
318
|
end
|