iostreams 1.4.0 → 1.6.2
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -1
- data/lib/io_streams/builder.rb +19 -5
- data/lib/io_streams/bzip2/reader.rb +1 -3
- data/lib/io_streams/bzip2/writer.rb +1 -3
- data/lib/io_streams/encode/reader.rb +0 -2
- data/lib/io_streams/errors.rb +12 -0
- data/lib/io_streams/io_streams.rb +0 -4
- data/lib/io_streams/line/reader.rb +28 -16
- data/lib/io_streams/path.rb +3 -1
- data/lib/io_streams/paths/s3.rb +5 -2
- data/lib/io_streams/paths/sftp.rb +18 -9
- data/lib/io_streams/pgp.rb +7 -13
- data/lib/io_streams/stream.rb +60 -5
- data/lib/io_streams/tabular.rb +23 -25
- data/lib/io_streams/tabular/parser/csv.rb +4 -2
- data/lib/io_streams/tabular/parser/fixed.rb +60 -30
- data/lib/io_streams/tabular/utility/csv_row.rb +1 -4
- data/lib/io_streams/version.rb +1 -1
- data/test/builder_test.rb +29 -0
- data/test/deprecated_test.rb +2 -0
- data/test/files/test.psv +4 -0
- data/test/files/unclosed_quote_large_test.csv +1658 -0
- data/test/files/unclosed_quote_test2.csv +3 -0
- data/test/line_reader_test.rb +30 -4
- data/test/stream_test.rb +174 -8
- data/test/tabular_test.rb +71 -40
- metadata +47 -42
data/test/line_reader_test.rb
CHANGED
@@ -14,6 +14,14 @@ class LineReaderTest < Minitest::Test
|
|
14
14
|
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test.csv")
|
15
15
|
end
|
16
16
|
|
17
|
+
let :unclosed_quote_file2 do
|
18
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test2.csv")
|
19
|
+
end
|
20
|
+
|
21
|
+
let :unclosed_quote_large_file do
|
22
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_large_test.csv")
|
23
|
+
end
|
24
|
+
|
17
25
|
let :data do
|
18
26
|
data = []
|
19
27
|
File.open(file_name, "rt") do |file|
|
@@ -51,13 +59,31 @@ class LineReaderTest < Minitest::Test
|
|
51
59
|
assert_equal 4, lines.count
|
52
60
|
end
|
53
61
|
|
54
|
-
it "raises error for
|
55
|
-
assert_raises(
|
62
|
+
it "raises error for unbalanced quotes" do
|
63
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
56
64
|
IOStreams::Line::Reader.file(unclosed_quote_file, embedded_within: '"') do |io|
|
57
|
-
io.each
|
58
|
-
|
65
|
+
io.each { |line| }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
69
|
+
end
|
70
|
+
|
71
|
+
it "raises error for unclosed quote" do
|
72
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
73
|
+
IOStreams::Line::Reader.file(unclosed_quote_file2, embedded_within: '"') do |io|
|
74
|
+
io.each { |line| }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "raises error for unclosed quote before eof" do
|
81
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
82
|
+
IOStreams::Line::Reader.file(unclosed_quote_large_file, embedded_within: '"', buffer_size: 20) do |io|
|
83
|
+
io.each { |line| }
|
59
84
|
end
|
60
85
|
end
|
86
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
61
87
|
end
|
62
88
|
end
|
63
89
|
end
|
data/test/stream_test.rb
CHANGED
@@ -45,9 +45,9 @@ class StreamTest < Minitest::Test
|
|
45
45
|
it "reads a zip file" do
|
46
46
|
File.open(multiple_zip_file_name, "rb") do |io|
|
47
47
|
result = IOStreams::Stream.new(io).
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
file_name(multiple_zip_file_name).
|
49
|
+
option(:zip, entry_file_name: "test.json").
|
50
|
+
read
|
51
51
|
assert_equal contents_test_json, result
|
52
52
|
end
|
53
53
|
end
|
@@ -55,8 +55,8 @@ class StreamTest < Minitest::Test
|
|
55
55
|
it "reads a zip file from within a gz file" do
|
56
56
|
File.open(zip_gz_file_name, "rb") do |io|
|
57
57
|
result = IOStreams::Stream.new(io).
|
58
|
-
|
59
|
-
|
58
|
+
file_name(zip_gz_file_name).
|
59
|
+
read
|
60
60
|
assert_equal contents_test_txt, result
|
61
61
|
end
|
62
62
|
end
|
@@ -71,7 +71,7 @@ class StreamTest < Minitest::Test
|
|
71
71
|
describe ".record_reader" do
|
72
72
|
end
|
73
73
|
|
74
|
-
describe "
|
74
|
+
describe "#each(:line)" do
|
75
75
|
it "returns a line at a time" do
|
76
76
|
lines = []
|
77
77
|
stream.stream(:none)
|
@@ -91,10 +91,114 @@ class StreamTest < Minitest::Test
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
-
describe "
|
94
|
+
describe "#each(:array)" do
|
95
|
+
describe "csv" do
|
96
|
+
let :source_file_name do
|
97
|
+
File.join(__dir__, "files", "test.csv")
|
98
|
+
end
|
99
|
+
|
100
|
+
let :expected_rows do
|
101
|
+
rows = []
|
102
|
+
CSV.open(source_file_name).each { |row| rows << row }
|
103
|
+
rows
|
104
|
+
end
|
105
|
+
|
106
|
+
it "detects format from file_name" do
|
107
|
+
output = []
|
108
|
+
stream.file_name = source_file_name
|
109
|
+
stream.each(:array) { |record| output << record }
|
110
|
+
assert_equal expected_rows, output
|
111
|
+
end
|
112
|
+
|
113
|
+
it "honors format" do
|
114
|
+
output = []
|
115
|
+
stream.file_name = "blah"
|
116
|
+
stream.format = :csv
|
117
|
+
stream.each(:array) { |record| output << record }
|
118
|
+
assert_equal expected_rows, output
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
describe "psv" do
|
123
|
+
let :source_file_name do
|
124
|
+
File.join(__dir__, "files", "test.psv")
|
125
|
+
end
|
126
|
+
|
127
|
+
let :expected_rows do
|
128
|
+
File.readlines(source_file_name).collect { |line| line.chomp.split("|") }
|
129
|
+
end
|
130
|
+
|
131
|
+
it "detects format from file_name" do
|
132
|
+
output = []
|
133
|
+
stream.file_name = source_file_name
|
134
|
+
stream.each(:array) { |record| output << record }
|
135
|
+
assert_equal expected_rows, output
|
136
|
+
end
|
137
|
+
|
138
|
+
it "honors format" do
|
139
|
+
output = []
|
140
|
+
stream.file_name = "blah"
|
141
|
+
stream.format = :psv
|
142
|
+
stream.each(:array) { |record| output << record }
|
143
|
+
assert_equal expected_rows, output
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "json" do
|
148
|
+
let :source_file_name do
|
149
|
+
File.join(__dir__, "files", "test.json")
|
150
|
+
end
|
151
|
+
|
152
|
+
let :expected_rows do
|
153
|
+
hash_rows = File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
154
|
+
rows = []
|
155
|
+
rows << hash_rows.first.keys
|
156
|
+
hash_rows.each { |hash| rows << hash.values }
|
157
|
+
rows
|
158
|
+
end
|
159
|
+
|
160
|
+
it "detects format from file_name" do
|
161
|
+
skip "TODO: Support reading json files as arrays"
|
162
|
+
output = []
|
163
|
+
stream.file_name = source_file_name
|
164
|
+
stream.each(:array) { |record| output << record }
|
165
|
+
assert_equal expected_rows, output
|
166
|
+
end
|
167
|
+
|
168
|
+
it "honors format" do
|
169
|
+
skip "TODO: Support reading json files as arrays"
|
170
|
+
output = []
|
171
|
+
stream.file_name = "blah"
|
172
|
+
stream.format = :json
|
173
|
+
stream.each(:array) { |record| output << record }
|
174
|
+
assert_equal expected_rows, output
|
175
|
+
end
|
176
|
+
end
|
95
177
|
end
|
96
178
|
|
97
|
-
describe ".each
|
179
|
+
describe ".each hash" do
|
180
|
+
let :source_file_name do
|
181
|
+
File.join(__dir__, "files", "test.json")
|
182
|
+
end
|
183
|
+
|
184
|
+
let :expected_json do
|
185
|
+
File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
186
|
+
end
|
187
|
+
|
188
|
+
it "detects format from file_name" do
|
189
|
+
output = []
|
190
|
+
stream.file_name = source_file_name
|
191
|
+
stream.each(:hash) { |record| output << record }
|
192
|
+
assert_equal expected_json, output
|
193
|
+
end
|
194
|
+
|
195
|
+
it "honors format" do
|
196
|
+
output = []
|
197
|
+
stream.file_name = "blah"
|
198
|
+
stream.format = :json
|
199
|
+
stream.each(:hash) { |record| output << record }
|
200
|
+
assert_equal expected_json, output
|
201
|
+
end
|
98
202
|
end
|
99
203
|
|
100
204
|
describe "#writer" do
|
@@ -359,6 +463,24 @@ class StreamTest < Minitest::Test
|
|
359
463
|
end
|
360
464
|
assert_equal "\nHe\n\nl\n\nlo \nWorld\n\n", io.string, io.string.inspect
|
361
465
|
end
|
466
|
+
|
467
|
+
it "honors format" do
|
468
|
+
io = StringIO.new
|
469
|
+
IOStreams::Stream.new(io).format(:psv).writer(:array) do |stream|
|
470
|
+
stream << %w[first_name last_name]
|
471
|
+
stream << %w[Jack Johnson]
|
472
|
+
end
|
473
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
474
|
+
end
|
475
|
+
|
476
|
+
it "auto detects format" do
|
477
|
+
io = StringIO.new
|
478
|
+
IOStreams::Stream.new(io).file_name("abc.psv").writer(:array) do |stream|
|
479
|
+
stream << %w[first_name last_name]
|
480
|
+
stream << %w[Jack Johnson]
|
481
|
+
end
|
482
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
483
|
+
end
|
362
484
|
end
|
363
485
|
end
|
364
486
|
|
@@ -402,6 +524,50 @@ class StreamTest < Minitest::Test
|
|
402
524
|
end
|
403
525
|
assert_equal "first_name,last_name\nJack,Johnson\n\n{:first_name=>\"Able\", :last_name=>\"Smith\"}\n\n", io.string, io.string.inspect
|
404
526
|
end
|
527
|
+
|
528
|
+
it "honors format" do
|
529
|
+
io = StringIO.new
|
530
|
+
IOStreams::Stream.new(io).format(:json).writer(:hash) do |stream|
|
531
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
532
|
+
end
|
533
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
534
|
+
end
|
535
|
+
|
536
|
+
it "auto detects format" do
|
537
|
+
io = StringIO.new
|
538
|
+
IOStreams::Stream.new(io).file_name("abc.json").writer(:hash) do |stream|
|
539
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
540
|
+
end
|
541
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
542
|
+
end
|
543
|
+
end
|
544
|
+
end
|
545
|
+
|
546
|
+
describe "#format" do
|
547
|
+
it "detects the format from the file name" do
|
548
|
+
stream.file_name = "abc.json"
|
549
|
+
assert_equal :json, stream.format
|
550
|
+
end
|
551
|
+
|
552
|
+
it "is nil if the file name has no meaningful format" do
|
553
|
+
assert_nil stream.format
|
554
|
+
end
|
555
|
+
|
556
|
+
it "returns set format with no file_name" do
|
557
|
+
stream.format = :csv
|
558
|
+
assert_equal :csv, stream.format
|
559
|
+
end
|
560
|
+
|
561
|
+
it "returns set format with file_name" do
|
562
|
+
stream.file_name = "abc.json"
|
563
|
+
stream.format = :csv
|
564
|
+
assert_equal :csv, stream.format
|
565
|
+
end
|
566
|
+
|
567
|
+
it "validates bad format" do
|
568
|
+
assert_raises ArgumentError do
|
569
|
+
stream.format = :blah
|
570
|
+
end
|
405
571
|
end
|
406
572
|
end
|
407
573
|
end
|
data/test/tabular_test.rb
CHANGED
@@ -10,6 +10,36 @@ class TabularTest < Minitest::Test
|
|
10
10
|
IOStreams::Tabular.new(columns: %w[first_field second third], format: format)
|
11
11
|
end
|
12
12
|
|
13
|
+
let :fixed do
|
14
|
+
layout = [
|
15
|
+
{size: 23, key: :name},
|
16
|
+
{size: 40, key: :address},
|
17
|
+
{size: 2},
|
18
|
+
{size: 5, key: :zip, type: :integer},
|
19
|
+
{size: 8, key: :age, type: :integer},
|
20
|
+
{size: 10, key: :weight, type: :float, decimals: 2}
|
21
|
+
]
|
22
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
23
|
+
end
|
24
|
+
|
25
|
+
let :fixed_with_remainder do
|
26
|
+
layout = [
|
27
|
+
{size: 23, key: :name},
|
28
|
+
{size: 40, key: :address},
|
29
|
+
{size: :remainder, key: :remainder}
|
30
|
+
]
|
31
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
32
|
+
end
|
33
|
+
|
34
|
+
let :fixed_discard_remainder do
|
35
|
+
layout = [
|
36
|
+
{size: 23, key: :name},
|
37
|
+
{size: 40, key: :address},
|
38
|
+
{size: :remainder}
|
39
|
+
]
|
40
|
+
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
41
|
+
end
|
42
|
+
|
13
43
|
describe "#parse_header" do
|
14
44
|
it "parses and sets the csv header" do
|
15
45
|
tabular = IOStreams::Tabular.new(format: :csv)
|
@@ -136,57 +166,55 @@ class TabularTest < Minitest::Test
|
|
136
166
|
end
|
137
167
|
|
138
168
|
describe ":fixed format" do
|
139
|
-
let :tabular do
|
140
|
-
layout = [
|
141
|
-
{size: 23, key: :name},
|
142
|
-
{size: 40, key: :address},
|
143
|
-
{size: 2},
|
144
|
-
{size: 5, key: :zip, type: :integer},
|
145
|
-
{size: 8, key: :age, type: :integer},
|
146
|
-
{size: 10, key: :weight, type: :float, decimals: 2}
|
147
|
-
]
|
148
|
-
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
149
|
-
end
|
150
|
-
|
151
169
|
it "parses to hash" do
|
152
|
-
assert hash =
|
170
|
+
assert hash = fixed.record_parse("Jack over there XX34618012345670012345.01")
|
153
171
|
assert_equal({name: "Jack", address: "over there", zip: 34_618, age: 1_234_567, weight: 12_345.01}, hash)
|
154
172
|
end
|
155
173
|
|
156
174
|
it "parses short string" do
|
157
175
|
assert_raises IOStreams::Errors::InvalidLineLength do
|
158
|
-
|
176
|
+
fixed.record_parse("Jack over th")
|
159
177
|
end
|
160
178
|
end
|
161
179
|
|
162
180
|
it "parses longer string" do
|
163
181
|
assert_raises IOStreams::Errors::InvalidLineLength do
|
164
|
-
|
182
|
+
fixed.record_parse("Jack over there XX34618012345670012345.01............")
|
165
183
|
end
|
166
184
|
end
|
167
185
|
|
168
186
|
it "parses zero values" do
|
169
|
-
assert hash =
|
187
|
+
assert hash = fixed.record_parse(" 00000000000000000000000")
|
170
188
|
assert_equal({name: "", address: "", zip: 0, age: 0, weight: 0.0}, hash)
|
171
189
|
end
|
172
190
|
|
173
191
|
it "parses empty values" do
|
174
|
-
assert hash =
|
192
|
+
assert hash = fixed.record_parse(" XX ")
|
175
193
|
assert_equal({name: "", address: "", zip: nil, age: nil, weight: nil}, hash)
|
176
194
|
end
|
177
195
|
|
178
196
|
it "parses blank strings" do
|
179
|
-
skip "TODO: Part of
|
180
|
-
assert hash =
|
197
|
+
skip "TODO: Part of fixed refactor to get this working"
|
198
|
+
assert hash = fixed.record_parse(" ")
|
181
199
|
assert_equal({name: "", address: "", zip: nil, age: nil, weight: nil}, hash)
|
182
200
|
end
|
183
201
|
|
184
202
|
it "parses nil data as nil" do
|
185
|
-
refute
|
203
|
+
refute fixed.record_parse(nil)
|
186
204
|
end
|
187
205
|
|
188
206
|
it "parses empty string as nil" do
|
189
|
-
refute
|
207
|
+
refute fixed.record_parse("")
|
208
|
+
end
|
209
|
+
|
210
|
+
it "parses remainder" do
|
211
|
+
hash = fixed_with_remainder.record_parse("Jack over there XX34618012345670012345.01............")
|
212
|
+
assert_equal({name: "Jack", address: "over there", remainder: "XX34618012345670012345.01............"}, hash)
|
213
|
+
end
|
214
|
+
|
215
|
+
it "discards remainder" do
|
216
|
+
hash = fixed_discard_remainder.record_parse("Jack over there XX34618012345670012345.01............")
|
217
|
+
assert_equal({name: "Jack", address: "over there"}, hash)
|
190
218
|
end
|
191
219
|
end
|
192
220
|
|
@@ -236,52 +264,55 @@ class TabularTest < Minitest::Test
|
|
236
264
|
end
|
237
265
|
|
238
266
|
describe ":fixed format" do
|
239
|
-
let :tabular do
|
240
|
-
layout = [
|
241
|
-
{size: 23, key: :name},
|
242
|
-
{size: 40, key: :address},
|
243
|
-
{size: 2},
|
244
|
-
{size: 5, key: :zip, type: :integer},
|
245
|
-
{size: 8, key: :age, type: :integer},
|
246
|
-
{size: 10, key: :weight, type: :float, decimals: 2}
|
247
|
-
]
|
248
|
-
IOStreams::Tabular.new(format: :fixed, format_options: {layout: layout})
|
249
|
-
end
|
250
|
-
|
251
267
|
it "renders fixed data" do
|
252
|
-
assert string =
|
268
|
+
assert string = fixed.render(name: "Jack", address: "over there", zip: 34_618, weight: 123_456.789123, age: 21)
|
253
269
|
assert_equal "Jack over there 34618000000210123456.79", string
|
254
270
|
end
|
255
271
|
|
256
272
|
it "truncates long strings" do
|
257
|
-
assert string =
|
273
|
+
assert string = fixed.render(name: "Jack ran up the beanstalk and when jack reached the top it was truncated", address: "over there", zip: 34_618)
|
258
274
|
assert_equal "Jack ran up the beanstaover there 34618000000000000000.00", string
|
259
275
|
end
|
260
276
|
|
261
277
|
it "when integer is too large" do
|
262
278
|
assert_raises IOStreams::Errors::ValueTooLong do
|
263
|
-
|
279
|
+
fixed.render(zip: 3_461_832_653_653_265)
|
264
280
|
end
|
265
281
|
end
|
266
282
|
|
267
283
|
it "when float is too large" do
|
268
284
|
assert_raises IOStreams::Errors::ValueTooLong do
|
269
|
-
|
285
|
+
fixed.render(weight: 3_461_832_653_653_265.234)
|
270
286
|
end
|
271
287
|
end
|
272
288
|
|
273
289
|
it "renders nil as empty string" do
|
274
|
-
assert string =
|
290
|
+
assert string = fixed.render(zip: 34_618)
|
275
291
|
assert_equal " 34618000000000000000.00", string
|
276
292
|
end
|
277
293
|
|
278
294
|
it "renders boolean" do
|
279
|
-
assert string =
|
295
|
+
assert string = fixed.render(name: true, address: false)
|
280
296
|
assert_equal "true false 00000000000000000000.00", string
|
281
297
|
end
|
282
298
|
|
283
299
|
it "renders no data as nil" do
|
284
|
-
refute
|
300
|
+
refute fixed.render({})
|
301
|
+
end
|
302
|
+
|
303
|
+
it "any size last string" do
|
304
|
+
assert string = fixed_with_remainder.render(name: "Jack", address: "over there", remainder: "XX34618012345670012345.01............")
|
305
|
+
assert_equal "Jack over there XX34618012345670012345.01............", string
|
306
|
+
end
|
307
|
+
|
308
|
+
it "nil last string" do
|
309
|
+
assert string = fixed_with_remainder.render(name: "Jack", address: "over there", remainder: nil)
|
310
|
+
assert_equal "Jack over there ", string
|
311
|
+
end
|
312
|
+
|
313
|
+
it "skips last filler" do
|
314
|
+
assert string = fixed_discard_remainder.render(name: "Jack", address: "over there")
|
315
|
+
assert_equal "Jack over there ", string
|
285
316
|
end
|
286
317
|
end
|
287
318
|
end
|