iostreams 1.5.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -1
- data/lib/io_streams/builder.rb +19 -5
- data/lib/io_streams/errors.rb +12 -0
- data/lib/io_streams/io_streams.rb +0 -2
- data/lib/io_streams/line/reader.rb +23 -11
- data/lib/io_streams/path.rb +1 -1
- data/lib/io_streams/paths/s3.rb +5 -2
- data/lib/io_streams/stream.rb +60 -5
- data/lib/io_streams/tabular.rb +23 -23
- data/lib/io_streams/tabular/parser/csv.rb +4 -2
- data/lib/io_streams/tabular/utility/csv_row.rb +1 -4
- data/lib/io_streams/version.rb +1 -1
- data/test/builder_test.rb +29 -0
- data/test/deprecated_test.rb +2 -0
- data/test/files/test.psv +4 -0
- data/test/files/unclosed_quote_large_test.csv +1658 -0
- data/test/files/unclosed_quote_test2.csv +3 -0
- data/test/line_reader_test.rb +30 -4
- data/test/stream_test.rb +174 -8
- metadata +47 -42
data/test/line_reader_test.rb
CHANGED
@@ -14,6 +14,14 @@ class LineReaderTest < Minitest::Test
|
|
14
14
|
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test.csv")
|
15
15
|
end
|
16
16
|
|
17
|
+
let :unclosed_quote_file2 do
|
18
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_test2.csv")
|
19
|
+
end
|
20
|
+
|
21
|
+
let :unclosed_quote_large_file do
|
22
|
+
File.join(File.dirname(__FILE__), "files", "unclosed_quote_large_test.csv")
|
23
|
+
end
|
24
|
+
|
17
25
|
let :data do
|
18
26
|
data = []
|
19
27
|
File.open(file_name, "rt") do |file|
|
@@ -51,13 +59,31 @@ class LineReaderTest < Minitest::Test
|
|
51
59
|
assert_equal 4, lines.count
|
52
60
|
end
|
53
61
|
|
54
|
-
it "raises error for
|
55
|
-
assert_raises(
|
62
|
+
it "raises error for unbalanced quotes" do
|
63
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
56
64
|
IOStreams::Line::Reader.file(unclosed_quote_file, embedded_within: '"') do |io|
|
57
|
-
io.each
|
58
|
-
|
65
|
+
io.each { |line| }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
69
|
+
end
|
70
|
+
|
71
|
+
it "raises error for unclosed quote" do
|
72
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
73
|
+
IOStreams::Line::Reader.file(unclosed_quote_file2, embedded_within: '"') do |io|
|
74
|
+
io.each { |line| }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "raises error for unclosed quote before eof" do
|
81
|
+
exc = assert_raises(IOStreams::Errors::MalformedDataError) do
|
82
|
+
IOStreams::Line::Reader.file(unclosed_quote_large_file, embedded_within: '"', buffer_size: 20) do |io|
|
83
|
+
io.each { |line| }
|
59
84
|
end
|
60
85
|
end
|
86
|
+
assert_includes exc.message, "Unbalanced delimited field, delimiter:"
|
61
87
|
end
|
62
88
|
end
|
63
89
|
end
|
data/test/stream_test.rb
CHANGED
@@ -45,9 +45,9 @@ class StreamTest < Minitest::Test
|
|
45
45
|
it "reads a zip file" do
|
46
46
|
File.open(multiple_zip_file_name, "rb") do |io|
|
47
47
|
result = IOStreams::Stream.new(io).
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
file_name(multiple_zip_file_name).
|
49
|
+
option(:zip, entry_file_name: "test.json").
|
50
|
+
read
|
51
51
|
assert_equal contents_test_json, result
|
52
52
|
end
|
53
53
|
end
|
@@ -55,8 +55,8 @@ class StreamTest < Minitest::Test
|
|
55
55
|
it "reads a zip file from within a gz file" do
|
56
56
|
File.open(zip_gz_file_name, "rb") do |io|
|
57
57
|
result = IOStreams::Stream.new(io).
|
58
|
-
|
59
|
-
|
58
|
+
file_name(zip_gz_file_name).
|
59
|
+
read
|
60
60
|
assert_equal contents_test_txt, result
|
61
61
|
end
|
62
62
|
end
|
@@ -71,7 +71,7 @@ class StreamTest < Minitest::Test
|
|
71
71
|
describe ".record_reader" do
|
72
72
|
end
|
73
73
|
|
74
|
-
describe "
|
74
|
+
describe "#each(:line)" do
|
75
75
|
it "returns a line at a time" do
|
76
76
|
lines = []
|
77
77
|
stream.stream(:none)
|
@@ -91,10 +91,114 @@ class StreamTest < Minitest::Test
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
-
describe "
|
94
|
+
describe "#each(:array)" do
|
95
|
+
describe "csv" do
|
96
|
+
let :source_file_name do
|
97
|
+
File.join(__dir__, "files", "test.csv")
|
98
|
+
end
|
99
|
+
|
100
|
+
let :expected_rows do
|
101
|
+
rows = []
|
102
|
+
CSV.open(source_file_name).each { |row| rows << row }
|
103
|
+
rows
|
104
|
+
end
|
105
|
+
|
106
|
+
it "detects format from file_name" do
|
107
|
+
output = []
|
108
|
+
stream.file_name = source_file_name
|
109
|
+
stream.each(:array) { |record| output << record }
|
110
|
+
assert_equal expected_rows, output
|
111
|
+
end
|
112
|
+
|
113
|
+
it "honors format" do
|
114
|
+
output = []
|
115
|
+
stream.file_name = "blah"
|
116
|
+
stream.format = :csv
|
117
|
+
stream.each(:array) { |record| output << record }
|
118
|
+
assert_equal expected_rows, output
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
describe "psv" do
|
123
|
+
let :source_file_name do
|
124
|
+
File.join(__dir__, "files", "test.psv")
|
125
|
+
end
|
126
|
+
|
127
|
+
let :expected_rows do
|
128
|
+
File.readlines(source_file_name).collect { |line| line.chomp.split("|") }
|
129
|
+
end
|
130
|
+
|
131
|
+
it "detects format from file_name" do
|
132
|
+
output = []
|
133
|
+
stream.file_name = source_file_name
|
134
|
+
stream.each(:array) { |record| output << record }
|
135
|
+
assert_equal expected_rows, output
|
136
|
+
end
|
137
|
+
|
138
|
+
it "honors format" do
|
139
|
+
output = []
|
140
|
+
stream.file_name = "blah"
|
141
|
+
stream.format = :psv
|
142
|
+
stream.each(:array) { |record| output << record }
|
143
|
+
assert_equal expected_rows, output
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
describe "json" do
|
148
|
+
let :source_file_name do
|
149
|
+
File.join(__dir__, "files", "test.json")
|
150
|
+
end
|
151
|
+
|
152
|
+
let :expected_rows do
|
153
|
+
hash_rows = File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
154
|
+
rows = []
|
155
|
+
rows << hash_rows.first.keys
|
156
|
+
hash_rows.each { |hash| rows << hash.values }
|
157
|
+
rows
|
158
|
+
end
|
159
|
+
|
160
|
+
it "detects format from file_name" do
|
161
|
+
skip "TODO: Support reading json files as arrays"
|
162
|
+
output = []
|
163
|
+
stream.file_name = source_file_name
|
164
|
+
stream.each(:array) { |record| output << record }
|
165
|
+
assert_equal expected_rows, output
|
166
|
+
end
|
167
|
+
|
168
|
+
it "honors format" do
|
169
|
+
skip "TODO: Support reading json files as arrays"
|
170
|
+
output = []
|
171
|
+
stream.file_name = "blah"
|
172
|
+
stream.format = :json
|
173
|
+
stream.each(:array) { |record| output << record }
|
174
|
+
assert_equal expected_rows, output
|
175
|
+
end
|
176
|
+
end
|
95
177
|
end
|
96
178
|
|
97
|
-
describe ".each
|
179
|
+
describe ".each hash" do
|
180
|
+
let :source_file_name do
|
181
|
+
File.join(__dir__, "files", "test.json")
|
182
|
+
end
|
183
|
+
|
184
|
+
let :expected_json do
|
185
|
+
File.readlines(source_file_name).collect { |line| JSON.load(line) }
|
186
|
+
end
|
187
|
+
|
188
|
+
it "detects format from file_name" do
|
189
|
+
output = []
|
190
|
+
stream.file_name = source_file_name
|
191
|
+
stream.each(:hash) { |record| output << record }
|
192
|
+
assert_equal expected_json, output
|
193
|
+
end
|
194
|
+
|
195
|
+
it "honors format" do
|
196
|
+
output = []
|
197
|
+
stream.file_name = "blah"
|
198
|
+
stream.format = :json
|
199
|
+
stream.each(:hash) { |record| output << record }
|
200
|
+
assert_equal expected_json, output
|
201
|
+
end
|
98
202
|
end
|
99
203
|
|
100
204
|
describe "#writer" do
|
@@ -359,6 +463,24 @@ class StreamTest < Minitest::Test
|
|
359
463
|
end
|
360
464
|
assert_equal "\nHe\n\nl\n\nlo \nWorld\n\n", io.string, io.string.inspect
|
361
465
|
end
|
466
|
+
|
467
|
+
it "honors format" do
|
468
|
+
io = StringIO.new
|
469
|
+
IOStreams::Stream.new(io).format(:psv).writer(:array) do |stream|
|
470
|
+
stream << %w[first_name last_name]
|
471
|
+
stream << %w[Jack Johnson]
|
472
|
+
end
|
473
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
474
|
+
end
|
475
|
+
|
476
|
+
it "auto detects format" do
|
477
|
+
io = StringIO.new
|
478
|
+
IOStreams::Stream.new(io).file_name("abc.psv").writer(:array) do |stream|
|
479
|
+
stream << %w[first_name last_name]
|
480
|
+
stream << %w[Jack Johnson]
|
481
|
+
end
|
482
|
+
assert_equal "first_name|last_name\nJack|Johnson\n", io.string, io.string.inspect
|
483
|
+
end
|
362
484
|
end
|
363
485
|
end
|
364
486
|
|
@@ -402,6 +524,50 @@ class StreamTest < Minitest::Test
|
|
402
524
|
end
|
403
525
|
assert_equal "first_name,last_name\nJack,Johnson\n\n{:first_name=>\"Able\", :last_name=>\"Smith\"}\n\n", io.string, io.string.inspect
|
404
526
|
end
|
527
|
+
|
528
|
+
it "honors format" do
|
529
|
+
io = StringIO.new
|
530
|
+
IOStreams::Stream.new(io).format(:json).writer(:hash) do |stream|
|
531
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
532
|
+
end
|
533
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
534
|
+
end
|
535
|
+
|
536
|
+
it "auto detects format" do
|
537
|
+
io = StringIO.new
|
538
|
+
IOStreams::Stream.new(io).file_name("abc.json").writer(:hash) do |stream|
|
539
|
+
stream << {first_name: "Jack", last_name: "Johnson"}
|
540
|
+
end
|
541
|
+
assert_equal "{\"first_name\":\"Jack\",\"last_name\":\"Johnson\"}\n", io.string, io.string.inspect
|
542
|
+
end
|
543
|
+
end
|
544
|
+
end
|
545
|
+
|
546
|
+
describe "#format" do
|
547
|
+
it "detects the format from the file name" do
|
548
|
+
stream.file_name = "abc.json"
|
549
|
+
assert_equal :json, stream.format
|
550
|
+
end
|
551
|
+
|
552
|
+
it "is nil if the file name has no meaningful format" do
|
553
|
+
assert_nil stream.format
|
554
|
+
end
|
555
|
+
|
556
|
+
it "returns set format with no file_name" do
|
557
|
+
stream.format = :csv
|
558
|
+
assert_equal :csv, stream.format
|
559
|
+
end
|
560
|
+
|
561
|
+
it "returns set format with file_name" do
|
562
|
+
stream.file_name = "abc.json"
|
563
|
+
stream.format = :csv
|
564
|
+
assert_equal :csv, stream.format
|
565
|
+
end
|
566
|
+
|
567
|
+
it "validates bad format" do
|
568
|
+
assert_raises ArgumentError do
|
569
|
+
stream.format = :blah
|
570
|
+
end
|
405
571
|
end
|
406
572
|
end
|
407
573
|
end
|
metadata
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iostreams
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.1
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description:
|
14
14
|
email:
|
15
|
-
- reidmo@gmail.com
|
16
15
|
executables: []
|
17
16
|
extensions: []
|
18
17
|
extra_rdoc_files: []
|
@@ -77,6 +76,7 @@ files:
|
|
77
76
|
- test/files/spreadsheet.xlsx
|
78
77
|
- test/files/test.csv
|
79
78
|
- test/files/test.json
|
79
|
+
- test/files/test.psv
|
80
80
|
- test/files/text file.txt
|
81
81
|
- test/files/text.txt
|
82
82
|
- test/files/text.txt.bz2
|
@@ -84,7 +84,9 @@ files:
|
|
84
84
|
- test/files/text.txt.gz.zip
|
85
85
|
- test/files/text.zip
|
86
86
|
- test/files/text.zip.gz
|
87
|
+
- test/files/unclosed_quote_large_test.csv
|
87
88
|
- test/files/unclosed_quote_test.csv
|
89
|
+
- test/files/unclosed_quote_test2.csv
|
88
90
|
- test/gzip_reader_test.rb
|
89
91
|
- test/gzip_writer_test.rb
|
90
92
|
- test/io_streams_test.rb
|
@@ -111,11 +113,11 @@ files:
|
|
111
113
|
- test/xlsx_reader_test.rb
|
112
114
|
- test/zip_reader_test.rb
|
113
115
|
- test/zip_writer_test.rb
|
114
|
-
homepage: https://
|
116
|
+
homepage: https://iostreams.rocketjob.io
|
115
117
|
licenses:
|
116
118
|
- Apache-2.0
|
117
119
|
metadata: {}
|
118
|
-
post_install_message:
|
120
|
+
post_install_message:
|
119
121
|
rdoc_options: []
|
120
122
|
require_paths:
|
121
123
|
- lib
|
@@ -130,53 +132,56 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
132
|
- !ruby/object:Gem::Version
|
131
133
|
version: '0'
|
132
134
|
requirements: []
|
133
|
-
rubygems_version: 3.
|
134
|
-
signing_key:
|
135
|
+
rubygems_version: 3.2.3
|
136
|
+
signing_key:
|
135
137
|
specification_version: 4
|
136
138
|
summary: Input and Output streaming for Ruby.
|
137
139
|
test_files:
|
138
|
-
- test/pgp_reader_test.rb
|
139
|
-
- test/deprecated_test.rb
|
140
140
|
- test/builder_test.rb
|
141
|
-
- test/
|
142
|
-
- test/xlsx_reader_test.rb
|
143
|
-
- test/minimal_file_reader.rb
|
144
|
-
- test/row_writer_test.rb
|
145
|
-
- test/zip_reader_test.rb
|
141
|
+
- test/bzip2_reader_test.rb
|
146
142
|
- test/bzip2_writer_test.rb
|
143
|
+
- test/deprecated_test.rb
|
144
|
+
- test/encode_reader_test.rb
|
147
145
|
- test/encode_writer_test.rb
|
148
|
-
- test/gzip_writer_test.rb
|
149
|
-
- test/stream_test.rb
|
150
|
-
- test/paths/matcher_test.rb
|
151
|
-
- test/paths/s3_test.rb
|
152
|
-
- test/paths/sftp_test.rb
|
153
|
-
- test/paths/file_test.rb
|
154
|
-
- test/paths/http_test.rb
|
155
|
-
- test/record_reader_test.rb
|
156
|
-
- test/pgp_writer_test.rb
|
157
|
-
- test/line_writer_test.rb
|
158
|
-
- test/row_reader_test.rb
|
159
|
-
- test/bzip2_reader_test.rb
|
160
|
-
- test/zip_writer_test.rb
|
161
|
-
- test/files/text.zip
|
162
|
-
- test/files/spreadsheet.xlsx
|
163
146
|
- test/files/embedded_lines_test.csv
|
147
|
+
- test/files/multiple_files.zip
|
148
|
+
- test/files/spreadsheet.xlsx
|
164
149
|
- test/files/test.csv
|
165
150
|
- test/files/test.json
|
166
|
-
- test/files/
|
167
|
-
- test/files/
|
151
|
+
- test/files/test.psv
|
152
|
+
- test/files/text file.txt
|
153
|
+
- test/files/text.txt
|
168
154
|
- test/files/text.txt.bz2
|
169
|
-
- test/files/text.txt.gz.zip
|
170
155
|
- test/files/text.txt.gz
|
171
|
-
- test/files/text.txt
|
172
|
-
- test/files/
|
173
|
-
- test/files/text
|
156
|
+
- test/files/text.txt.gz.zip
|
157
|
+
- test/files/text.zip
|
158
|
+
- test/files/text.zip.gz
|
159
|
+
- test/files/unclosed_quote_large_test.csv
|
160
|
+
- test/files/unclosed_quote_test.csv
|
161
|
+
- test/files/unclosed_quote_test2.csv
|
174
162
|
- test/gzip_reader_test.rb
|
175
|
-
- test/
|
176
|
-
- test/
|
177
|
-
- test/
|
178
|
-
- test/
|
163
|
+
- test/gzip_writer_test.rb
|
164
|
+
- test/io_streams_test.rb
|
165
|
+
- test/line_reader_test.rb
|
166
|
+
- test/line_writer_test.rb
|
167
|
+
- test/minimal_file_reader.rb
|
179
168
|
- test/path_test.rb
|
169
|
+
- test/paths/file_test.rb
|
170
|
+
- test/paths/http_test.rb
|
171
|
+
- test/paths/matcher_test.rb
|
172
|
+
- test/paths/s3_test.rb
|
173
|
+
- test/paths/sftp_test.rb
|
174
|
+
- test/pgp_reader_test.rb
|
180
175
|
- test/pgp_test.rb
|
181
|
-
- test/
|
176
|
+
- test/pgp_writer_test.rb
|
177
|
+
- test/record_reader_test.rb
|
182
178
|
- test/record_writer_test.rb
|
179
|
+
- test/row_reader_test.rb
|
180
|
+
- test/row_writer_test.rb
|
181
|
+
- test/stream_test.rb
|
182
|
+
- test/tabular_test.rb
|
183
|
+
- test/test_helper.rb
|
184
|
+
- test/utils_test.rb
|
185
|
+
- test/xlsx_reader_test.rb
|
186
|
+
- test/zip_reader_test.rb
|
187
|
+
- test/zip_writer_test.rb
|