iostreams 0.20.3 → 1.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/lib/io_streams/bzip2/reader.rb +9 -21
  3. data/lib/io_streams/bzip2/writer.rb +9 -21
  4. data/lib/io_streams/deprecated.rb +217 -0
  5. data/lib/io_streams/encode/reader.rb +12 -16
  6. data/lib/io_streams/encode/writer.rb +9 -13
  7. data/lib/io_streams/errors.rb +6 -6
  8. data/lib/io_streams/gzip/reader.rb +7 -14
  9. data/lib/io_streams/gzip/writer.rb +7 -15
  10. data/lib/io_streams/io_streams.rb +182 -524
  11. data/lib/io_streams/line/reader.rb +9 -9
  12. data/lib/io_streams/line/writer.rb +10 -11
  13. data/lib/io_streams/path.rb +190 -0
  14. data/lib/io_streams/paths/file.rb +176 -0
  15. data/lib/io_streams/paths/http.rb +92 -0
  16. data/lib/io_streams/paths/matcher.rb +61 -0
  17. data/lib/io_streams/paths/s3.rb +269 -0
  18. data/lib/io_streams/paths/sftp.rb +99 -0
  19. data/lib/io_streams/pgp.rb +47 -19
  20. data/lib/io_streams/pgp/reader.rb +20 -28
  21. data/lib/io_streams/pgp/writer.rb +24 -46
  22. data/lib/io_streams/reader.rb +28 -0
  23. data/lib/io_streams/record/reader.rb +20 -16
  24. data/lib/io_streams/record/writer.rb +28 -28
  25. data/lib/io_streams/row/reader.rb +22 -26
  26. data/lib/io_streams/row/writer.rb +29 -28
  27. data/lib/io_streams/stream.rb +400 -0
  28. data/lib/io_streams/streams.rb +125 -0
  29. data/lib/io_streams/symmetric_encryption/reader.rb +5 -13
  30. data/lib/io_streams/symmetric_encryption/writer.rb +16 -15
  31. data/lib/io_streams/tabular/header.rb +9 -3
  32. data/lib/io_streams/tabular/parser/array.rb +8 -3
  33. data/lib/io_streams/tabular/parser/csv.rb +6 -2
  34. data/lib/io_streams/tabular/parser/hash.rb +4 -1
  35. data/lib/io_streams/tabular/parser/json.rb +3 -1
  36. data/lib/io_streams/tabular/parser/psv.rb +3 -1
  37. data/lib/io_streams/tabular/utility/csv_row.rb +9 -8
  38. data/lib/io_streams/utils.rb +22 -0
  39. data/lib/io_streams/version.rb +1 -1
  40. data/lib/io_streams/writer.rb +28 -0
  41. data/lib/io_streams/xlsx/reader.rb +7 -19
  42. data/lib/io_streams/zip/reader.rb +7 -26
  43. data/lib/io_streams/zip/writer.rb +21 -38
  44. data/lib/iostreams.rb +15 -15
  45. data/test/bzip2_reader_test.rb +3 -3
  46. data/test/bzip2_writer_test.rb +3 -3
  47. data/test/deprecated_test.rb +123 -0
  48. data/test/encode_reader_test.rb +3 -3
  49. data/test/encode_writer_test.rb +6 -6
  50. data/test/gzip_reader_test.rb +2 -2
  51. data/test/gzip_writer_test.rb +3 -3
  52. data/test/io_streams_test.rb +43 -136
  53. data/test/line_reader_test.rb +20 -20
  54. data/test/line_writer_test.rb +3 -3
  55. data/test/path_test.rb +30 -28
  56. data/test/paths/file_test.rb +206 -0
  57. data/test/paths/http_test.rb +34 -0
  58. data/test/paths/matcher_test.rb +111 -0
  59. data/test/paths/s3_test.rb +207 -0
  60. data/test/pgp_reader_test.rb +8 -8
  61. data/test/pgp_writer_test.rb +13 -13
  62. data/test/record_reader_test.rb +5 -5
  63. data/test/record_writer_test.rb +4 -4
  64. data/test/row_reader_test.rb +5 -5
  65. data/test/row_writer_test.rb +6 -6
  66. data/test/stream_test.rb +116 -0
  67. data/test/streams_test.rb +255 -0
  68. data/test/utils_test.rb +20 -0
  69. data/test/xlsx_reader_test.rb +3 -3
  70. data/test/zip_reader_test.rb +12 -12
  71. data/test/zip_writer_test.rb +5 -5
  72. metadata +33 -45
  73. data/lib/io_streams/base_path.rb +0 -72
  74. data/lib/io_streams/file/path.rb +0 -58
  75. data/lib/io_streams/file/reader.rb +0 -12
  76. data/lib/io_streams/file/writer.rb +0 -22
  77. data/lib/io_streams/http/reader.rb +0 -71
  78. data/lib/io_streams/s3.rb +0 -26
  79. data/lib/io_streams/s3/path.rb +0 -40
  80. data/lib/io_streams/s3/reader.rb +0 -28
  81. data/lib/io_streams/s3/writer.rb +0 -85
  82. data/lib/io_streams/sftp/reader.rb +0 -67
  83. data/lib/io_streams/sftp/writer.rb +0 -68
  84. data/test/base_path_test.rb +0 -35
  85. data/test/file_path_test.rb +0 -97
  86. data/test/file_reader_test.rb +0 -33
  87. data/test/file_writer_test.rb +0 -50
  88. data/test/http_reader_test.rb +0 -38
  89. data/test/s3_reader_test.rb +0 -41
  90. data/test/s3_writer_test.rb +0 -41
@@ -23,7 +23,7 @@ class RecordReaderTest < Minitest::Test
23
23
  describe '#each' do
24
24
  it 'csv file' do
25
25
  records = []
26
- IOStreams::Record::Reader.open(file_name, cleanse_header: false) do |io|
26
+ IOStreams::Record::Reader.file(file_name, cleanse_header: false) do |io|
27
27
  io.each { |row| records << row }
28
28
  end
29
29
  assert_equal expected, records
@@ -31,7 +31,7 @@ class RecordReaderTest < Minitest::Test
31
31
 
32
32
  it 'json file' do
33
33
  records = []
34
- IOStreams::Record::Reader.open(json_file_name, cleanse_header: false) do |input|
34
+ IOStreams::Record::Reader.file(json_file_name, cleanse_header: false, format: :json) do |input|
35
35
  input.each { |row| records << row }
36
36
  end
37
37
  assert_equal expected, records
@@ -39,8 +39,8 @@ class RecordReaderTest < Minitest::Test
39
39
 
40
40
  it 'stream' do
41
41
  rows = []
42
- IOStreams.line_reader(file_name) do |file|
43
- IOStreams::Record::Reader.open(file, cleanse_header: false) do |io|
42
+ IOStreams::Line::Reader.file(file_name) do |file|
43
+ IOStreams::Record::Reader.stream(file, cleanse_header: false) do |io|
44
44
  io.each { |row| rows << row }
45
45
  end
46
46
  end
@@ -50,7 +50,7 @@ class RecordReaderTest < Minitest::Test
50
50
 
51
51
  describe '#collect' do
52
52
  it 'json file' do
53
- records = IOStreams::Record::Reader.open(json_file_name) do |input|
53
+ records = IOStreams::Record::Reader.file(json_file_name, format: :json) do |input|
54
54
  input.collect { |record| record["state"] }
55
55
  end
56
56
  assert_equal expected.collect { |record| record["state"] }, records
@@ -43,7 +43,7 @@ class RecordWriterTest < Minitest::Test
43
43
 
44
44
  describe '#<<' do
45
45
  it 'file' do
46
- IOStreams::Record::Writer.open(file_name) do |io|
46
+ IOStreams::Record::Writer.file(file_name) do |io|
47
47
  inputs.each { |hash| io << hash }
48
48
  end
49
49
  result = File.read(file_name)
@@ -51,7 +51,7 @@ class RecordWriterTest < Minitest::Test
51
51
  end
52
52
 
53
53
  it 'json file' do
54
- IOStreams::Record::Writer.open(file_name, file_name: 'abc.json') do |io|
54
+ IOStreams::Record::Writer.file(file_name, file_name: 'abc.json') do |io|
55
55
  inputs.each { |hash| io << hash }
56
56
  end
57
57
  result = File.read(file_name)
@@ -60,8 +60,8 @@ class RecordWriterTest < Minitest::Test
60
60
 
61
61
  it 'stream' do
62
62
  io_string = StringIO.new
63
- IOStreams::Line::Writer.open(io_string) do |io|
64
- IOStreams::Record::Writer.open(io) do |stream|
63
+ IOStreams::Line::Writer.stream(io_string) do |io|
64
+ IOStreams::Record::Writer.stream(io) do |stream|
65
65
  inputs.each { |row| stream << row }
66
66
  end
67
67
  end
@@ -12,8 +12,8 @@ class RowReaderTest < Minitest::Test
12
12
 
13
13
  describe '.open' do
14
14
  it 'file' do
15
- rows = []
16
- count = IOStreams::Row::Reader.open(file_name) do |io|
15
+ rows = []
16
+ count = IOStreams::Row::Reader.file(file_name) do |io|
17
17
  io.each { |row| rows << row }
18
18
  end
19
19
  assert_equal expected, rows
@@ -21,9 +21,9 @@ class RowReaderTest < Minitest::Test
21
21
  end
22
22
 
23
23
  it 'stream' do
24
- rows = []
25
- count = IOStreams.line_reader(file_name) do |file|
26
- IOStreams::Row::Reader.open(file) do |io|
24
+ rows = []
25
+ count = IOStreams::Line::Reader.file(file_name) do |file|
26
+ IOStreams::Row::Reader.stream(file) do |io|
27
27
  io.each { |row| rows << row }
28
28
  end
29
29
  end
@@ -27,19 +27,19 @@ class RowWriterTest < Minitest::Test
27
27
  temp_file.delete
28
28
  end
29
29
 
30
- describe '.open' do
30
+ describe '.stream' do
31
31
  it 'file' do
32
- IOStreams::Row::Writer.open(file_name) do |io|
32
+ IOStreams::Row::Writer.file(file_name) do |io|
33
33
  csv_rows.each { |array| io << array }
34
34
  end
35
- result = File.read(file_name)
35
+ result = ::File.read(file_name)
36
36
  assert_equal raw_csv_data, result
37
37
  end
38
38
 
39
- it 'stream' do
39
+ it 'streams' do
40
40
  io_string = StringIO.new
41
- IOStreams::Line::Writer.open(io_string) do |io|
42
- IOStreams::Row::Writer.open(io) do |stream|
41
+ IOStreams::Line::Writer.stream(io_string) do |io|
42
+ IOStreams::Row::Writer.stream(io) do |stream|
43
43
  csv_rows.each { |array| stream << array }
44
44
  end
45
45
  end
@@ -0,0 +1,116 @@
1
+ require_relative 'test_helper'
2
+
3
+ class StreamTest < Minitest::Test
4
+ describe IOStreams::Stream do
5
+ let :source_file_name do
6
+ File.join(__dir__, 'files', 'text.txt')
7
+ end
8
+
9
+ let :data do
10
+ File.read(source_file_name)
11
+ end
12
+
13
+ let :bad_data do
14
+ [
15
+ "New M\xE9xico,NE".b,
16
+ 'good line',
17
+ "New M\xE9xico,\x07SF".b
18
+ ].join("\n").encode('BINARY')
19
+ end
20
+
21
+ let :stripped_data do
22
+ bad_data.gsub("\xE9".b, '').gsub("\x07", '')
23
+ end
24
+
25
+ let :multiple_zip_file_name do
26
+ File.join(File.dirname(__FILE__), 'files', 'multiple_files.zip')
27
+ end
28
+
29
+ let :zip_gz_file_name do
30
+ File.join(File.dirname(__FILE__), 'files', 'text.zip.gz')
31
+ end
32
+
33
+ let :contents_test_txt do
34
+ File.read(File.join(File.dirname(__FILE__), 'files', 'text.txt'))
35
+ end
36
+
37
+ let :contents_test_json do
38
+ File.read(File.join(File.dirname(__FILE__), 'files', 'test.json'))
39
+ end
40
+
41
+ let(:string_io) { StringIO.new(data) }
42
+ let(:stream) { IOStreams::Stream.new(string_io) }
43
+
44
+ describe '.reader' do
45
+ it 'reads a zip file' do
46
+ File.open(multiple_zip_file_name, 'rb') do |io|
47
+ result = IOStreams::Stream.new(io).
48
+ file_name(multiple_zip_file_name).
49
+ option(:zip, entry_file_name: 'test.json').
50
+ reader { |io| io.read }
51
+ assert_equal contents_test_json, result
52
+ end
53
+ end
54
+
55
+ it 'reads a zip file from within a gz file' do
56
+ File.open(zip_gz_file_name, 'rb') do |io|
57
+ result = IOStreams::Stream.new(io).
58
+ file_name(zip_gz_file_name).
59
+ reader { |io| io.read }
60
+ assert_equal contents_test_txt, result
61
+ end
62
+ end
63
+ end
64
+
65
+ describe '.line_reader' do
66
+ end
67
+
68
+ describe '.row_reader' do
69
+ end
70
+
71
+ describe '.record_reader' do
72
+ end
73
+
74
+ describe '.each_line' do
75
+ it 'returns a line at a time' do
76
+ lines = []
77
+ stream.stream(:none)
78
+ count = stream.each_line { |line| lines << line }
79
+ assert_equal data.lines.map(&:strip), lines
80
+ assert_equal data.lines.count, count
81
+ end
82
+
83
+ it 'strips non-printable characters' do
84
+ input = StringIO.new(bad_data)
85
+ lines = []
86
+ stream = IOStreams::Stream.new(input)
87
+ stream.stream(:encode, encoding: 'UTF-8', cleaner: :printable, replace: '')
88
+ count = stream.each_line { |line| lines << line }
89
+ assert_equal stripped_data.lines.map(&:strip), lines
90
+ assert_equal stripped_data.lines.count, count
91
+ end
92
+ end
93
+
94
+ describe '.each_row' do
95
+ end
96
+
97
+ describe '.each_record' do
98
+ end
99
+
100
+ describe '.writer' do
101
+ end
102
+
103
+ describe '.writer' do
104
+ end
105
+
106
+ describe '.line_writer' do
107
+ end
108
+
109
+ describe '.row_writer' do
110
+ end
111
+
112
+ describe '.record_writer' do
113
+ end
114
+
115
+ end
116
+ end
@@ -0,0 +1,255 @@
1
+ require_relative 'test_helper'
2
+
3
+ class StreamsTest < Minitest::Test
4
+ describe IOStreams::Streams do
5
+ let(:file_name) { 'my/path/abc.bcd.xlsx.zip.gz.pgp' }
6
+ let(:streams) { IOStreams::Streams.new(file_name) }
7
+
8
+ describe '#option' do
9
+ it 'adds one option' do
10
+ streams.option(:pgp, passphrase: 'unlock-me')
11
+ assert_equal({pgp: {passphrase: 'unlock-me'}}, streams.options)
12
+ end
13
+
14
+ it 'adds options in order' do
15
+ streams.option(:pgp, passphrase: 'unlock-me')
16
+ streams.option(:enc, compress: false)
17
+ assert_equal({pgp: {passphrase: 'unlock-me'}, enc: {compress: false}}, streams.options)
18
+ end
19
+
20
+ it 'will not add an option if a stream was already set' do
21
+ streams.stream(:pgp, passphrase: 'unlock-me')
22
+ assert_raises ArgumentError do
23
+ streams.option(:pgp, passphrase: 'unlock-me')
24
+ end
25
+ end
26
+
27
+ it 'will not add an invalid option' do
28
+ assert_raises ArgumentError do
29
+ streams.option(:blah, value: 23)
30
+ end
31
+ end
32
+
33
+ describe 'with no file_name' do
34
+ let(:file_name) { nil }
35
+
36
+ it 'prevents options being set' do
37
+ assert_raises ArgumentError do
38
+ streams.option(:pgp, passphrase: 'unlock-me')
39
+ end
40
+ end
41
+ end
42
+ end
43
+
44
+ describe '#stream' do
45
+ it 'adds one stream' do
46
+ streams.stream(:pgp, passphrase: 'unlock-me')
47
+ assert_equal({pgp: {passphrase: 'unlock-me'}}, streams.streams)
48
+ end
49
+
50
+ it 'adds streams in order' do
51
+ streams.stream(:pgp, passphrase: 'unlock-me')
52
+ streams.stream(:enc, compress: false)
53
+ assert_equal({pgp: {passphrase: 'unlock-me'}, enc: {compress: false}}, streams.streams)
54
+ end
55
+
56
+ it 'will not add a stream if an option was already set' do
57
+ streams.option(:pgp, passphrase: 'unlock-me')
58
+ assert_raises ArgumentError do
59
+ streams.stream(:pgp, passphrase: 'unlock-me')
60
+ end
61
+ end
62
+
63
+ it 'will not add an invalid stream' do
64
+ assert_raises ArgumentError do
65
+ streams.stream(:blah, value: 23)
66
+ end
67
+ end
68
+ end
69
+
70
+ describe '#reader' do
71
+ it 'directly calls block for an empty stream' do
72
+ string_io = StringIO.new
73
+ value = nil
74
+ streams.stream(:none)
75
+ streams.reader(string_io) do |io|
76
+ assert_equal io, string_io
77
+ value = 32
78
+ end
79
+ assert_equal 32, value
80
+ end
81
+
82
+ it 'returns the reader' do
83
+ string_io = StringIO.new
84
+ streams.stream(:zip)
85
+ streams.reader(string_io) do |io|
86
+ assert io.is_a?(::Zip::InputStream), io
87
+ end
88
+ end
89
+
90
+ it 'returns the last reader' do
91
+ string_io = StringIO.new
92
+ streams.stream(:encode)
93
+ streams.stream(:zip)
94
+ streams.reader(string_io) do |io|
95
+ assert io.is_a?(IOStreams::Encode::Reader), io
96
+ end
97
+ end
98
+ end
99
+
100
+ describe '#writer' do
101
+ it 'directly calls block for an empty stream' do
102
+ string_io = StringIO.new
103
+ value = nil
104
+ streams.stream(:none)
105
+ streams.writer(string_io) do |io|
106
+ assert_equal io, string_io
107
+ value = 32
108
+ end
109
+ assert_equal 32, value
110
+ end
111
+
112
+ it 'returns the reader' do
113
+ string_io = StringIO.new
114
+ streams.stream(:zip)
115
+ streams.writer(string_io) do |io|
116
+ assert io.is_a?(::Zip::OutputStream), io
117
+ end
118
+ end
119
+
120
+ it 'returns the last reader' do
121
+ string_io = StringIO.new
122
+ streams.stream(:encode)
123
+ streams.stream(:zip)
124
+ streams.writer(string_io) do |io|
125
+ assert io.is_a?(IOStreams::Encode::Writer), io
126
+ end
127
+ end
128
+ end
129
+
130
+ # Internal methods
131
+
132
+ describe '#class_for_stream' do
133
+ it 'xlsx' do
134
+ assert_equal IOStreams::Xlsx::Reader, streams.send(:class_for_stream, :reader, :xlsx)
135
+ end
136
+
137
+ it 'gzip' do
138
+ assert_equal IOStreams::Gzip::Writer, streams.send(:class_for_stream, :writer, :gzip)
139
+ end
140
+
141
+ it 'unknown' do
142
+ assert_raises ArgumentError do
143
+ streams.send(:class_for_stream, :reader, :unknown)
144
+ end
145
+ end
146
+ end
147
+
148
+ describe '#parse_extensions' do
149
+ it 'single stream' do
150
+ streams = IOStreams::Streams.new('my/path/abc.xlsx')
151
+ assert_equal %i[xlsx], streams.send(:parse_extensions)
152
+ end
153
+
154
+ it 'empty' do
155
+ streams = IOStreams::Streams.new('my/path/abc.csv')
156
+ assert_equal [], streams.send(:parse_extensions)
157
+ end
158
+
159
+ it 'handles multiple extensions' do
160
+ assert_equal %i[xlsx zip gz pgp], streams.send(:parse_extensions)
161
+ end
162
+
163
+ describe 'case-insensitive' do
164
+ let(:file_name) { 'a.XlsX.GzIp' }
165
+
166
+ it 'is case-insensitive' do
167
+ assert_equal %i[xlsx gzip], streams.send(:parse_extensions)
168
+ end
169
+ end
170
+ end
171
+
172
+ describe '#pipeline' do
173
+ it 'with stream and file name' do
174
+ expected = {enc: {compress: false}}
175
+ streams.stream(:enc, compress: false)
176
+ assert_equal expected, streams.pipeline
177
+ end
178
+
179
+ it 'no file name, streams, or options' do
180
+ expected = {}
181
+ streams = IOStreams::Streams.new
182
+ assert_equal expected, streams.pipeline
183
+ end
184
+
185
+ it 'file name without options' do
186
+ expected = {:xlsx => {}, :zip => {}, :gz => {}, :pgp => {}}
187
+ assert_equal expected, streams.pipeline
188
+ end
189
+
190
+ it 'file name with encode option' do
191
+ expected = {encode: {encoding: 'BINARY'}, :xlsx => {}, :zip => {}, :gz => {}, :pgp => {}}
192
+ streams.option(:encode, encoding: 'BINARY')
193
+ assert_equal expected, streams.pipeline
194
+ end
195
+
196
+ it 'file name with option' do
197
+ expected = {:xlsx => {}, :zip => {}, :gz => {}, :pgp => {passphrase: 'unlock-me'}}
198
+ streams.option(:pgp, passphrase: 'unlock-me')
199
+ assert_equal expected, streams.pipeline
200
+ end
201
+ end
202
+
203
+ describe '#execute' do
204
+ it 'directly calls block for an empty stream' do
205
+ string_io = StringIO.new
206
+ value = nil
207
+ streams.send(:execute, :writer, {}, string_io) do |io|
208
+ assert_equal io, string_io
209
+ value = 32
210
+ end
211
+ assert_equal 32, value
212
+ end
213
+
214
+ it 'calls last block in one element stream' do
215
+ pipeline = {simple: {arg: 'first'}}
216
+ string_io = StringIO.new
217
+ streams.send(:execute, :writer, pipeline, string_io) { |io| io.write('last') }
218
+ assert_equal 'first>last', string_io.string
219
+ end
220
+
221
+ it 'chains blocks in 2 element stream' do
222
+ pipeline = {simple: {arg: 'first'}, simple2: {arg: 'second'}}
223
+ string_io = StringIO.new
224
+ streams.send(:execute, :writer, pipeline, string_io) { |io| io.write('last') }
225
+ assert_equal 'second>first>last', string_io.string
226
+ end
227
+
228
+ it 'chains blocks in 3 element stream' do
229
+ pipeline = {simple: {arg: 'first'}, simple2: {arg: 'second'}, simple3: {arg: 'third'}}
230
+ string_io = StringIO.new
231
+ streams.send(:execute, :writer, pipeline, string_io) { |io| io.write('last') }
232
+ assert_equal 'third>second>first>last', string_io.string
233
+ end
234
+ end
235
+
236
+ class SimpleStream
237
+ def self.stream(io, **args)
238
+ yield new(io, **args)
239
+ end
240
+
241
+ def initialize(io, arg:)
242
+ @io = io
243
+ @arg = arg
244
+ end
245
+
246
+ def write(data)
247
+ @io.write("#{@arg}>#{data}")
248
+ end
249
+ end
250
+
251
+ IOStreams.register_extension(:simple, nil, SimpleStream)
252
+ IOStreams.register_extension(:simple2, nil, SimpleStream)
253
+ IOStreams.register_extension(:simple3, nil, SimpleStream)
254
+ end
255
+ end