iostreams 0.14.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +202 -0
  3. data/README.md +155 -47
  4. data/lib/io_streams/file/reader.rb +7 -8
  5. data/lib/io_streams/file/writer.rb +7 -8
  6. data/lib/io_streams/io_streams.rb +313 -129
  7. data/lib/io_streams/{delimited → line}/reader.rb +20 -30
  8. data/lib/io_streams/line/writer.rb +81 -0
  9. data/lib/io_streams/pgp.rb +4 -14
  10. data/lib/io_streams/record/reader.rb +55 -0
  11. data/lib/io_streams/record/writer.rb +63 -0
  12. data/lib/io_streams/row/reader.rb +60 -0
  13. data/lib/io_streams/row/writer.rb +62 -0
  14. data/lib/io_streams/s3.rb +25 -0
  15. data/lib/io_streams/s3/reader.rb +64 -0
  16. data/lib/io_streams/s3/writer.rb +13 -0
  17. data/lib/io_streams/streams.rb +1 -1
  18. data/lib/io_streams/tabular.rb +163 -0
  19. data/lib/io_streams/tabular/errors.rb +14 -0
  20. data/lib/io_streams/tabular/header.rb +146 -0
  21. data/lib/io_streams/tabular/parser/array.rb +26 -0
  22. data/lib/io_streams/tabular/parser/base.rb +12 -0
  23. data/lib/io_streams/tabular/parser/csv.rb +35 -0
  24. data/lib/io_streams/tabular/parser/fixed.rb +88 -0
  25. data/lib/io_streams/tabular/parser/hash.rb +21 -0
  26. data/lib/io_streams/tabular/parser/json.rb +25 -0
  27. data/lib/io_streams/tabular/parser/psv.rb +34 -0
  28. data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
  29. data/lib/io_streams/version.rb +2 -2
  30. data/lib/io_streams/xlsx/reader.rb +1 -1
  31. data/lib/io_streams/zip/reader.rb +1 -1
  32. data/lib/io_streams/zip/writer.rb +1 -1
  33. data/lib/iostreams.rb +21 -10
  34. data/test/bzip2_reader_test.rb +21 -22
  35. data/test/bzip2_writer_test.rb +38 -32
  36. data/test/file_reader_test.rb +19 -18
  37. data/test/file_writer_test.rb +23 -22
  38. data/test/files/test.json +3 -0
  39. data/test/gzip_reader_test.rb +21 -22
  40. data/test/gzip_writer_test.rb +35 -29
  41. data/test/io_streams_test.rb +137 -61
  42. data/test/line_reader_test.rb +105 -0
  43. data/test/line_writer_test.rb +50 -0
  44. data/test/pgp_reader_test.rb +29 -29
  45. data/test/pgp_test.rb +149 -195
  46. data/test/pgp_writer_test.rb +63 -62
  47. data/test/record_reader_test.rb +61 -0
  48. data/test/record_writer_test.rb +73 -0
  49. data/test/row_reader_test.rb +34 -0
  50. data/test/row_writer_test.rb +51 -0
  51. data/test/tabular_test.rb +184 -0
  52. data/test/xlsx_reader_test.rb +13 -17
  53. data/test/zip_reader_test.rb +21 -22
  54. data/test/zip_writer_test.rb +40 -36
  55. metadata +41 -17
  56. data/lib/io_streams/csv/reader.rb +0 -21
  57. data/lib/io_streams/csv/writer.rb +0 -20
  58. data/lib/io_streams/delimited/writer.rb +0 -67
  59. data/test/csv_reader_test.rb +0 -34
  60. data/test/csv_writer_test.rb +0 -35
  61. data/test/delimited_reader_test.rb +0 -115
  62. data/test/delimited_writer_test.rb +0 -44
@@ -1,15 +1,14 @@
1
1
  module IOStreams
2
2
  module File
3
3
  class Reader
4
- # Read from a file or stream
5
- def self.open(file_name_or_io, _=nil, &block)
6
- unless IOStreams.reader_stream?(file_name_or_io)
7
- ::File.open(file_name_or_io, 'rb', &block)
8
- else
9
- block.call(file_name_or_io)
10
- end
11
- end
4
+ # Read from a named file
5
+ # TODO: Add support for mode (text / binary)
6
+ # TODO: Add encoding support: external_encoding, internal_encoding
7
+ def self.open(file_name, _=nil, &block)
8
+ raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
12
9
 
10
+ ::File.open(file_name, 'rb', &block)
11
+ end
13
12
  end
14
13
  end
15
14
  end
@@ -1,15 +1,14 @@
1
1
  module IOStreams
2
2
  module File
3
3
  class Writer
4
- # Write to a file or stream
5
- def self.open(file_name_or_io, _=nil, &block)
6
- unless IOStreams.writer_stream?(file_name_or_io)
7
- ::File.open(file_name_or_io, 'wb', &block)
8
- else
9
- block.call(file_name_or_io)
10
- end
11
- end
4
+ # Write to a named file
5
+ # TODO: Add support for mode (text / binary), permissions, buffering, append
6
+ # TODO: Add encoding support: external_encoding, internal_encoding
7
+ def self.open(file_name, _=nil, &block)
8
+ raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
12
9
 
10
+ ::File.open(file_name, 'wb', &block)
11
+ end
13
12
  end
14
13
  end
15
14
  end
@@ -1,72 +1,21 @@
1
1
  require 'concurrent'
2
- module IOStreams
3
- # A registry to hold formats for processing files during upload or download
4
- @extensions = Concurrent::Map.new
5
2
 
3
+ # Streaming library for Ruby
4
+ #
5
+ # Stream types / extensions supported:
6
+ # .zip Zip File [ :zip ]
7
+ # .gz, .gzip GZip File [ :gzip ]
8
+ # .enc File Encrypted using symmetric encryption [ :enc ]
9
+ # etc...
10
+ # other All other extensions will be returned as: []
11
+ #
12
+ # When a file is encrypted, it may also be compressed:
13
+ # .zip.enc [ :zip, :enc ]
14
+ # .gz.enc [ :gz, :enc ]
15
+ module IOStreams
6
16
  UTF8_ENCODING = Encoding.find('UTF-8').freeze
7
17
  BINARY_ENCODING = Encoding.find('BINARY').freeze
8
18
 
9
- # Returns [Array] the formats required to process the file by looking at
10
- # its extension(s)
11
- #
12
- # Extensions supported:
13
- # .zip Zip File [ :zip ]
14
- # .gz, .gzip GZip File [ :gzip ]
15
- # .enc File Encrypted using symmetric encryption [ :enc ]
16
- # other All other extensions will be returned as: [ :file ]
17
- #
18
- # When a file is encrypted, it may also be compressed:
19
- # .zip.enc [ :zip, :enc ]
20
- # .gz.enc [ :gz, :enc ]
21
- #
22
- # Example Zip file:
23
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
24
- # => [ :zip ]
25
- #
26
- # Example Encrypted Gzip file:
27
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
28
- # => [ :gz, :enc ]
29
- #
30
- # Example plain text / binary file:
31
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
32
- # => [ :file ]
33
- def self.streams_for_file_name(file_name)
34
- raise ArgumentError.new('File name cannot be nil') if file_name.nil?
35
- raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
36
- parts = file_name.split('.')
37
- extensions = []
38
- while extension = parts.pop
39
- sym = extension.downcase.to_sym
40
- break unless @extensions[sym]
41
- extensions.unshift(sym)
42
- end
43
- extensions << :file if extensions.size == 0
44
- extensions
45
- end
46
-
47
- Extension = Struct.new(:reader_class, :writer_class)
48
-
49
- # Register a file extension and the reader and writer classes to use to format it
50
- #
51
- # Example:
52
- # # MyXls::Reader and MyXls::Writer must implement .open
53
- # register_extension(:xls, MyXls::Reader, MyXls::Writer)
54
- def self.register_extension(extension, reader_class, writer_class)
55
- raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
56
- @extensions[extension.to_sym] = Extension.new(reader_class, writer_class)
57
- end
58
-
59
- # De-Register a file extension
60
- #
61
- # Returns [Symbol] the extension removed, or nil if the extension was not registered
62
- #
63
- # Example:
64
- # register_extension(:xls)
65
- def self.deregister_extension(extension)
66
- raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
67
- @extensions.delete(extension.to_sym)
68
- end
69
-
70
19
  # Returns a Reader for reading a file / stream
71
20
  #
72
21
  # Parameters
@@ -81,15 +30,11 @@ module IOStreams
81
30
  # streams should be applied.
82
31
  # Default: nil
83
32
  #
84
- # Stream types / extensions supported:
85
- # .zip Zip File [ :zip ]
86
- # .gz, .gzip GZip File [ :gzip ]
87
- # .enc File Encrypted using symmetric encryption [ :enc ]
88
- # other All other extensions will be returned as: [ :file ]
89
- #
90
- # When a file is encrypted, it may also be compressed:
91
- # .zip.enc [ :zip, :enc ]
92
- # .gz.enc [ :gz, :enc ]
33
+ # file_name [String]
34
+ # When `streams` is not supplied, `file_name` can be used for determining the streams
35
+ # to apply to read the file/stream.
36
+ # This is particularly useful when `file_name_or_io` is a stream, or a temporary file name.
37
+ # Default: nil
93
38
  #
94
39
  # Example: Zip
95
40
  # IOStreams.reader('myfile.zip') do |stream|
@@ -108,11 +53,47 @@ module IOStreams
108
53
  #
109
54
  # Example: Supply custom options
110
55
  # # Encrypt the file and get Symmetric Encryption to also compress it
111
- # IOStreams.reader('myfile.csv.enc', [:enc]) do |stream|
56
+ # IOStreams.reader('myfile.csv.enc', streams: [enc: {compress: true}]) do |stream|
112
57
  # puts stream.read
113
58
  # end
114
- def self.reader(file_name_or_io, streams = nil, &block)
115
- stream(:reader, file_name_or_io, streams, &block)
59
+ #
60
+ # Note:
61
+ # * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
62
+ # no streams are passed in.
63
+ def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
64
+ stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
65
+ end
66
+
67
+ # Iterate over a file / stream returning one line at a time.
68
+ def self.each_line(file_name_or_io, **args, &block)
69
+ line_reader(file_name_or_io, **args) do |line_stream|
70
+ line_stream.each(&block)
71
+ end
72
+ end
73
+
74
+ # Iterate over a file / stream returning one row at a time.
75
+ def self.each_row(file_name_or_io, **args, &block)
76
+ row_reader(file_name_or_io, **args) do |row_stream|
77
+ row_stream.each(&block)
78
+ end
79
+ end
80
+
81
+ # Returns [Hash] of every record in a file or stream with support for headers.
82
+ #
83
+ # Reading a delimited stream and converting to tabular form.
84
+ #
85
+ # Each record / line is returned one at a time so that very large files
86
+ # can be read without having to load the entire file into memory.
87
+ #
88
+ # Example:
89
+ # file_name = 'customer_data.csv.pgp'
90
+ # IOStreams.each_record(file_name) do |hash|
91
+ # p hash
92
+ # end
93
+ def self.each_record(file_name_or_io, **args, &block)
94
+ record_reader(file_name_or_io, **args) do |record_stream|
95
+ record_stream.each(&block)
96
+ end
116
97
  end
117
98
 
118
99
  # Returns a Writer for writing to a file / stream
@@ -163,41 +144,121 @@ module IOStreams
163
144
  # IOStreams.writer('myfile.csv.zip', zip: { zip_file_name: 'myfile.csv' }) do |stream|
164
145
  # stream.write(data)
165
146
  # end
166
- def self.writer(file_name_or_io, streams = nil, &block)
167
- stream(:writer, file_name_or_io, streams, &block)
147
+ #
148
+ # Note:
149
+ # * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
150
+ # no streams are passed in.
151
+ def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
152
+ stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
153
+ end
154
+
155
+ def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
156
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
157
+
158
+ writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
159
+ IOStreams::Line::Writer.open(io, **args, &block)
160
+ end
161
+ end
162
+
163
+ def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
164
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
165
+
166
+ line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
167
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
168
+
169
+ IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
170
+ end
168
171
  end
169
172
 
170
- # Copies the source stream to the target stream
173
+ def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
174
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
175
+
176
+ line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
177
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
178
+
179
+ IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
180
+ end
181
+ end
182
+
183
+ # Copies the source file/stream to the target file/stream.
171
184
  # Returns [Integer] the number of bytes copied
172
185
  #
186
+ # Example: Copy between 2 files
187
+ # IOStreams.copy('a.csv', 'b.csv')
188
+ # # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
189
+ #
190
+ # Example: Read content from a Xlsx file and write it out in CSV form.
191
+ # IOStreams.copy('a.xlsx', 'b.csv')
192
+ #
193
+ # Example:
194
+ # # Read content from a JSON file and write it out in CSV form.
195
+ # #
196
+ # # The output header for the CSV file is extracted from the first row in the JSON file.
197
+ # # If the first JSON row does not contain all the column names then they will be ignored
198
+ # # for the rest of the file.
199
+ # IOStreams.copy('a.json', 'b.csv')
200
+ #
201
+ # Example:
202
+ # # Read a PSV file and write out a CSV file from it.
203
+ # IOStreams.copy('a.psv', 'b.csv')
204
+ #
205
+ # Example:
206
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
207
+ # # Since the target file_name already includes `.enc` in the filename, it is automatically
208
+ # # encrypted.
209
+ # IOStreams.copy('a.csv', 'b.csv.enc')
210
+ #
211
+ # Example:
212
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
213
+ # # Since the target file_name does not include `.enc` in the filename, to encrypt it
214
+ # # the encryption stream is added.
215
+ # IOStreams.copy('a.csv', 'b', target_options: {streams: [:enc]})
216
+ #
217
+ # Example:
218
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
219
+ # # Since the target file_name does not include `.enc` in the filename, to encrypt it
220
+ # # the encryption stream is added, along with the optional compression option.
221
+ # IOStreams.copy('a.csv', 'b', target_options: {streams: [enc: { compress: true }]})
222
+ #
173
223
  # Example:
224
+ # # Create a pgp encrypted file.
225
+ # # For PGP Encryption the recipients email address is required.
226
+ # IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
227
+ #
228
+ # Example: Copy between 2 existing streams
174
229
  # IOStreams.reader('a.csv') do |source_stream|
175
230
  # IOStreams.writer('b.csv.enc') do |target_stream|
176
231
  # IOStreams.copy(source_stream, target_stream)
177
232
  # end
178
233
  # end
179
- def self.copy(source_stream, target_stream, buffer_size = 65536)
180
- bytes = 0
181
- while data = source_stream.read(buffer_size)
182
- break if data.size == 0
183
- bytes += data.size
184
- target_stream.write(data)
185
- end
186
- bytes
187
- end
188
-
189
- # Copies the source file name to the target file name.
190
234
  #
191
- # Returns [Integer] the number of bytes copied
235
+ # Example:
236
+ # # Copy between 2 csv files, reducing the number of columns present and encrypting the
237
+ # # target file with Symmetric Encryption
238
+ # output_headers = %w[name address]
239
+ # IOStreams.copy(
240
+ # 'a.csv',
241
+ # 'b.csv.enc',
242
+ # target_options: [csv:{headers: output_headers}, enc: {compress: true}]
243
+ # )
192
244
  #
193
245
  # Example:
194
- # IOStreams.copy_file('a.csv', 'b.csv.enc')
195
- def self.copy_file(source_file_name, target_file_name, buffer_size = 65536)
196
- reader(source_file_name) do |source_stream|
197
- writer(target_file_name) do |target_stream|
198
- copy(source_stream, target_stream, buffer_size)
246
+ # # Copy a locally encrypted file to AWS S3.
247
+ # # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
248
+ # # Useful for when the entire bucket is encrypted on S3.
249
+ # IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
250
+ def self.copy(source_file_name_or_io, target_file_name_or_io, buffer_size: 65536, source_options: {}, target_options: {})
251
+ bytes = 0
252
+ reader(source_file_name_or_io, **source_options) do |source_stream|
253
+ writer(target_file_name_or_io, **target_options) do |target_stream|
254
+ while data = source_stream.read(buffer_size)
255
+ break if data.size == 0
256
+ bytes += data.size
257
+ target_stream.write(data)
258
+ end
199
259
  end
200
260
  end
261
+ bytes
201
262
  end
202
263
 
203
264
  # Returns [true|false] whether the supplied file_name_or_io is a reader stream
@@ -222,38 +283,149 @@ module IOStreams
222
283
  !(file_name =~ /\.(enc|pgp|gpg)\z/i).nil?
223
284
  end
224
285
 
225
- # Deletes the specified stream from the supplied streams if present
226
- # Returns deleted stream, or nil if not found
227
- def self.delete_stream(stream, streams)
228
- raise(ArgumentError, "Argument :stream must be a symbol: #{stream.inspect}") unless stream.is_a?(Symbol)
286
+ # Returns [Array] the formats required to process the file by looking at
287
+ # its extension(s)
288
+ #
289
+ # Example Zip file:
290
+ # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
291
+ # => [ :zip ]
292
+ #
293
+ # Example Encrypted Gzip file:
294
+ # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
295
+ # => [ :gz, :enc ]
296
+ #
297
+ # Example plain text / binary file:
298
+ # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
299
+ # => []
300
+ def self.streams_for_file_name(file_name)
301
+ raise ArgumentError.new('File name cannot be nil') if file_name.nil?
302
+ raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
303
+ parts = file_name.split('.')
304
+ extensions = []
305
+ while extension = parts.pop
306
+ sym = extension.downcase.to_sym
307
+ break unless @extensions[sym]
308
+ extensions.unshift(sym)
309
+ end
310
+ extensions
311
+ end
312
+
313
+ # Iterate over a file / stream returning each record/line one at a time.
314
+ def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
315
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
316
+ file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
317
+ file_name_or_io.is_a?(Array)
318
+
319
+ reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
320
+ IOStreams::Line::Reader.open(io, **args, &block)
321
+ end
322
+ end
323
+
324
+ # Iterate over a file / stream returning each line as a hash, one at a time.
325
+ def self.row_reader(file_name_or_io,
326
+ streams: nil,
327
+ delimiter: nil,
328
+ encoding: IOStreams::UTF8_ENCODING,
329
+ strip_non_printable: false,
330
+ file_name: nil,
331
+ **args,
332
+ &block)
333
+
334
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Reader)
335
+
336
+ line_reader(
337
+ file_name_or_io,
338
+ streams: streams,
339
+ delimiter: delimiter,
340
+ encoding: encoding,
341
+ strip_non_printable: strip_non_printable,
342
+ file_name: file_name) do |io|
343
+
344
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
345
+ IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
346
+ end
347
+ end
348
+
349
+ # Iterate over a file / stream returning each line as a hash, one at a time.
350
+ def self.record_reader(file_name_or_io,
351
+ streams: nil,
352
+ delimiter: nil,
353
+ encoding: IOStreams::UTF8_ENCODING,
354
+ strip_non_printable: false,
355
+ file_name: nil,
356
+ **args,
357
+ &block)
229
358
 
230
- Array(streams).delete_if do |_stream|
231
- stream_key = _stream.is_a?(Symbol) ? _stream : _stream.keys.first
232
- stream == stream_key
359
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader) || file_name_or_io.is_a?(IOStreams::Xlsx::Reader)
360
+
361
+ line_reader(
362
+ file_name_or_io,
363
+ streams: streams,
364
+ delimiter: delimiter,
365
+ encoding: encoding,
366
+ strip_non_printable: strip_non_printable,
367
+ file_name: file_name) do |io|
368
+
369
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
370
+ IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
233
371
  end
234
372
  end
235
373
 
236
- # Returns [true|false] whether the stream starts with a delimited reader or writer
237
- def self.delimited_stream?(streams)
238
- stream = Array(streams).first
239
- return false unless stream
374
+ Extension = Struct.new(:reader_class, :writer_class)
375
+
376
+ # Register a file extension and the reader and writer streaming classes
377
+ #
378
+ # Example:
379
+ # # MyXls::Reader and MyXls::Writer must implement .open
380
+ # register_extension(:xls, MyXls::Reader, MyXls::Writer)
381
+ def self.register_extension(extension, reader_class, writer_class)
382
+ raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.nil? || extension.to_s =~ /\A\w+\Z/
383
+ @extensions[extension.nil? ? nil : extension.to_sym] = Extension.new(reader_class, writer_class)
384
+ end
385
+
386
+ # De-Register a file extension
387
+ #
388
+ # Returns [Symbol] the extension removed, or nil if the extension was not registered
389
+ #
390
+ # Example:
391
+ # deregister_extension(:xls)
392
+ def self.deregister_extension(extension)
393
+ raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
394
+ @extensions.delete(extension.to_sym)
395
+ end
240
396
 
241
- # TODO Need to figure out a way so that this is not hard-coded
242
- [:xlsx, :xlsm, :delimited].include?(stream.is_a?(Symbol) ? stream : stream.keys.first)
397
+ # Helper method: Returns [true|false] if a value is blank?
398
+ def self.blank?(value)
399
+ if value.nil?
400
+ true
401
+ elsif value.is_a?(String)
402
+ value !~ /\S/
403
+ else
404
+ value.respond_to?(:empty?) ? value.empty? : !value
405
+ end
243
406
  end
244
407
 
245
- ##########################################################################
246
408
  private
247
409
 
410
+ # A registry to hold formats for processing files during upload or download
411
+ @extensions = {}
412
+
248
413
  # Struct to hold the Stream and options if any
249
414
  StreamStruct = Struct.new(:klass, :options)
250
415
 
251
416
  # Returns a reader or writer stream
252
- def self.stream(type, file_name_or_io, streams = nil, &block)
253
- unless streams
254
- respond_to = type == :reader ? :read : :write
255
- streams = file_name_or_io.respond_to?(respond_to) ? [:file] : streams_for_file_name(file_name_or_io)
417
+ def self.stream(type, file_name_or_io, streams:, file_name:, &block)
418
+ # TODO: Add support for different schemes, such as file://, s3://, sftp://
419
+
420
+ streams = streams_for_file_name(file_name) if streams.nil? && file_name
421
+
422
+ # Shortcut for when it is already a stream and no further streams need to be applied.
423
+ return block.call(file_name_or_io) if !file_name_or_io.is_a?(String) && (streams.nil? || streams.empty?)
424
+
425
+ if streams.nil?
426
+ streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
256
427
  end
428
+
257
429
  stream_structs = streams_for(type, streams)
258
430
  if stream_structs.size == 1
259
431
  stream_struct = stream_structs.first
@@ -270,6 +442,7 @@ module IOStreams
270
442
  if params.is_a?(Symbol)
271
443
  [stream_struct_for_stream(type, params)]
272
444
  elsif params.is_a?(Array)
445
+ return [stream_struct_for_stream(type, nil)] if params.empty?
273
446
  a = []
274
447
  params.each do |stream|
275
448
  if stream.is_a?(Hash)
@@ -288,24 +461,35 @@ module IOStreams
288
461
  end
289
462
  end
290
463
 
291
- def self.stream_struct_for_stream(type, stream, options={})
292
- ext = @extensions[stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
464
+ def self.stream_struct_for_stream(type, stream, options = {})
465
+ ext = @extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
293
466
  klass = ext.send("#{type}_class")
294
467
  StreamStruct.new(klass, options)
295
468
  end
296
469
 
470
+ # Default reader/writer when no other streams need to be applied.
471
+ register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
472
+
297
473
  # Register File extensions
298
- # @formatter:off
299
- register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer) if defined?(SymmetricEncryption)
300
- register_extension(:file, IOStreams::File::Reader, IOStreams::File::Writer)
301
- register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
302
- register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
303
- register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
304
- register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
305
- register_extension(:delimited, IOStreams::Delimited::Reader, IOStreams::Delimited::Writer)
306
- register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
307
- register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
308
- register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
309
- register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
310
- #register_extension(:csv, IOStreams::CSV::Reader, IOStreams::CSV::Writer)
474
+ register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
475
+ register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
476
+ register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
477
+ register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
478
+ register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
479
+ register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
480
+ register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
481
+ register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
482
+
483
+ # Use Symmetric Encryption to encrypt or decrypt files with the `enc` extension
484
+ # when the gem `symmetric-encryption` has been loaded.
485
+ if defined?(SymmetricEncryption)
486
+ register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
487
+ end
488
+
489
+ # register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
490
+ # register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
491
+ # register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
492
+ # register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
493
+ # register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
494
+ # register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
311
495
  end