iostreams 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +202 -0
  3. data/README.md +155 -47
  4. data/lib/io_streams/file/reader.rb +7 -8
  5. data/lib/io_streams/file/writer.rb +7 -8
  6. data/lib/io_streams/io_streams.rb +313 -129
  7. data/lib/io_streams/{delimited → line}/reader.rb +20 -30
  8. data/lib/io_streams/line/writer.rb +81 -0
  9. data/lib/io_streams/pgp.rb +4 -14
  10. data/lib/io_streams/record/reader.rb +55 -0
  11. data/lib/io_streams/record/writer.rb +63 -0
  12. data/lib/io_streams/row/reader.rb +60 -0
  13. data/lib/io_streams/row/writer.rb +62 -0
  14. data/lib/io_streams/s3.rb +25 -0
  15. data/lib/io_streams/s3/reader.rb +64 -0
  16. data/lib/io_streams/s3/writer.rb +13 -0
  17. data/lib/io_streams/streams.rb +1 -1
  18. data/lib/io_streams/tabular.rb +163 -0
  19. data/lib/io_streams/tabular/errors.rb +14 -0
  20. data/lib/io_streams/tabular/header.rb +146 -0
  21. data/lib/io_streams/tabular/parser/array.rb +26 -0
  22. data/lib/io_streams/tabular/parser/base.rb +12 -0
  23. data/lib/io_streams/tabular/parser/csv.rb +35 -0
  24. data/lib/io_streams/tabular/parser/fixed.rb +88 -0
  25. data/lib/io_streams/tabular/parser/hash.rb +21 -0
  26. data/lib/io_streams/tabular/parser/json.rb +25 -0
  27. data/lib/io_streams/tabular/parser/psv.rb +34 -0
  28. data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
  29. data/lib/io_streams/version.rb +2 -2
  30. data/lib/io_streams/xlsx/reader.rb +1 -1
  31. data/lib/io_streams/zip/reader.rb +1 -1
  32. data/lib/io_streams/zip/writer.rb +1 -1
  33. data/lib/iostreams.rb +21 -10
  34. data/test/bzip2_reader_test.rb +21 -22
  35. data/test/bzip2_writer_test.rb +38 -32
  36. data/test/file_reader_test.rb +19 -18
  37. data/test/file_writer_test.rb +23 -22
  38. data/test/files/test.json +3 -0
  39. data/test/gzip_reader_test.rb +21 -22
  40. data/test/gzip_writer_test.rb +35 -29
  41. data/test/io_streams_test.rb +137 -61
  42. data/test/line_reader_test.rb +105 -0
  43. data/test/line_writer_test.rb +50 -0
  44. data/test/pgp_reader_test.rb +29 -29
  45. data/test/pgp_test.rb +149 -195
  46. data/test/pgp_writer_test.rb +63 -62
  47. data/test/record_reader_test.rb +61 -0
  48. data/test/record_writer_test.rb +73 -0
  49. data/test/row_reader_test.rb +34 -0
  50. data/test/row_writer_test.rb +51 -0
  51. data/test/tabular_test.rb +184 -0
  52. data/test/xlsx_reader_test.rb +13 -17
  53. data/test/zip_reader_test.rb +21 -22
  54. data/test/zip_writer_test.rb +40 -36
  55. metadata +41 -17
  56. data/lib/io_streams/csv/reader.rb +0 -21
  57. data/lib/io_streams/csv/writer.rb +0 -20
  58. data/lib/io_streams/delimited/writer.rb +0 -67
  59. data/test/csv_reader_test.rb +0 -34
  60. data/test/csv_writer_test.rb +0 -35
  61. data/test/delimited_reader_test.rb +0 -115
  62. data/test/delimited_writer_test.rb +0 -44
@@ -1,15 +1,14 @@
1
1
  module IOStreams
2
2
  module File
3
3
  class Reader
4
- # Read from a file or stream
5
- def self.open(file_name_or_io, _=nil, &block)
6
- unless IOStreams.reader_stream?(file_name_or_io)
7
- ::File.open(file_name_or_io, 'rb', &block)
8
- else
9
- block.call(file_name_or_io)
10
- end
11
- end
4
+ # Read from a named file
5
+ # TODO: Add support for mode (text / binary)
6
+ # TODO: Add encoding support: external_encoding, internal_encoding
7
+ def self.open(file_name, _=nil, &block)
8
+ raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
12
9
 
10
+ ::File.open(file_name, 'rb', &block)
11
+ end
13
12
  end
14
13
  end
15
14
  end
@@ -1,15 +1,14 @@
1
1
  module IOStreams
2
2
  module File
3
3
  class Writer
4
- # Write to a file or stream
5
- def self.open(file_name_or_io, _=nil, &block)
6
- unless IOStreams.writer_stream?(file_name_or_io)
7
- ::File.open(file_name_or_io, 'wb', &block)
8
- else
9
- block.call(file_name_or_io)
10
- end
11
- end
4
+ # Write to a named file
5
+ # TODO: Add support for mode (text / binary), permissions, buffering, append
6
+ # TODO: Add encoding support: external_encoding, internal_encoding
7
+ def self.open(file_name, _=nil, &block)
8
+ raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
12
9
 
10
+ ::File.open(file_name, 'wb', &block)
11
+ end
13
12
  end
14
13
  end
15
14
  end
@@ -1,72 +1,21 @@
1
1
  require 'concurrent'
2
- module IOStreams
3
- # A registry to hold formats for processing files during upload or download
4
- @extensions = Concurrent::Map.new
5
2
 
3
+ # Streaming library for Ruby
4
+ #
5
+ # Stream types / extensions supported:
6
+ # .zip Zip File [ :zip ]
7
+ # .gz, .gzip GZip File [ :gzip ]
8
+ # .enc File Encrypted using symmetric encryption [ :enc ]
9
+ # etc...
10
+ # other All other extensions will be returned as: []
11
+ #
12
+ # When a file is encrypted, it may also be compressed:
13
+ # .zip.enc [ :zip, :enc ]
14
+ # .gz.enc [ :gz, :enc ]
15
+ module IOStreams
6
16
  UTF8_ENCODING = Encoding.find('UTF-8').freeze
7
17
  BINARY_ENCODING = Encoding.find('BINARY').freeze
8
18
 
9
- # Returns [Array] the formats required to process the file by looking at
10
- # its extension(s)
11
- #
12
- # Extensions supported:
13
- # .zip Zip File [ :zip ]
14
- # .gz, .gzip GZip File [ :gzip ]
15
- # .enc File Encrypted using symmetric encryption [ :enc ]
16
- # other All other extensions will be returned as: [ :file ]
17
- #
18
- # When a file is encrypted, it may also be compressed:
19
- # .zip.enc [ :zip, :enc ]
20
- # .gz.enc [ :gz, :enc ]
21
- #
22
- # Example Zip file:
23
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
24
- # => [ :zip ]
25
- #
26
- # Example Encrypted Gzip file:
27
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
28
- # => [ :gz, :enc ]
29
- #
30
- # Example plain text / binary file:
31
- # RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
32
- # => [ :file ]
33
- def self.streams_for_file_name(file_name)
34
- raise ArgumentError.new('File name cannot be nil') if file_name.nil?
35
- raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
36
- parts = file_name.split('.')
37
- extensions = []
38
- while extension = parts.pop
39
- sym = extension.downcase.to_sym
40
- break unless @extensions[sym]
41
- extensions.unshift(sym)
42
- end
43
- extensions << :file if extensions.size == 0
44
- extensions
45
- end
46
-
47
- Extension = Struct.new(:reader_class, :writer_class)
48
-
49
- # Register a file extension and the reader and writer classes to use to format it
50
- #
51
- # Example:
52
- # # MyXls::Reader and MyXls::Writer must implement .open
53
- # register_extension(:xls, MyXls::Reader, MyXls::Writer)
54
- def self.register_extension(extension, reader_class, writer_class)
55
- raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
56
- @extensions[extension.to_sym] = Extension.new(reader_class, writer_class)
57
- end
58
-
59
- # De-Register a file extension
60
- #
61
- # Returns [Symbol] the extension removed, or nil if the extension was not registered
62
- #
63
- # Example:
64
- # register_extension(:xls)
65
- def self.deregister_extension(extension)
66
- raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
67
- @extensions.delete(extension.to_sym)
68
- end
69
-
70
19
  # Returns a Reader for reading a file / stream
71
20
  #
72
21
  # Parameters
@@ -81,15 +30,11 @@ module IOStreams
81
30
  # streams should be applied.
82
31
  # Default: nil
83
32
  #
84
- # Stream types / extensions supported:
85
- # .zip Zip File [ :zip ]
86
- # .gz, .gzip GZip File [ :gzip ]
87
- # .enc File Encrypted using symmetric encryption [ :enc ]
88
- # other All other extensions will be returned as: [ :file ]
89
- #
90
- # When a file is encrypted, it may also be compressed:
91
- # .zip.enc [ :zip, :enc ]
92
- # .gz.enc [ :gz, :enc ]
33
+ # file_name [String]
34
+ # When `streams` is not supplied, `file_name` can be used for determining the streams
35
+ # to apply to read the file/stream.
36
+ # This is particularly useful when `file_name_or_io` is a stream, or a temporary file name.
37
+ # Default: nil
93
38
  #
94
39
  # Example: Zip
95
40
  # IOStreams.reader('myfile.zip') do |stream|
@@ -108,11 +53,47 @@ module IOStreams
108
53
  #
109
54
  # Example: Supply custom options
110
55
  # # Encrypt the file and get Symmetric Encryption to also compress it
111
- # IOStreams.reader('myfile.csv.enc', [:enc]) do |stream|
56
+ # IOStreams.reader('myfile.csv.enc', streams: {enc: {compress: true}}) do |stream|
112
57
  # puts stream.read
113
58
  # end
114
- def self.reader(file_name_or_io, streams = nil, &block)
115
- stream(:reader, file_name_or_io, streams, &block)
59
+ #
60
+ # Note:
61
+ # * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
62
+ # no streams are passed in.
63
+ def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
64
+ stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
65
+ end
66
+
67
+ # Iterate over a file / stream returning one line at a time.
68
+ def self.each_line(file_name_or_io, **args, &block)
69
+ line_reader(file_name_or_io, **args) do |line_stream|
70
+ line_stream.each(&block)
71
+ end
72
+ end
73
+
74
+ # Iterate over a file / stream returning one row at a time.
75
+ def self.each_row(file_name_or_io, **args, &block)
76
+ row_reader(file_name_or_io, **args) do |row_stream|
77
+ row_stream.each(&block)
78
+ end
79
+ end
80
+
81
+ # Returns [Hash] of every record in a file or stream with support for headers.
82
+ #
83
+ # Reading a delimited stream and converting to tabular form.
84
+ #
85
+ # Each record / line is returned one at a time so that very large files
86
+ # can be read without having to load the entire file into memory.
87
+ #
88
+ # Example:
89
+ # file_name = 'customer_data.csv.pgp'
90
+ # IOStreams.each_record(file_name) do |hash|
91
+ # p hash
92
+ # end
93
+ def self.each_record(file_name_or_io, **args, &block)
94
+ record_reader(file_name_or_io, **args) do |record_stream|
95
+ record_stream.each(&block)
96
+ end
116
97
  end
117
98
 
118
99
  # Returns a Writer for writing to a file / stream
@@ -163,41 +144,121 @@ module IOStreams
163
144
  # IOStreams.writer('myfile.csv.zip', zip: { zip_file_name: 'myfile.csv' }) do |stream|
164
145
  # stream.write(data)
165
146
  # end
166
- def self.writer(file_name_or_io, streams = nil, &block)
167
- stream(:writer, file_name_or_io, streams, &block)
147
+ #
148
+ # Note:
149
+ # * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
150
+ # no streams are passed in.
151
+ def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
152
+ stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
153
+ end
154
+
155
+ def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
156
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
157
+
158
+ writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
159
+ IOStreams::Line::Writer.open(io, **args, &block)
160
+ end
161
+ end
162
+
163
+ def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
164
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
165
+
166
+ line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
167
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
168
+
169
+ IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
170
+ end
168
171
  end
169
172
 
170
- # Copies the source stream to the target stream
173
+ def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
174
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
175
+
176
+ line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
177
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
178
+
179
+ IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
180
+ end
181
+ end
182
+
183
+ # Copies the source file/stream to the target file/stream.
171
184
  # Returns [Integer] the number of bytes copied
172
185
  #
186
+ # Example: Copy between 2 files
187
+ # IOStreams.copy('a.csv', 'b.csv')
188
+ # # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
189
+ #
190
+ # Example: Read content from a Xlsx file and write it out in CSV form.
191
+ # IOStreams.copy('a.xlsx', 'b.csv')
192
+ #
193
+ # Example:
194
+ # # Read content from a JSON file and write it out in CSV form.
195
+ # #
196
+ # # The output header for the CSV file is extracted from the first row in the JSON file.
197
+ # # If the first JSON row does not contain all the column names then they will be ignored
198
+ # # for the rest of the file.
199
+ # IOStreams.copy('a.json', 'b.csv')
200
+ #
201
+ # Example:
202
+ # # Read a PSV file and write out a CSV file from it.
203
+ # IOStreams.copy('a.psv', 'b.csv')
204
+ #
205
+ # Example:
206
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
207
+ # # Since the target file_name already includes `.enc` in the filename, it is automatically
208
+ # # encrypted.
209
+ # IOStreams.copy('a.csv', 'b.csv.enc')
210
+ #
211
+ # Example:
212
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
213
+ # # Since the target file_name does not include `.enc` in the filename, to encrypt it
214
+ # # the encryption stream is added.
215
+ # IOStreams.copy('a.csv', 'b', target_options: {streams: [:enc]})
216
+ #
217
+ # Example:
218
+ # # Copy between 2 files, encrypting the target file with Symmetric Encryption
219
+ # # Since the target file_name does not include `.enc` in the filename, to encrypt it
220
+ # # the encryption stream is added, along with the optional compression option.
221
+ # IOStreams.copy('a.csv', 'b', target_options: {streams: [enc: { compress: true }]})
222
+ #
173
223
  # Example:
224
+ # # Create a pgp encrypted file.
225
+ # # For PGP Encryption the recipients email address is required.
226
+ # IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
227
+ #
228
+ # Example: Copy between 2 existing streams
174
229
  # IOStreams.reader('a.csv') do |source_stream|
175
230
  # IOStreams.writer('b.csv.enc') do |target_stream|
176
231
  # IOStreams.copy(source_stream, target_stream)
177
232
  # end
178
233
  # end
179
- def self.copy(source_stream, target_stream, buffer_size = 65536)
180
- bytes = 0
181
- while data = source_stream.read(buffer_size)
182
- break if data.size == 0
183
- bytes += data.size
184
- target_stream.write(data)
185
- end
186
- bytes
187
- end
188
-
189
- # Copies the source file name to the target file name.
190
234
  #
191
- # Returns [Integer] the number of bytes copied
235
+ # Example:
236
+ # # Copy between 2 csv files, reducing the number of columns present and encrypting the
237
+ # # target file with Symmetric Encryption
238
+ # output_headers = %w[name address]
239
+ # IOStreams.copy(
240
+ # 'a.csv',
241
+ # 'b.csv.enc',
242
+ # target_options: [csv:{headers: output_headers}, enc: {compress: true}]
243
+ # )
192
244
  #
193
245
  # Example:
194
- # IOStreams.copy_file('a.csv', 'b.csv.enc')
195
- def self.copy_file(source_file_name, target_file_name, buffer_size = 65536)
196
- reader(source_file_name) do |source_stream|
197
- writer(target_file_name) do |target_stream|
198
- copy(source_stream, target_stream, buffer_size)
246
+ # # Copy a locally encrypted file to AWS S3.
247
+ # # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
248
+ # # Useful for when the entire bucket is encrypted on S3.
249
+ # IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
250
+ def self.copy(source_file_name_or_io, target_file_name_or_io, buffer_size: 65536, source_options: {}, target_options: {})
251
+ bytes = 0
252
+ reader(source_file_name_or_io, **source_options) do |source_stream|
253
+ writer(target_file_name_or_io, **target_options) do |target_stream|
254
+ while data = source_stream.read(buffer_size)
255
+ break if data.size == 0
256
+ bytes += data.size
257
+ target_stream.write(data)
258
+ end
199
259
  end
200
260
  end
261
+ bytes
201
262
  end
202
263
 
203
264
  # Returns [true|false] whether the supplied file_name_or_io is a reader stream
@@ -222,38 +283,149 @@ module IOStreams
222
283
  !(file_name =~ /\.(enc|pgp|gpg)\z/i).nil?
223
284
  end
224
285
 
225
- # Deletes the specified stream from the supplied streams if present
226
- # Returns deleted stream, or nil if not found
227
- def self.delete_stream(stream, streams)
228
- raise(ArgumentError, "Argument :stream must be a symbol: #{stream.inspect}") unless stream.is_a?(Symbol)
286
+ # Returns [Array] the formats required to process the file by looking at
287
+ # its extension(s)
288
+ #
289
+ # Example Zip file:
290
+ # IOStreams.streams_for_file_name('myfile.zip')
291
+ # => [ :zip ]
292
+ #
293
+ # Example Encrypted Gzip file:
294
+ # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
295
+ # => [ :gz, :enc ]
296
+ #
297
+ # Example plain text / binary file:
298
+ # IOStreams.streams_for_file_name('myfile.csv')
299
+ # => []
300
+ def self.streams_for_file_name(file_name)
301
+ raise ArgumentError.new('File name cannot be nil') if file_name.nil?
302
+ raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
303
+ parts = file_name.split('.')
304
+ extensions = []
305
+ while extension = parts.pop
306
+ sym = extension.downcase.to_sym
307
+ break unless @extensions[sym]
308
+ extensions.unshift(sym)
309
+ end
310
+ extensions
311
+ end
312
+
313
+ # Iterate over a file / stream returning each record/line one at a time.
314
+ def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
315
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
316
+ file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
317
+ file_name_or_io.is_a?(Array)
318
+
319
+ reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
320
+ IOStreams::Line::Reader.open(io, **args, &block)
321
+ end
322
+ end
323
+
324
+ # Iterate over a file / stream returning each line as a hash, one at a time.
325
+ def self.row_reader(file_name_or_io,
326
+ streams: nil,
327
+ delimiter: nil,
328
+ encoding: IOStreams::UTF8_ENCODING,
329
+ strip_non_printable: false,
330
+ file_name: nil,
331
+ **args,
332
+ &block)
333
+
334
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Reader)
335
+
336
+ line_reader(
337
+ file_name_or_io,
338
+ streams: streams,
339
+ delimiter: delimiter,
340
+ encoding: encoding,
341
+ strip_non_printable: strip_non_printable,
342
+ file_name: file_name) do |io|
343
+
344
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
345
+ IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
346
+ end
347
+ end
348
+
349
+ # Iterate over a file / stream returning each line as a hash, one at a time.
350
+ def self.record_reader(file_name_or_io,
351
+ streams: nil,
352
+ delimiter: nil,
353
+ encoding: IOStreams::UTF8_ENCODING,
354
+ strip_non_printable: false,
355
+ file_name: nil,
356
+ **args,
357
+ &block)
229
358
 
230
- Array(streams).delete_if do |_stream|
231
- stream_key = _stream.is_a?(Symbol) ? _stream : _stream.keys.first
232
- stream == stream_key
359
+ return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader) || file_name_or_io.is_a?(IOStreams::Xlsx::Reader)
360
+
361
+ line_reader(
362
+ file_name_or_io,
363
+ streams: streams,
364
+ delimiter: delimiter,
365
+ encoding: encoding,
366
+ strip_non_printable: strip_non_printable,
367
+ file_name: file_name) do |io|
368
+
369
+ file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
370
+ IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
233
371
  end
234
372
  end
235
373
 
236
- # Returns [true|false] whether the stream starts with a delimited reader or writer
237
- def self.delimited_stream?(streams)
238
- stream = Array(streams).first
239
- return false unless stream
374
+ Extension = Struct.new(:reader_class, :writer_class)
375
+
376
+ # Register a file extension and the reader and writer streaming classes
377
+ #
378
+ # Example:
379
+ # # MyXls::Reader and MyXls::Writer must implement .open
380
+ # register_extension(:xls, MyXls::Reader, MyXls::Writer)
381
+ def self.register_extension(extension, reader_class, writer_class)
382
+ raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.nil? || extension.to_s =~ /\A\w+\Z/
383
+ @extensions[extension.nil? ? nil : extension.to_sym] = Extension.new(reader_class, writer_class)
384
+ end
385
+
386
+ # De-Register a file extension
387
+ #
388
+ # Returns [Symbol] the extension removed, or nil if the extension was not registered
389
+ #
390
+ # Example:
391
+ # deregister_extension(:xls)
392
+ def self.deregister_extension(extension)
393
+ raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
394
+ @extensions.delete(extension.to_sym)
395
+ end
240
396
 
241
- # TODO Need to figure out a way so that this is not hard-coded
242
- [:xlsx, :xlsm, :delimited].include?(stream.is_a?(Symbol) ? stream : stream.keys.first)
397
+ # Helper method: Returns [true|false] if a value is blank?
398
+ def self.blank?(value)
399
+ if value.nil?
400
+ true
401
+ elsif value.is_a?(String)
402
+ value !~ /\S/
403
+ else
404
+ value.respond_to?(:empty?) ? value.empty? : !value
405
+ end
243
406
  end
244
407
 
245
- ##########################################################################
246
408
  private
247
409
 
410
+ # A registry to hold formats for processing files during upload or download
411
+ @extensions = {}
412
+
248
413
  # Struct to hold the Stream and options if any
249
414
  StreamStruct = Struct.new(:klass, :options)
250
415
 
251
416
  # Returns a reader or writer stream
252
- def self.stream(type, file_name_or_io, streams = nil, &block)
253
- unless streams
254
- respond_to = type == :reader ? :read : :write
255
- streams = file_name_or_io.respond_to?(respond_to) ? [:file] : streams_for_file_name(file_name_or_io)
417
+ def self.stream(type, file_name_or_io, streams:, file_name:, &block)
418
+ # TODO: Add support for different schemes, such as file://, s3://, sftp://
419
+
420
+ streams = streams_for_file_name(file_name) if streams.nil? && file_name
421
+
422
+ # Shortcut for when it is already a stream and no further streams need to be applied.
423
+ return block.call(file_name_or_io) if !file_name_or_io.is_a?(String) && (streams.nil? || streams.empty?)
424
+
425
+ if streams.nil?
426
+ streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
256
427
  end
428
+
257
429
  stream_structs = streams_for(type, streams)
258
430
  if stream_structs.size == 1
259
431
  stream_struct = stream_structs.first
@@ -270,6 +442,7 @@ module IOStreams
270
442
  if params.is_a?(Symbol)
271
443
  [stream_struct_for_stream(type, params)]
272
444
  elsif params.is_a?(Array)
445
+ return [stream_struct_for_stream(type, nil)] if params.empty?
273
446
  a = []
274
447
  params.each do |stream|
275
448
  if stream.is_a?(Hash)
@@ -288,24 +461,35 @@ module IOStreams
288
461
  end
289
462
  end
290
463
 
291
- def self.stream_struct_for_stream(type, stream, options={})
292
- ext = @extensions[stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
464
+ def self.stream_struct_for_stream(type, stream, options = {})
465
+ ext = @extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
293
466
  klass = ext.send("#{type}_class")
294
467
  StreamStruct.new(klass, options)
295
468
  end
296
469
 
470
+ # Default reader/writer when no other streams need to be applied.
471
+ register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
472
+
297
473
  # Register File extensions
298
- # @formatter:off
299
- register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer) if defined?(SymmetricEncryption)
300
- register_extension(:file, IOStreams::File::Reader, IOStreams::File::Writer)
301
- register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
302
- register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
303
- register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
304
- register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
305
- register_extension(:delimited, IOStreams::Delimited::Reader, IOStreams::Delimited::Writer)
306
- register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
307
- register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
308
- register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
309
- register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
310
- #register_extension(:csv, IOStreams::CSV::Reader, IOStreams::CSV::Writer)
474
+ register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
475
+ register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
476
+ register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
477
+ register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
478
+ register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
479
+ register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
480
+ register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
481
+ register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
482
+
483
+ # Use Symmetric Encryption to encrypt or decrypt files with the `enc` extension
484
+ # when the gem `symmetric-encryption` has been loaded.
485
+ if defined?(SymmetricEncryption)
486
+ register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
487
+ end
488
+
489
+ # register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
490
+ # register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
491
+ # register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
492
+ # register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
493
+ # register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
494
+ # register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
311
495
  end