iostreams 0.20.3 → 1.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/lib/io_streams/bzip2/reader.rb +9 -21
  3. data/lib/io_streams/bzip2/writer.rb +9 -21
  4. data/lib/io_streams/deprecated.rb +217 -0
  5. data/lib/io_streams/encode/reader.rb +12 -16
  6. data/lib/io_streams/encode/writer.rb +9 -13
  7. data/lib/io_streams/errors.rb +6 -6
  8. data/lib/io_streams/gzip/reader.rb +7 -14
  9. data/lib/io_streams/gzip/writer.rb +7 -15
  10. data/lib/io_streams/io_streams.rb +182 -524
  11. data/lib/io_streams/line/reader.rb +9 -9
  12. data/lib/io_streams/line/writer.rb +10 -11
  13. data/lib/io_streams/path.rb +190 -0
  14. data/lib/io_streams/paths/file.rb +176 -0
  15. data/lib/io_streams/paths/http.rb +92 -0
  16. data/lib/io_streams/paths/matcher.rb +61 -0
  17. data/lib/io_streams/paths/s3.rb +269 -0
  18. data/lib/io_streams/paths/sftp.rb +99 -0
  19. data/lib/io_streams/pgp.rb +47 -19
  20. data/lib/io_streams/pgp/reader.rb +20 -28
  21. data/lib/io_streams/pgp/writer.rb +24 -46
  22. data/lib/io_streams/reader.rb +28 -0
  23. data/lib/io_streams/record/reader.rb +20 -16
  24. data/lib/io_streams/record/writer.rb +28 -28
  25. data/lib/io_streams/row/reader.rb +22 -26
  26. data/lib/io_streams/row/writer.rb +29 -28
  27. data/lib/io_streams/stream.rb +400 -0
  28. data/lib/io_streams/streams.rb +125 -0
  29. data/lib/io_streams/symmetric_encryption/reader.rb +5 -13
  30. data/lib/io_streams/symmetric_encryption/writer.rb +16 -15
  31. data/lib/io_streams/tabular/header.rb +9 -3
  32. data/lib/io_streams/tabular/parser/array.rb +8 -3
  33. data/lib/io_streams/tabular/parser/csv.rb +6 -2
  34. data/lib/io_streams/tabular/parser/hash.rb +4 -1
  35. data/lib/io_streams/tabular/parser/json.rb +3 -1
  36. data/lib/io_streams/tabular/parser/psv.rb +3 -1
  37. data/lib/io_streams/tabular/utility/csv_row.rb +9 -8
  38. data/lib/io_streams/utils.rb +22 -0
  39. data/lib/io_streams/version.rb +1 -1
  40. data/lib/io_streams/writer.rb +28 -0
  41. data/lib/io_streams/xlsx/reader.rb +7 -19
  42. data/lib/io_streams/zip/reader.rb +7 -26
  43. data/lib/io_streams/zip/writer.rb +21 -38
  44. data/lib/iostreams.rb +15 -15
  45. data/test/bzip2_reader_test.rb +3 -3
  46. data/test/bzip2_writer_test.rb +3 -3
  47. data/test/deprecated_test.rb +123 -0
  48. data/test/encode_reader_test.rb +3 -3
  49. data/test/encode_writer_test.rb +6 -6
  50. data/test/gzip_reader_test.rb +2 -2
  51. data/test/gzip_writer_test.rb +3 -3
  52. data/test/io_streams_test.rb +43 -136
  53. data/test/line_reader_test.rb +20 -20
  54. data/test/line_writer_test.rb +3 -3
  55. data/test/path_test.rb +30 -28
  56. data/test/paths/file_test.rb +206 -0
  57. data/test/paths/http_test.rb +34 -0
  58. data/test/paths/matcher_test.rb +111 -0
  59. data/test/paths/s3_test.rb +207 -0
  60. data/test/pgp_reader_test.rb +8 -8
  61. data/test/pgp_writer_test.rb +13 -13
  62. data/test/record_reader_test.rb +5 -5
  63. data/test/record_writer_test.rb +4 -4
  64. data/test/row_reader_test.rb +5 -5
  65. data/test/row_writer_test.rb +6 -6
  66. data/test/stream_test.rb +116 -0
  67. data/test/streams_test.rb +255 -0
  68. data/test/utils_test.rb +20 -0
  69. data/test/xlsx_reader_test.rb +3 -3
  70. data/test/zip_reader_test.rb +12 -12
  71. data/test/zip_writer_test.rb +5 -5
  72. metadata +33 -45
  73. data/lib/io_streams/base_path.rb +0 -72
  74. data/lib/io_streams/file/path.rb +0 -58
  75. data/lib/io_streams/file/reader.rb +0 -12
  76. data/lib/io_streams/file/writer.rb +0 -22
  77. data/lib/io_streams/http/reader.rb +0 -71
  78. data/lib/io_streams/s3.rb +0 -26
  79. data/lib/io_streams/s3/path.rb +0 -40
  80. data/lib/io_streams/s3/reader.rb +0 -28
  81. data/lib/io_streams/s3/writer.rb +0 -85
  82. data/lib/io_streams/sftp/reader.rb +0 -67
  83. data/lib/io_streams/sftp/writer.rb +0 -68
  84. data/test/base_path_test.rb +0 -35
  85. data/test/file_path_test.rb +0 -97
  86. data/test/file_reader_test.rb +0 -33
  87. data/test/file_writer_test.rb +0 -50
  88. data/test/http_reader_test.rb +0 -38
  89. data/test/s3_reader_test.rb +0 -41
  90. data/test/s3_writer_test.rb +0 -41
@@ -1,21 +1,13 @@
1
1
  module IOStreams
2
2
  module Gzip
3
- class Writer
4
- # Write to a file / stream, compressing with GZip
5
- def self.open(file_name_or_io, **args, &block)
6
- unless IOStreams.writer_stream?(file_name_or_io)
7
- IOStreams::File::Path.mkpath(file_name_or_io)
8
- Zlib::GzipWriter.open(file_name_or_io, &block)
9
- else
10
- begin
11
- io = Zlib::GzipWriter.new(file_name_or_io)
12
- block.call(io)
13
- ensure
14
- io.close if io && (io.respond_to?(:closed?) && !io.closed?)
15
- end
16
- end
3
+ class Writer < IOStreams::Writer
4
+ # Write to a stream, compressing with GZip
5
+ def self.stream(input_stream, original_file_name: nil, &block)
6
+ io = ::Zlib::GzipWriter.new(input_stream)
7
+ block.call(io)
8
+ ensure
9
+ io&.close
17
10
  end
18
-
19
11
  end
20
12
  end
21
13
  end
@@ -1,5 +1,4 @@
1
- require 'concurrent'
2
- require 'fileutils'
1
+ require 'uri'
3
2
 
4
3
  # Streaming library for Ruby
5
4
  #
@@ -14,422 +13,13 @@ require 'fileutils'
14
13
  # .zip.enc [ :zip, :enc ]
15
14
  # .gz.enc [ :gz, :enc ]
16
15
  module IOStreams
17
- UTF8_ENCODING = Encoding.find('UTF-8').freeze
18
- BINARY_ENCODING = Encoding.find('BINARY').freeze
19
-
20
- # Returns a Reader for reading a file / stream
21
- #
22
- # Parameters
23
- # file_name_or_io [String|IO]
24
- # The file_name of the file to write to, or an IO Stream that implements
25
- # #read.
26
- #
27
- # streams [Symbol|Array]
28
- # The formats/streams that be used to convert the data whilst it is
29
- # being read.
30
- # When nil, the file_name will be inspected to try and determine what
31
- # streams should be applied.
32
- # Default: nil
33
- #
34
- # file_name [String]
35
- # When `streams` is not supplied, `file_name` can be used for determining the streams
36
- # to apply to read the file/stream.
37
- # This is particularly useful when `file_name_or_io` is a stream, or a temporary file name.
38
- # Default: nil
39
- #
40
- # Example: Zip
41
- # IOStreams.reader('myfile.zip') do |stream|
42
- # puts stream.read
43
- # end
44
- #
45
- # Example: Encrypted Zip
46
- # IOStreams.reader('myfile.zip.enc') do |stream|
47
- # puts stream.read
48
- # end
49
- #
50
- # Example: Explicitly set the streams
51
- # IOStreams.reader('myfile.zip.enc', [:zip, :enc]) do |stream|
52
- # puts stream.read
53
- # end
54
- #
55
- # Example: Supply custom options
56
- # # Encrypt the file and get Symmetric Encryption to also compress it
57
- # IOStreams.reader('myfile.csv.enc', streams: enc: {compress: true}) do |stream|
58
- # puts stream.read
59
- # end
60
- #
61
- # Note:
62
- # * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
63
- # no streams are passed in.
64
- def self.reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
65
- stream(:reader, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
66
- end
67
-
68
- # Iterate over a file / stream returning one line at a time.
69
- # Embedded lines (within double quotes) will be skipped if
70
- # 1. The file name contains .csv
71
- # 2. Or the embedded_within argument is set
72
- #
73
- # Example: Supply custom options
74
- # IOStreams.each_line(file_name, embedded_within: '"') do |line|
75
- # puts line
76
- # end
77
- #
78
- def self.each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
79
- line_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |line_stream|
80
- line_stream.each(&block)
81
- end
82
- end
83
-
84
- # Iterate over a file / stream returning one line at a time.
85
- # Embedded lines (within double quotes) will be skipped if
86
- # 1. The file name contains .csv
87
- # 2. Or the embedded_within argument is set
88
- #
89
- # Example: Supply custom options
90
- # IOStreams.each_row(file_name, embedded_within: '"') do |line|
91
- # puts line
92
- # end
93
- #
94
- def self.each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
95
- row_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |row_stream|
96
- row_stream.each(&block)
97
- end
98
- end
99
-
100
- # Returns [Hash] of every record in a file or stream with support for headers.
101
- #
102
- # Reading a delimited stream and converting to tabular form.
103
- #
104
- # Each record / line is returned one at a time so that very large files
105
- # can be read without having to load the entire file into memory.
106
- #
107
- # Embedded lines (within double quotes) will be skipped if
108
- # 1. The file name contains .csv
109
- # 2. Or the embedded_within argument is set
110
- #
111
- # Example: Supply custom options
112
- # IOStreams.each_record(file_name, embedded_within: '"') do |line|
113
- # puts line
114
- # end
115
- #
116
- # Example:
117
- # file_name = 'customer_data.csv.pgp'
118
- # IOStreams.each_record(file_name) do |hash|
119
- # p hash
120
- # end
121
- def self.each_record(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
122
- record_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |record_stream|
123
- record_stream.each(&block)
124
- end
125
- end
126
-
127
- # Returns a Writer for writing to a file / stream
128
- #
129
- # Parameters
130
- # file_name_or_io [String|IO]
131
- # The file_name of the file to write to, or an IO Stream that implements
132
- # #write.
133
- #
134
- # streams [Symbol|Array]
135
- # The formats/streams that be used to convert the data whilst it is
136
- # being written.
137
- # When nil, the file_name will be inspected to try and determine what
138
- # streams should be applied.
139
- # Default: nil
140
- #
141
- # Stream types / extensions supported:
142
- # .zip Zip File [ :zip ]
143
- # .gz, .gzip GZip File [ :gzip ]
144
- # .enc File Encrypted using symmetric encryption [ :enc ]
145
- # other All other extensions will be returned as: [ :file ]
146
- #
147
- # When a file is encrypted, it may also be compressed:
148
- # .zip.enc [ :zip, :enc ]
149
- # .gz.enc [ :gz, :enc ]
150
- #
151
- # Example: Zip
152
- # IOStreams.writer('myfile.zip') do |stream|
153
- # stream.write(data)
154
- # end
155
- #
156
- # Example: Encrypted Zip
157
- # IOStreams.writer('myfile.zip.enc') do |stream|
158
- # stream.write(data)
159
- # end
160
- #
161
- # Example: Explicitly set the streams
162
- # IOStreams.writer('myfile.zip.enc', [:zip, :enc]) do |stream|
163
- # stream.write(data)
164
- # end
165
- #
166
- # Example: Supply custom options
167
- # IOStreams.writer('myfile.csv.enc', [enc: { compress: true }]) do |stream|
168
- # stream.write(data)
169
- # end
170
- #
171
- # Example: Set internal filename when creating a zip file
172
- # IOStreams.writer('myfile.csv.zip', zip: { zip_file_name: 'myfile.csv' }) do |stream|
173
- # stream.write(data)
174
- # end
175
- #
176
- # Note:
177
- # * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
178
- # no streams are passed in.
179
- def self.writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
180
- stream(:writer, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
181
- end
182
-
183
- def self.line_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
184
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
185
-
186
- writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
187
- IOStreams::Line::Writer.open(io, **args, &block)
188
- end
189
- end
190
-
191
- def self.row_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
192
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
193
-
194
- line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
195
- file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
196
-
197
- IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
198
- end
199
- end
200
-
201
- def self.record_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
202
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
203
-
204
- line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
205
- file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
206
-
207
- IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
208
- end
209
- end
210
-
211
- # Copies the source file/stream to the target file/stream.
212
- # Returns [Integer] the number of bytes copied
213
- #
214
- # Example: Copy between 2 files
215
- # IOStreams.copy('a.csv', 'b.csv')
216
- #
217
- # Example: Read content from a Xlsx file and write it out in CSV form.
218
- # IOStreams.copy('a.xlsx', 'b.csv')
219
- #
220
- # Example:
221
- # # Read content from a JSON file and write it out in CSV form.
222
- # #
223
- # # The output header for the CSV file is extracted from the first row in the JSON file.
224
- # # If the first JSON row does not contain all the column names then they will be ignored
225
- # # for the rest of the file.
226
- # IOStreams.copy('a.json', 'b.csv')
227
- #
228
- # Example:
229
- # # Read a PSV file and write out a CSV file from it.
230
- # IOStreams.copy('a.psv', 'b.csv')
231
- #
232
- # Example:
233
- # # Copy between 2 files, encrypting the target file with Symmetric Encryption
234
- # # Since the target file_name already includes `.enc` in the filename, it is automatically
235
- # # encrypted.
236
- # IOStreams.copy('a.csv', 'b.csv.enc')
237
- #
238
- # Example:
239
- # # Copy between 2 files, encrypting the target file with Symmetric Encryption
240
- # # Since the target file_name does not include `.enc` in the filename, to encrypt it
241
- # # the encryption stream is added.
242
- # IOStreams.copy('a.csv', 'b', target_options: [:enc])
243
- #
244
- # Example:
245
- # # Copy between 2 files, encrypting the target file with Symmetric Encryption
246
- # # Since the target file_name does not include `.enc` in the filename, to encrypt it
247
- # # the encryption stream is added, along with the optional compression option.
248
- # IOStreams.copy('a.csv', 'b', target_options: [enc: { compress: true }])
249
- #
250
- # Example:
251
- # # Create a pgp encrypted file.
252
- # # For PGP Encryption the recipients email address is required.
253
- # IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
254
- #
255
- # Example: Copy between 2 existing streams
256
- # IOStreams.reader('a.csv') do |source_stream|
257
- # IOStreams.writer('b.csv.enc') do |target_stream|
258
- # IOStreams.copy(source_stream, target_stream)
259
- # end
260
- # end
261
- #
262
- # Example:
263
- # # Copy between 2 csv files, reducing the number of columns present and encrypting the
264
- # # target file with Symmetric Encryption
265
- # output_headers = %w[name address]
266
- # IOStreams.copy(
267
- # 'a.csv',
268
- # 'b.csv.enc',
269
- # target_options: [csv:{headers: output_headers}, enc: {compress: true}]
270
- # )
271
- #
272
- # Example:
273
- # # Copy a locally encrypted file to AWS S3.
274
- # # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
275
- # # Useful for when the entire bucket is encrypted on S3.
276
- # IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
277
- def self.copy(source_file_name_or_io, target_file_name_or_io, buffer_size: 65536, source_options: {}, target_options: {})
278
- bytes = 0
279
- reader(source_file_name_or_io, **source_options) do |source_stream|
280
- writer(target_file_name_or_io, **target_options) do |target_stream|
281
- while data = source_stream.read(buffer_size)
282
- break if data.size == 0
283
- bytes += data.size
284
- target_stream.write(data)
285
- end
286
- end
287
- end
288
- bytes
289
- end
290
-
291
- # Returns [true|false] whether the supplied file_name_or_io is a reader stream
292
- def self.reader_stream?(file_name_or_io)
293
- file_name_or_io.respond_to?(:read)
294
- end
295
-
296
- # Returns [true|false] whether the supplied file_name_or_io is a reader stream
297
- def self.writer_stream?(file_name_or_io)
298
- file_name_or_io.respond_to?(:write)
299
- end
300
-
301
- # Returns [true|false] whether the file is compressed.
302
- # Note: Currently only looks at the file name extension
303
- def self.compressed?(file_name)
304
- !(file_name =~ /\.(zip|gz|gzip|xls.|)\z/i).nil?
305
- end
306
-
307
- # Returns [true|false] whether the file is encrypted.
308
- # Note: Currently only looks at the file name extension
309
- def self.encrypted?(file_name)
310
- !(file_name =~ /\.(enc|pgp|gpg)\z/i).nil?
311
- end
312
-
313
- # Returns [Array] the formats required to process the file by looking at
314
- # its extension(s)
315
- #
316
- # Example Zip file:
317
- # IOStreams.streams_for_file_name('myfile.zip')
318
- # => [ :zip ]
319
- #
320
- # Example Encrypted Gzip file:
321
- # IOStreams.streams_for_file_name('myfile.csv.gz.enc')
322
- # => [ :gz, :enc ]
323
- #
324
- # Example plain text / binary file:
325
- # IOStreams.streams_for_file_name('myfile.csv')
326
- # => []
327
- def self.streams_for_file_name(file_name)
328
- raise ArgumentError.new('File name cannot be nil') if file_name.nil?
329
- raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
330
-
331
- parts = ::File.basename(file_name).split('.')
332
- extensions = []
333
- while extension = parts.pop
334
- sym = extension.downcase.to_sym
335
- break unless @extensions[sym]
336
- extensions.unshift(sym)
337
- end
338
- extensions
339
- end
340
-
341
- # Extract URI if any was supplied
342
- def self.scheme_for_file_name(file_name)
343
- raise ArgumentError.new('File name cannot be nil') if file_name.nil?
344
- raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
345
-
346
- if matches = file_name.match(/\A(\w+):\/\//)
347
- matches[1].downcase.to_sym
348
- end
349
- end
350
-
351
- # Iterate over a file / stream returning each record/line one at a time.
352
- # It will apply the embedded_within argument if the file or input_stream contain .csv in its name.
353
- def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, embedded_within: nil, **args, &block)
354
-
355
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) || file_name_or_io.is_a?(Array)
356
-
357
- # TODO: needs to be improved
358
- if embedded_within.nil? && file_name_or_io.is_a?(String)
359
- embedded_within = '"' if file_name_or_io.include?('.csv')
360
- elsif embedded_within.nil? && file_name
361
- embedded_within = '"' if file_name.include?('.csv')
362
- end
363
-
364
- reader(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
365
- IOStreams::Line::Reader.open(io, embedded_within: embedded_within, **args, &block)
366
- end
367
- end
368
-
369
- # Iterate over a file / stream returning each line as an array, one at a time.
370
- def self.row_reader(file_name_or_io,
371
- streams: nil,
372
- delimiter: nil,
373
- file_name: nil,
374
- encoding: nil,
375
- encode_cleaner: nil,
376
- encode_replace: nil,
377
- embedded_within: nil,
378
- **args,
379
- &block)
380
-
381
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Reader)
382
-
383
- line_reader(
384
- file_name_or_io,
385
- streams: streams,
386
- delimiter: delimiter,
387
- file_name: file_name,
388
- encoding: encoding,
389
- encode_cleaner: encode_cleaner,
390
- encode_replace: encode_replace,
391
- embedded_within: embedded_within
392
- ) do |io|
393
- file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
394
- IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
395
- end
396
- end
397
-
398
- # Iterate over a file / stream returning each line as a hash, one at a time.
399
- def self.record_reader(file_name_or_io,
400
- streams: nil,
401
- delimiter: nil,
402
- file_name: nil,
403
- encoding: nil,
404
- encode_cleaner: nil,
405
- encode_replace: nil,
406
- embedded_within: nil,
407
- **args,
408
- &block)
409
-
410
- return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
411
-
412
- line_reader(file_name_or_io,
413
- streams: streams,
414
- delimiter: delimiter,
415
- file_name: file_name,
416
- encoding: encoding,
417
- encode_cleaner: encode_cleaner,
418
- encode_replace: encode_replace,
419
- embedded_within: embedded_within
420
- ) do |io|
421
-
422
-
423
- file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
424
- IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
425
- end
426
- end
16
+ include Deprecated
427
17
 
428
18
  # Returns [Path] instance for the supplied complete path with optional scheme.
429
19
  #
430
20
  # Example:
431
21
  # IOStreams.path("/usr", "local", "sample")
432
- # # => #<IOStreams::File::Path:0x00007fec66e59b60 @path="/usr/local/sample">
22
+ # # => #<IOStreams::Paths::File:0x00007fec66e59b60 @path="/usr/local/sample">
433
23
  #
434
24
  # IOStreams.path("/usr", "local", "sample").to_s
435
25
  # # => "/usr/local/sample"
@@ -441,14 +31,47 @@ module IOStreams
441
31
  # # => "s3://mybucket/path/file.xls"
442
32
  #
443
33
  # IOStreams.path("file.xls")
444
- # # => #<IOStreams::File::Path:0x00007fec6be6aaf0 @path="file.xls">
34
+ # # => #<IOStreams::Paths::File:0x00007fec6be6aaf0 @path="file.xls">
445
35
  #
446
36
  # IOStreams.path("files", "file.xls").to_s
447
37
  # # => "files/file.xls"
38
+ #
39
+ # For Files
40
+ # IOStreams.path('blah.zip').option(:encode, encoding: 'BINARY').each_line { |line| puts line }
41
+ # IOStreams.path('blah.zip').option(:encode, encoding: 'UTF-8').each_line.first
42
+ # IOStreams.path('blah.zip').option(:encode, encoding: 'UTF-8').each_record.last
43
+ # IOStreams.path('blah.zip').option(:encode, encoding: 'UTF-8').each_record.size
44
+ # IOStreams.path('blah.zip').option(:encode, encoding: 'UTF-8').reader.size
45
+ # IOStreams.path('blah.csv.zip').each_line { |line| puts line }
46
+ # IOStreams.path('blah.zip').option(:pgp, passphrase: 'receiver_passphrase').reader(&:read)
47
+ # IOStreams.path('blah.zip').stream(:zip).stream(:pgp, passphrase: 'receiver_passphrase').reader(&:read)
48
+ # IOStreams.path('blah.zip').stream(:zip).stream(:encode, encoding: 'BINARY').reader(&:read)
49
+ #
448
50
  def self.path(*elements)
449
- path = ::File.join(*elements)
450
- uri = URI.parse(path)
451
- IOStreams.scheme(uri.scheme).path_class.new(path)
51
+ return elements.first if (elements.size == 1) && elements.first.is_a?(IOStreams::Path)
52
+
53
+ elements = elements.collect(&:to_s)
54
+ path = ::File.join(*elements)
55
+ uri = URI.parse(path)
56
+ scheme(uri.scheme).new(path)
57
+ end
58
+
59
+ # For an existing IO Stream
60
+ # IOStreams.stream(io).file_name('blah.zip').encoding('BINARY').reader(&:read)
61
+ # IOStreams.stream(io).file_name('blah.zip').encoding('BINARY').each_line(...)
62
+ # IOStreams.stream(io).file_name('blah.csv.zip').each_line(...)
63
+ # IOStreams.stream(io).stream(:zip).stream(:pgp, passphrase: 'receiver_passphrase').reader(&:read)
64
+ def self.stream(io_stream)
65
+ return io_stream if io_stream.is_a?(Stream)
66
+
67
+ Stream.new(io_stream)
68
+ end
69
+
70
+ # For processing by either a file name or an open IO stream.
71
+ def self.new(file_name_or_io)
72
+ return file_name_or_io if file_name_or_io.is_a?(Stream)
73
+
74
+ file_name_or_io.is_a?(String) ? path(file_name_or_io) : stream(file_name_or_io)
452
75
  end
453
76
 
454
77
  # Join the supplied path elements to a root path.
@@ -457,13 +80,13 @@ module IOStreams
457
80
  # IOStreams.add_root(:default, "tmp/export")
458
81
  #
459
82
  # IOStreams.join('file.xls')
460
- # # => #<IOStreams::File::Path:0x00007fec70391bd8 @path="tmp/export/sample">
83
+ # # => #<IOStreams::Paths::File:0x00007fec70391bd8 @path="tmp/export/sample">
461
84
  #
462
85
  # IOStreams.join('file.xls').to_s
463
86
  # # => "tmp/export/sample"
464
87
  #
465
88
  # IOStreams.join('sample', 'file.xls', root: :ftp)
466
- # # => #<IOStreams::File::Path:0x00007fec6ee329b8 @path="tmp/ftp/sample/file.xls">
89
+ # # => #<IOStreams::Paths::File:0x00007fec6ee329b8 @path="tmp/ftp/sample/file.xls">
467
90
  #
468
91
  # IOStreams.join('sample', 'file.xls', root: :ftp).to_s
469
92
  # # => "tmp/ftp/sample/file.xls"
@@ -475,20 +98,128 @@ module IOStreams
475
98
  root(root).join(*elements)
476
99
  end
477
100
 
478
- # Return named root path
101
+ # Returns a path to a local temporary file.
102
+ def self.temp_file(*args, &block)
103
+ # TODO: Possible enhancement: Add a :temp root so that temp files can be stored anywhere, or the location changed.
104
+ Paths::File.temp_file(*args, &block)
105
+ end
106
+
107
+ # Returns [IOStreams::Paths::File] current or named users home path
108
+ def self.home(username = nil)
109
+ IOStreams::Paths::File.new(Dir.home(username))
110
+ end
111
+
112
+ # Returns [IOStreams::Paths::File] the current working path for this process.
113
+ def self.working_path
114
+ IOStreams::Paths::File.new(Dir.pwd)
115
+ end
116
+
117
+ # Yields Paths within the current path.
118
+ #
119
+ # Examples:
120
+ #
121
+ # # Return all children in a complete path:
122
+ # IOStreams.each_child("/exports/files/customer/*") { |path| puts path }
123
+ #
124
+ # # Return all children in a complete path on S3:
125
+ # IOStreams.each_child("s3://my_bucket/exports/files/customer/*") { |path| puts path }
126
+ #
127
+ # # Case Insensitive file name lookup:
128
+ # IOStreams.each_child("/exports/files/customer/R*") { |path| puts path }
129
+ #
130
+ # # Case Sensitive file name lookup:
131
+ # IOStreams.each_child("/exports/files/customer/R*", case_sensitive: true) { |path| puts path }
132
+ #
133
+ # # Case Insensitive recursive file name lookup:
134
+ # IOStreams.each_child("source_files/**/fast*.rb") { |name| puts name }
135
+ #
136
+ # Parameters:
137
+ # pattern [String]
138
+ # The pattern is not a regexp, it is a string that may contain the following metacharacters:
139
+ # `*` Matches all regular files.
140
+ # `c*` Matches all regular files beginning with `c`.
141
+ # `*c` Matches all regular files ending with `c`.
142
+ # `*c*` Matches all regular files that have `c` in them.
143
+ #
144
+ # `**` Matches recursively into subdirectories.
145
+ #
146
+ # `?` Matches any one character.
147
+ #
148
+ # `[set]` Matches any one character in the supplied `set`.
149
+ # `[^set]` Matches any one character that is not in the supplied `set`.
150
+ #
151
+ # `\` Escapes the next metacharacter.
152
+ #
153
+ # `{a,b}` Matches on either pattern `a` or pattern `b`.
154
+ #
155
+ # case_sensitive [true|false]
156
+ # Whether the pattern is case-sensitive.
157
+ #
158
+ # directories [true|false]
159
+ # Whether to yield directory names.
160
+ #
161
+ # hidden [true|false]
162
+ # Whether to yield hidden paths.
163
+ #
164
+ # Examples:
165
+ #
166
+ # Pattern: File name: match? Reason Options
167
+ # =========== ================ ====== ============================= ===========================
168
+ # "cat" "cat" true # Match entire string
169
+ # "cat" "category" false # Only match partial string
170
+ #
171
+ # "c{at,ub}s" "cats" true # { } is supported
172
+ #
173
+ # "c?t" "cat" true # "?" match only 1 character
174
+ # "c??t" "cat" false # ditto
175
+ # "c*" "cats" true # "*" match 0 or more characters
176
+ # "c*t" "c/a/b/t" true # ditto
177
+ # "ca[a-z]" "cat" true # inclusive bracket expression
178
+ # "ca[^t]" "cat" false # exclusive bracket expression ("^" or "!")
179
+ #
180
+ # "cat" "CAT" false # case sensitive {case_sensitive: true}
181
+ # "cat" "CAT" true # case insensitive
182
+ #
183
+ # "\?" "?" true # escaped wildcard becomes ordinary
184
+ # "\a" "a" true # escaped ordinary remains ordinary
185
+ # "[\?]" "?" true # can escape inside bracket expression
186
+ #
187
+ # "*" ".profile" false # wildcard doesn't match leading
188
+ # "*" ".profile" true # period by default.
189
+ # ".*" ".profile" true {hidden: true}
190
+ #
191
+ # "**/*.rb" "main.rb" false
192
+ # "**/*.rb" "./main.rb" false
193
+ # "**/*.rb" "lib/song.rb" true
194
+ # "**.rb" "main.rb" true
195
+ # "**.rb" "./main.rb" false
196
+ # "**.rb" "lib/song.rb" true
197
+ # "*" "dave/.profile" true
198
+ def self.each_child(pattern, case_sensitive: false, directories: false, hidden: false, &block)
199
+ matcher = Paths::Matcher.new(nil, pattern, case_sensitive: case_sensitive, hidden: hidden)
200
+
201
+ # When the pattern includes an exact file name without any pattern characters
202
+ if matcher.pattern.nil?
203
+ block.call(matcher.path) if matcher.path.exist?
204
+ return
205
+ end
206
+ matcher.path.each_child(matcher.pattern, case_sensitive: case_sensitive, directories: directories, hidden: hidden, &block)
207
+ end
208
+
209
+ # Returns [IOStreams::Paths::File] the default root path, or the named root path
479
210
  def self.root(root = :default)
480
- @roots_paths[root.to_sym] || raise(ArgumentError, "Unknown root: #{root.inspect}")
211
+ @root_paths[root.to_sym] || raise(ArgumentError, "Root: #{root.inspect} has not been registered.")
481
212
  end
482
213
 
483
214
  # Add a named root path
484
215
  def self.add_root(root, *elements)
485
- raise(ArgumentError, "Invalid root name #{root.inspect}") unless root.to_s =~ /\A\w+\Z/
216
+ raise(ArgumentError, "Invalid characters in root name #{root.inspect}") unless root.to_s =~ /\A\w+\Z/
486
217
 
487
- @roots_paths[root.to_sym] = path(*elements)
218
+ @root_paths[root.to_sym] = path(*elements)
488
219
  end
489
220
 
490
221
  def self.roots
491
- @roots_paths.dup
222
+ @root_paths.dup
492
223
  end
493
224
 
494
225
  # Register a file extension and the reader and writer streaming classes
@@ -512,114 +243,39 @@ module IOStreams
512
243
  @extensions.delete(extension.to_sym)
513
244
  end
514
245
 
246
+ # Registered file extensions
247
+ def self.extensions
248
+ @extensions.dup
249
+ end
250
+
515
251
  # Register a file extension and the reader and writer streaming classes
516
252
  #
517
253
  # Example:
518
254
  # # MyXls::Reader and MyXls::Writer must implement .open
519
- # register_extension(:xls, MyXls::Reader, MyXls::Writer)
520
- def self.register_scheme(scheme, reader_class, writer_class, path_class = nil)
255
+ # register_scheme(:s3, IOStreams::Paths::S3) # takes a single path class, not separate reader and writer classes
256
+ def self.register_scheme(scheme, klass)
521
257
  raise(ArgumentError, "Invalid scheme #{scheme.inspect}") unless scheme.nil? || scheme.to_s =~ /\A\w+\Z/
522
- @schemes[scheme.nil? ? nil : scheme.to_sym] = Scheme.new(reader_class, writer_class, path_class)
258
+ @schemes[scheme.nil? ? nil : scheme.to_sym] = klass
523
259
  end
524
260
 
525
- private
526
-
527
- # Hold root paths
528
- @roots_paths = {}
529
-
530
- # A registry to hold formats for processing files during upload or download
531
- @extensions = {}
532
- @schemes = {}
533
-
534
- Extension = Struct.new(:reader_class, :writer_class)
535
- Scheme = Struct.new(:reader_class, :writer_class, :path_class)
536
-
537
- # Struct to hold the Stream and options if any
538
- StreamStruct = Struct.new(:klass, :options)
539
-
540
- # Returns a reader or writer stream
541
- def self.stream(type, file_name_or_io, streams:, file_name:, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
542
- raise(ArgumentError, 'IOStreams call is missing mandatory block') if block.nil?
543
-
544
- streams = streams_for_file_name(file_name) if streams.nil? && file_name
545
-
546
- # Shortcut for when it is already a stream
547
- if !file_name_or_io.is_a?(String) && (streams.nil? || streams.empty?)
548
- if encoding || encode_cleaner || encode_replace
549
- return IOStreams::Encode::Reader.open(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
550
- else
551
- return block.call(file_name_or_io)
552
- end
553
- end
554
-
555
- if streams.nil?
556
- streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
557
- end
558
- scheme = scheme_for_file_name(file_name_or_io) if file_name_or_io.is_a?(String)
559
-
560
- stream_structs = streams_for(type, streams)
561
- stream_structs << stream_struct_for_scheme(type, scheme) if stream_structs.empty? || scheme
562
-
563
- # Add encoding stream if any of its options are present
564
- if encoding || encode_cleaner || encode_replace
565
- klass = type == :reader ? IOStreams::Encode::Reader : IOStreams::Encode::Writer
566
- options = {}
567
- options[:encoding] = encoding if encoding
568
- options[:encode_cleaner] = encode_cleaner if encode_cleaner
569
- options[:encode_replace] = encode_replace if encode_replace
570
- stream_structs.unshift(StreamStruct.new(klass, options))
571
- end
572
-
573
- if stream_structs.size == 1
574
- stream_struct = stream_structs.first
575
- stream_struct.klass.open(file_name_or_io, stream_struct.options, &block)
576
- else
577
- # Daisy chain multiple streams together
578
- last = stream_structs.inject(block) { |inner, ss| -> io { ss.klass.open(io, ss.options, &inner) } }
579
- last.call(file_name_or_io)
580
- end
581
- end
582
-
583
- # type: :reader or :writer
584
- def self.streams_for(type, params)
585
- if params.is_a?(Symbol)
586
- [stream_struct_for_stream(type, params)]
587
- elsif params.is_a?(Array)
588
- a = []
589
- params.each do |stream|
590
- if stream.is_a?(Hash)
591
- stream.each_pair { |stream_sym, options| a << stream_struct_for_stream(type, stream_sym, options) }
592
- else
593
- a << stream_struct_for_stream(type, stream)
594
- end
595
- end
596
- a
597
- elsif params.is_a?(Hash)
598
- a = []
599
- params.each_pair { |stream, options| a << stream_struct_for_stream(type, stream, options) }
600
- a
601
- else
602
- raise ArgumentError, "Invalid params supplied: #{params.inspect}"
603
- end
604
- end
605
-
606
- def self.stream_struct_for_stream(type, stream, options = {})
607
- ext = @extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
608
- klass = ext.send("#{type}_class")
609
- StreamStruct.new(klass, options)
261
+ def self.schemes
262
+ @schemes.dup
610
263
  end
611
264
 
612
265
  def self.scheme(scheme_name)
613
266
  @schemes[scheme_name.nil? ? nil : scheme_name.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme_name.inspect}")
614
267
  end
615
268
 
616
- def self.stream_struct_for_scheme(type, scheme_name, options = {})
617
- klass = scheme(scheme_name).send("#{type}_class")
618
- StreamStruct.new(klass, options)
619
- end
269
+ private
270
+
271
+ Extension = Struct.new(:reader_class, :writer_class)
620
272
 
621
- # Default reader/writer when no other streams need to be applied.
622
- # register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
273
+ # Hold root paths
274
+ @root_paths = {}
275
+
276
+ # A registry to hold formats for processing files during upload or download
277
+ @extensions = {}
278
+ @schemes = {}
623
279
 
624
280
  # Register File extensions
625
281
  register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
@@ -631,8 +287,9 @@ module IOStreams
631
287
  register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
632
288
  register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
633
289
  register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
290
+ register_extension(:encode, IOStreams::Encode::Reader, IOStreams::Encode::Writer)
634
291
 
635
- # Support URI schemes
292
+ # Register Schemes
636
293
  #
637
294
  # Examples:
638
295
  # path/file_name
@@ -640,9 +297,10 @@ module IOStreams
640
297
  # https://hostname/path/file_name
641
298
  # sftp://hostname/path/file_name
642
299
  # s3://bucket/key
643
- register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer, IOStreams::File::Path)
644
- register_scheme(:http, IOStreams::HTTP::Reader, nil)
645
- register_scheme(:https, IOStreams::HTTP::Reader, nil)
646
- register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
647
- register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer, IOStreams::S3::Path)
300
+ register_scheme(nil, IOStreams::Paths::File)
301
+ register_scheme(:file, IOStreams::Paths::File)
302
+ register_scheme(:http, IOStreams::Paths::HTTP)
303
+ register_scheme(:https, IOStreams::Paths::HTTP)
304
+ register_scheme(:sftp, IOStreams::Paths::SFTP)
305
+ register_scheme(:s3, IOStreams::Paths::S3)
648
306
  end