iostreams 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +202 -0
- data/README.md +155 -47
- data/lib/io_streams/file/reader.rb +7 -8
- data/lib/io_streams/file/writer.rb +7 -8
- data/lib/io_streams/io_streams.rb +313 -129
- data/lib/io_streams/{delimited → line}/reader.rb +20 -30
- data/lib/io_streams/line/writer.rb +81 -0
- data/lib/io_streams/pgp.rb +4 -14
- data/lib/io_streams/record/reader.rb +55 -0
- data/lib/io_streams/record/writer.rb +63 -0
- data/lib/io_streams/row/reader.rb +60 -0
- data/lib/io_streams/row/writer.rb +62 -0
- data/lib/io_streams/s3.rb +25 -0
- data/lib/io_streams/s3/reader.rb +64 -0
- data/lib/io_streams/s3/writer.rb +13 -0
- data/lib/io_streams/streams.rb +1 -1
- data/lib/io_streams/tabular.rb +163 -0
- data/lib/io_streams/tabular/errors.rb +14 -0
- data/lib/io_streams/tabular/header.rb +146 -0
- data/lib/io_streams/tabular/parser/array.rb +26 -0
- data/lib/io_streams/tabular/parser/base.rb +12 -0
- data/lib/io_streams/tabular/parser/csv.rb +35 -0
- data/lib/io_streams/tabular/parser/fixed.rb +88 -0
- data/lib/io_streams/tabular/parser/hash.rb +21 -0
- data/lib/io_streams/tabular/parser/json.rb +25 -0
- data/lib/io_streams/tabular/parser/psv.rb +34 -0
- data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
- data/lib/io_streams/version.rb +2 -2
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +1 -1
- data/lib/iostreams.rb +21 -10
- data/test/bzip2_reader_test.rb +21 -22
- data/test/bzip2_writer_test.rb +38 -32
- data/test/file_reader_test.rb +19 -18
- data/test/file_writer_test.rb +23 -22
- data/test/files/test.json +3 -0
- data/test/gzip_reader_test.rb +21 -22
- data/test/gzip_writer_test.rb +35 -29
- data/test/io_streams_test.rb +137 -61
- data/test/line_reader_test.rb +105 -0
- data/test/line_writer_test.rb +50 -0
- data/test/pgp_reader_test.rb +29 -29
- data/test/pgp_test.rb +149 -195
- data/test/pgp_writer_test.rb +63 -62
- data/test/record_reader_test.rb +61 -0
- data/test/record_writer_test.rb +73 -0
- data/test/row_reader_test.rb +34 -0
- data/test/row_writer_test.rb +51 -0
- data/test/tabular_test.rb +184 -0
- data/test/xlsx_reader_test.rb +13 -17
- data/test/zip_reader_test.rb +21 -22
- data/test/zip_writer_test.rb +40 -36
- metadata +41 -17
- data/lib/io_streams/csv/reader.rb +0 -21
- data/lib/io_streams/csv/writer.rb +0 -20
- data/lib/io_streams/delimited/writer.rb +0 -67
- data/test/csv_reader_test.rb +0 -34
- data/test/csv_writer_test.rb +0 -35
- data/test/delimited_reader_test.rb +0 -115
- data/test/delimited_writer_test.rb +0 -44
@@ -1,15 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
2
|
module File
|
3
3
|
class Reader
|
4
|
-
# Read from a file
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
block.call(file_name_or_io)
|
10
|
-
end
|
11
|
-
end
|
4
|
+
# Read from a named file
|
5
|
+
# TODO: Add support for mode (text / binary)
|
6
|
+
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
+
def self.open(file_name, _=nil, &block)
|
8
|
+
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
12
9
|
|
10
|
+
::File.open(file_name, 'rb', &block)
|
11
|
+
end
|
13
12
|
end
|
14
13
|
end
|
15
14
|
end
|
@@ -1,15 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
2
|
module File
|
3
3
|
class Writer
|
4
|
-
# Write to a file
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
block.call(file_name_or_io)
|
10
|
-
end
|
11
|
-
end
|
4
|
+
# Write to a named file
|
5
|
+
# TODO: Add support for mode (text / binary), permissions, buffering, append
|
6
|
+
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
+
def self.open(file_name, _=nil, &block)
|
8
|
+
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
12
9
|
|
10
|
+
::File.open(file_name, 'wb', &block)
|
11
|
+
end
|
13
12
|
end
|
14
13
|
end
|
15
14
|
end
|
@@ -1,72 +1,21 @@
|
|
1
1
|
require 'concurrent'
|
2
|
-
module IOStreams
|
3
|
-
# A registry to hold formats for processing files during upload or download
|
4
|
-
@extensions = Concurrent::Map.new
|
5
2
|
|
3
|
+
# Streaming library for Ruby
|
4
|
+
#
|
5
|
+
# Stream types / extensions supported:
|
6
|
+
# .zip Zip File [ :zip ]
|
7
|
+
# .gz, .gzip GZip File [ :gzip ]
|
8
|
+
# .enc File Encrypted using symmetric encryption [ :enc ]
|
9
|
+
# etc...
|
10
|
+
# other All other extensions will be returned as: []
|
11
|
+
#
|
12
|
+
# When a file is encrypted, it may also be compressed:
|
13
|
+
# .zip.enc [ :zip, :enc ]
|
14
|
+
# .gz.enc [ :gz, :enc ]
|
15
|
+
module IOStreams
|
6
16
|
UTF8_ENCODING = Encoding.find('UTF-8').freeze
|
7
17
|
BINARY_ENCODING = Encoding.find('BINARY').freeze
|
8
18
|
|
9
|
-
# Returns [Array] the formats required to process the file by looking at
|
10
|
-
# its extension(s)
|
11
|
-
#
|
12
|
-
# Extensions supported:
|
13
|
-
# .zip Zip File [ :zip ]
|
14
|
-
# .gz, .gzip GZip File [ :gzip ]
|
15
|
-
# .enc File Encrypted using symmetric encryption [ :enc ]
|
16
|
-
# other All other extensions will be returned as: [ :file ]
|
17
|
-
#
|
18
|
-
# When a file is encrypted, it may also be compressed:
|
19
|
-
# .zip.enc [ :zip, :enc ]
|
20
|
-
# .gz.enc [ :gz, :enc ]
|
21
|
-
#
|
22
|
-
# Example Zip file:
|
23
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
|
24
|
-
# => [ :zip ]
|
25
|
-
#
|
26
|
-
# Example Encrypted Gzip file:
|
27
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
|
28
|
-
# => [ :gz, :enc ]
|
29
|
-
#
|
30
|
-
# Example plain text / binary file:
|
31
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
|
32
|
-
# => [ :file ]
|
33
|
-
def self.streams_for_file_name(file_name)
|
34
|
-
raise ArgumentError.new('File name cannot be nil') if file_name.nil?
|
35
|
-
raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
|
36
|
-
parts = file_name.split('.')
|
37
|
-
extensions = []
|
38
|
-
while extension = parts.pop
|
39
|
-
sym = extension.downcase.to_sym
|
40
|
-
break unless @extensions[sym]
|
41
|
-
extensions.unshift(sym)
|
42
|
-
end
|
43
|
-
extensions << :file if extensions.size == 0
|
44
|
-
extensions
|
45
|
-
end
|
46
|
-
|
47
|
-
Extension = Struct.new(:reader_class, :writer_class)
|
48
|
-
|
49
|
-
# Register a file extension and the reader and writer classes to use to format it
|
50
|
-
#
|
51
|
-
# Example:
|
52
|
-
# # MyXls::Reader and MyXls::Writer must implement .open
|
53
|
-
# register_extension(:xls, MyXls::Reader, MyXls::Writer)
|
54
|
-
def self.register_extension(extension, reader_class, writer_class)
|
55
|
-
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
56
|
-
@extensions[extension.to_sym] = Extension.new(reader_class, writer_class)
|
57
|
-
end
|
58
|
-
|
59
|
-
# De-Register a file extension
|
60
|
-
#
|
61
|
-
# Returns [Symbol] the extension removed, or nil if the extension was not registered
|
62
|
-
#
|
63
|
-
# Example:
|
64
|
-
# register_extension(:xls)
|
65
|
-
def self.deregister_extension(extension)
|
66
|
-
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
67
|
-
@extensions.delete(extension.to_sym)
|
68
|
-
end
|
69
|
-
|
70
19
|
# Returns a Reader for reading a file / stream
|
71
20
|
#
|
72
21
|
# Parameters
|
@@ -81,15 +30,11 @@ module IOStreams
|
|
81
30
|
# streams should be applied.
|
82
31
|
# Default: nil
|
83
32
|
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
88
|
-
#
|
89
|
-
#
|
90
|
-
# When a file is encrypted, it may also be compressed:
|
91
|
-
# .zip.enc [ :zip, :enc ]
|
92
|
-
# .gz.enc [ :gz, :enc ]
|
33
|
+
# file_name [String]
|
34
|
+
# When `streams` is not supplied, `file_name` can be used for determining the streams
|
35
|
+
# to apply to read the file/stream.
|
36
|
+
# This is particularly useful when `file_name_or_io` is a stream, or a temporary file name.
|
37
|
+
# Default: nil
|
93
38
|
#
|
94
39
|
# Example: Zip
|
95
40
|
# IOStreams.reader('myfile.zip') do |stream|
|
@@ -108,11 +53,47 @@ module IOStreams
|
|
108
53
|
#
|
109
54
|
# Example: Supply custom options
|
110
55
|
# # Encrypt the file and get Symmetric Encryption to also compress it
|
111
|
-
# IOStreams.reader('myfile.csv.enc',
|
56
|
+
# IOStreams.reader('myfile.csv.enc', streams: {enc: {compress: true}}) do |stream|
|
112
57
|
# puts stream.read
|
113
58
|
# end
|
114
|
-
|
115
|
-
|
59
|
+
#
|
60
|
+
# Note:
|
61
|
+
# * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
|
62
|
+
# no streams are passed in.
|
63
|
+
def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
|
64
|
+
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Iterate over a file / stream returning one line at a time.
|
68
|
+
def self.each_line(file_name_or_io, **args, &block)
|
69
|
+
line_reader(file_name_or_io, **args) do |line_stream|
|
70
|
+
line_stream.each(&block)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Iterate over a file / stream returning one row at a time.
|
75
|
+
def self.each_row(file_name_or_io, **args, &block)
|
76
|
+
row_reader(file_name_or_io, **args) do |row_stream|
|
77
|
+
row_stream.each(&block)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Returns [Hash] of every record in a file or stream with support for headers.
|
82
|
+
#
|
83
|
+
# Reading a delimited stream and converting to tabular form.
|
84
|
+
#
|
85
|
+
# Each record / line is returned one at a time so that very large files
|
86
|
+
# can be read without having to load the entire file into memory.
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# file_name = 'customer_data.csv.pgp'
|
90
|
+
# IOStreams.each_record(file_name) do |hash|
|
91
|
+
# p hash
|
92
|
+
# end
|
93
|
+
def self.each_record(file_name_or_io, **args, &block)
|
94
|
+
record_reader(file_name_or_io, **args) do |record_stream|
|
95
|
+
record_stream.each(&block)
|
96
|
+
end
|
116
97
|
end
|
117
98
|
|
118
99
|
# Returns a Writer for writing to a file / stream
|
@@ -163,41 +144,121 @@ module IOStreams
|
|
163
144
|
# IOStreams.writer('myfile.csv.zip', zip: { zip_file_name: 'myfile.csv' }) do |stream|
|
164
145
|
# stream.write(data)
|
165
146
|
# end
|
166
|
-
|
167
|
-
|
147
|
+
#
|
148
|
+
# Note:
|
149
|
+
# * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
|
150
|
+
# no streams are passed in.
|
151
|
+
def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
|
152
|
+
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
|
153
|
+
end
|
154
|
+
|
155
|
+
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
156
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
|
157
|
+
|
158
|
+
writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
159
|
+
IOStreams::Line::Writer.open(io, **args, &block)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
164
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
|
165
|
+
|
166
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
167
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
168
|
+
|
169
|
+
IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
|
170
|
+
end
|
168
171
|
end
|
169
172
|
|
170
|
-
|
173
|
+
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
174
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
|
175
|
+
|
176
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
177
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
178
|
+
|
179
|
+
IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# Copies the source file/stream to the target file/stream.
|
171
184
|
# Returns [Integer] the number of bytes copied
|
172
185
|
#
|
186
|
+
# Example: Copy between 2 files
|
187
|
+
# IOStreams.copy('a.csv', 'b.csv')
|
188
|
+
# # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
|
189
|
+
#
|
190
|
+
# Example: Read content from a Xlsx file and write it out in CSV form.
|
191
|
+
# IOStreams.copy('a.xlsx', 'b.csv')
|
192
|
+
#
|
193
|
+
# Example:
|
194
|
+
# # Read content from a JSON file and write it out in CSV form.
|
195
|
+
# #
|
196
|
+
# # The output header for the CSV file is extracted from the first row in the JSON file.
|
197
|
+
# # If the first JSON row does not contain all the column names then they will be ignored
|
198
|
+
# # for the rest of the file.
|
199
|
+
# IOStreams.copy('a.json', 'b.csv')
|
200
|
+
#
|
201
|
+
# Example:
|
202
|
+
# # Read a PSV file and write out a CSV file from it.
|
203
|
+
# IOStreams.copy('a.psv', 'b.csv')
|
204
|
+
#
|
205
|
+
# Example:
|
206
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
207
|
+
# # Since the target file_name already includes `.enc` in the filename, it is automatically
|
208
|
+
# # encrypted.
|
209
|
+
# IOStreams.copy('a.csv', 'b.csv.enc')
|
210
|
+
#
|
211
|
+
# Example:
|
212
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
213
|
+
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
214
|
+
# # the encryption stream is added.
|
215
|
+
# IOStreams.copy('a.csv', 'b', target_options: {streams: [:enc]})
|
216
|
+
#
|
217
|
+
# Example:
|
218
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
219
|
+
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
220
|
+
# # the encryption stream is added, along with the optional compression option.
|
221
|
+
# IOStreams.copy('a.csv', 'b', target_options: {streams: [enc: { compress: true }]})
|
222
|
+
#
|
173
223
|
# Example:
|
224
|
+
# # Create a pgp encrypted file.
|
225
|
+
# # For PGP Encryption the recipients email address is required.
|
226
|
+
# IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
|
227
|
+
#
|
228
|
+
# Example: Copy between 2 existing streams
|
174
229
|
# IOStreams.reader('a.csv') do |source_stream|
|
175
230
|
# IOStreams.writer('b.csv.enc') do |target_stream|
|
176
231
|
# IOStreams.copy(source_stream, target_stream)
|
177
232
|
# end
|
178
233
|
# end
|
179
|
-
def self.copy(source_stream, target_stream, buffer_size = 65536)
|
180
|
-
bytes = 0
|
181
|
-
while data = source_stream.read(buffer_size)
|
182
|
-
break if data.size == 0
|
183
|
-
bytes += data.size
|
184
|
-
target_stream.write(data)
|
185
|
-
end
|
186
|
-
bytes
|
187
|
-
end
|
188
|
-
|
189
|
-
# Copies the source file name to the target file name.
|
190
234
|
#
|
191
|
-
#
|
235
|
+
# Example:
|
236
|
+
# # Copy between 2 csv files, reducing the number of columns present and encrypting the
|
237
|
+
# # target file with Symmetric Encryption
|
238
|
+
# output_headers = %w[name address]
|
239
|
+
# IOStreams.copy(
|
240
|
+
# 'a.csv',
|
241
|
+
# 'b.csv.enc',
|
242
|
+
# target_options: [csv:{headers: output_headers}, enc: {compress: true}]
|
243
|
+
# )
|
192
244
|
#
|
193
245
|
# Example:
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
246
|
+
# # Copy a locally encrypted file to AWS S3.
|
247
|
+
# # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
|
248
|
+
# # Useful for when the entire bucket is encrypted on S3.
|
249
|
+
# IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
|
250
|
+
def self.copy(source_file_name_or_io, target_file_name_or_io, buffer_size: 65536, source_options: {}, target_options: {})
|
251
|
+
bytes = 0
|
252
|
+
reader(source_file_name_or_io, **source_options) do |source_stream|
|
253
|
+
writer(target_file_name_or_io, **target_options) do |target_stream|
|
254
|
+
while data = source_stream.read(buffer_size)
|
255
|
+
break if data.size == 0
|
256
|
+
bytes += data.size
|
257
|
+
target_stream.write(data)
|
258
|
+
end
|
199
259
|
end
|
200
260
|
end
|
261
|
+
bytes
|
201
262
|
end
|
202
263
|
|
203
264
|
# Returns [true|false] whether the supplied file_name_or_io is a reader stream
|
@@ -222,38 +283,149 @@ module IOStreams
|
|
222
283
|
!(file_name =~ /\.(enc|pgp|gpg)\z/i).nil?
|
223
284
|
end
|
224
285
|
|
225
|
-
#
|
226
|
-
#
|
227
|
-
|
228
|
-
|
286
|
+
# Returns [Array] the formats required to process the file by looking at
|
287
|
+
# its extension(s)
|
288
|
+
#
|
289
|
+
# Example Zip file:
|
290
|
+
# IOStreams.streams_for_file_name('myfile.zip')
|
291
|
+
# => [ :zip ]
|
292
|
+
#
|
293
|
+
# Example Encrypted Gzip file:
|
294
|
+
# IOStreams.streams_for_file_name('myfile.csv.gz.enc')
|
295
|
+
# => [ :gz, :enc ]
|
296
|
+
#
|
297
|
+
# Example plain text / binary file:
|
298
|
+
# IOStreams.streams_for_file_name('myfile.csv')
|
299
|
+
# => []
|
300
|
+
def self.streams_for_file_name(file_name)
|
301
|
+
raise ArgumentError.new('File name cannot be nil') if file_name.nil?
|
302
|
+
raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
|
303
|
+
parts = file_name.split('.')
|
304
|
+
extensions = []
|
305
|
+
while extension = parts.pop
|
306
|
+
sym = extension.downcase.to_sym
|
307
|
+
break unless @extensions[sym]
|
308
|
+
extensions.unshift(sym)
|
309
|
+
end
|
310
|
+
extensions
|
311
|
+
end
|
312
|
+
|
313
|
+
# Iterate over a file / stream returning each record/line one at a time.
|
314
|
+
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
315
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
|
316
|
+
file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
|
317
|
+
file_name_or_io.is_a?(Array)
|
318
|
+
|
319
|
+
reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
320
|
+
IOStreams::Line::Reader.open(io, **args, &block)
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
324
|
+
# Iterate over a file / stream returning each line as a hash, one at a time.
|
325
|
+
def self.row_reader(file_name_or_io,
|
326
|
+
streams: nil,
|
327
|
+
delimiter: nil,
|
328
|
+
encoding: IOStreams::UTF8_ENCODING,
|
329
|
+
strip_non_printable: false,
|
330
|
+
file_name: nil,
|
331
|
+
**args,
|
332
|
+
&block)
|
333
|
+
|
334
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Reader)
|
335
|
+
|
336
|
+
line_reader(
|
337
|
+
file_name_or_io,
|
338
|
+
streams: streams,
|
339
|
+
delimiter: delimiter,
|
340
|
+
encoding: encoding,
|
341
|
+
strip_non_printable: strip_non_printable,
|
342
|
+
file_name: file_name) do |io|
|
343
|
+
|
344
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
345
|
+
IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
|
346
|
+
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# Iterate over a file / stream returning each line as a hash, one at a time.
|
350
|
+
def self.record_reader(file_name_or_io,
|
351
|
+
streams: nil,
|
352
|
+
delimiter: nil,
|
353
|
+
encoding: IOStreams::UTF8_ENCODING,
|
354
|
+
strip_non_printable: false,
|
355
|
+
file_name: nil,
|
356
|
+
**args,
|
357
|
+
&block)
|
229
358
|
|
230
|
-
|
231
|
-
|
232
|
-
|
359
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader) || file_name_or_io.is_a?(IOStreams::Xlsx::Reader)
|
360
|
+
|
361
|
+
line_reader(
|
362
|
+
file_name_or_io,
|
363
|
+
streams: streams,
|
364
|
+
delimiter: delimiter,
|
365
|
+
encoding: encoding,
|
366
|
+
strip_non_printable: strip_non_printable,
|
367
|
+
file_name: file_name) do |io|
|
368
|
+
|
369
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
370
|
+
IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
|
233
371
|
end
|
234
372
|
end
|
235
373
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
374
|
+
Extension = Struct.new(:reader_class, :writer_class)
|
375
|
+
|
376
|
+
# Register a file extension and the reader and writer streaming classes
|
377
|
+
#
|
378
|
+
# Example:
|
379
|
+
# # MyXls::Reader and MyXls::Writer must implement .open
|
380
|
+
# register_extension(:xls, MyXls::Reader, MyXls::Writer)
|
381
|
+
def self.register_extension(extension, reader_class, writer_class)
|
382
|
+
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.nil? || extension.to_s =~ /\A\w+\Z/
|
383
|
+
@extensions[extension.nil? ? nil : extension.to_sym] = Extension.new(reader_class, writer_class)
|
384
|
+
end
|
385
|
+
|
386
|
+
# De-Register a file extension
|
387
|
+
#
|
388
|
+
# Returns [Symbol] the extension removed, or nil if the extension was not registered
|
389
|
+
#
|
390
|
+
# Example:
|
391
|
+
# deregister_extension(:xls)
|
392
|
+
def self.deregister_extension(extension)
|
393
|
+
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
394
|
+
@extensions.delete(extension.to_sym)
|
395
|
+
end
|
240
396
|
|
241
|
-
|
242
|
-
|
397
|
+
# Helper method: Returns [true|false] if a value is blank?
|
398
|
+
def self.blank?(value)
|
399
|
+
if value.nil?
|
400
|
+
true
|
401
|
+
elsif value.is_a?(String)
|
402
|
+
value !~ /\S/
|
403
|
+
else
|
404
|
+
value.respond_to?(:empty?) ? value.empty? : !value
|
405
|
+
end
|
243
406
|
end
|
244
407
|
|
245
|
-
##########################################################################
|
246
408
|
private
|
247
409
|
|
410
|
+
# A registry to hold formats for processing files during upload or download
|
411
|
+
@extensions = {}
|
412
|
+
|
248
413
|
# Struct to hold the Stream and options if any
|
249
414
|
StreamStruct = Struct.new(:klass, :options)
|
250
415
|
|
251
416
|
# Returns a reader or writer stream
|
252
|
-
def self.stream(type, file_name_or_io, streams
|
253
|
-
|
254
|
-
|
255
|
-
|
417
|
+
def self.stream(type, file_name_or_io, streams:, file_name:, &block)
|
418
|
+
# TODO: Add support for different schemes, such as file://, s3://, sftp://
|
419
|
+
|
420
|
+
streams = streams_for_file_name(file_name) if streams.nil? && file_name
|
421
|
+
|
422
|
+
# Shortcut for when it is already a stream and no further streams need to be applied.
|
423
|
+
return block.call(file_name_or_io) if !file_name_or_io.is_a?(String) && (streams.nil? || streams.empty?)
|
424
|
+
|
425
|
+
if streams.nil?
|
426
|
+
streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
|
256
427
|
end
|
428
|
+
|
257
429
|
stream_structs = streams_for(type, streams)
|
258
430
|
if stream_structs.size == 1
|
259
431
|
stream_struct = stream_structs.first
|
@@ -270,6 +442,7 @@ module IOStreams
|
|
270
442
|
if params.is_a?(Symbol)
|
271
443
|
[stream_struct_for_stream(type, params)]
|
272
444
|
elsif params.is_a?(Array)
|
445
|
+
return [stream_struct_for_stream(type, nil)] if params.empty?
|
273
446
|
a = []
|
274
447
|
params.each do |stream|
|
275
448
|
if stream.is_a?(Hash)
|
@@ -288,24 +461,35 @@ module IOStreams
|
|
288
461
|
end
|
289
462
|
end
|
290
463
|
|
291
|
-
def self.stream_struct_for_stream(type, stream, options={})
|
292
|
-
ext = @extensions[stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
464
|
+
def self.stream_struct_for_stream(type, stream, options = {})
|
465
|
+
ext = @extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
293
466
|
klass = ext.send("#{type}_class")
|
294
467
|
StreamStruct.new(klass, options)
|
295
468
|
end
|
296
469
|
|
470
|
+
# Default reader/writer when no other streams need to be applied.
|
471
|
+
register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
|
472
|
+
|
297
473
|
# Register File extensions
|
298
|
-
|
299
|
-
register_extension(:
|
300
|
-
register_extension(:
|
301
|
-
register_extension(:
|
302
|
-
register_extension(:
|
303
|
-
register_extension(:
|
304
|
-
register_extension(:
|
305
|
-
register_extension(:
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
474
|
+
register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
|
475
|
+
register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
|
476
|
+
register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
|
477
|
+
register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
|
478
|
+
register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
|
479
|
+
register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
|
480
|
+
register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
|
481
|
+
register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
|
482
|
+
|
483
|
+
# Use Symmetric Encryption to encrypt or decrypt files with the `enc` extension
|
484
|
+
# when the gem `symmetric-encryption` has been loaded.
|
485
|
+
if defined?(SymmetricEncryption)
|
486
|
+
register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
|
487
|
+
end
|
488
|
+
|
489
|
+
# register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
|
490
|
+
# register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
|
491
|
+
# register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
|
492
|
+
# register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
|
493
|
+
# register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
|
494
|
+
# register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
|
311
495
|
end
|