iostreams 0.14.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +202 -0
- data/README.md +155 -47
- data/lib/io_streams/file/reader.rb +7 -8
- data/lib/io_streams/file/writer.rb +7 -8
- data/lib/io_streams/io_streams.rb +313 -129
- data/lib/io_streams/{delimited → line}/reader.rb +20 -30
- data/lib/io_streams/line/writer.rb +81 -0
- data/lib/io_streams/pgp.rb +4 -14
- data/lib/io_streams/record/reader.rb +55 -0
- data/lib/io_streams/record/writer.rb +63 -0
- data/lib/io_streams/row/reader.rb +60 -0
- data/lib/io_streams/row/writer.rb +62 -0
- data/lib/io_streams/s3.rb +25 -0
- data/lib/io_streams/s3/reader.rb +64 -0
- data/lib/io_streams/s3/writer.rb +13 -0
- data/lib/io_streams/streams.rb +1 -1
- data/lib/io_streams/tabular.rb +163 -0
- data/lib/io_streams/tabular/errors.rb +14 -0
- data/lib/io_streams/tabular/header.rb +146 -0
- data/lib/io_streams/tabular/parser/array.rb +26 -0
- data/lib/io_streams/tabular/parser/base.rb +12 -0
- data/lib/io_streams/tabular/parser/csv.rb +35 -0
- data/lib/io_streams/tabular/parser/fixed.rb +88 -0
- data/lib/io_streams/tabular/parser/hash.rb +21 -0
- data/lib/io_streams/tabular/parser/json.rb +25 -0
- data/lib/io_streams/tabular/parser/psv.rb +34 -0
- data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
- data/lib/io_streams/version.rb +2 -2
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +1 -1
- data/lib/iostreams.rb +21 -10
- data/test/bzip2_reader_test.rb +21 -22
- data/test/bzip2_writer_test.rb +38 -32
- data/test/file_reader_test.rb +19 -18
- data/test/file_writer_test.rb +23 -22
- data/test/files/test.json +3 -0
- data/test/gzip_reader_test.rb +21 -22
- data/test/gzip_writer_test.rb +35 -29
- data/test/io_streams_test.rb +137 -61
- data/test/line_reader_test.rb +105 -0
- data/test/line_writer_test.rb +50 -0
- data/test/pgp_reader_test.rb +29 -29
- data/test/pgp_test.rb +149 -195
- data/test/pgp_writer_test.rb +63 -62
- data/test/record_reader_test.rb +61 -0
- data/test/record_writer_test.rb +73 -0
- data/test/row_reader_test.rb +34 -0
- data/test/row_writer_test.rb +51 -0
- data/test/tabular_test.rb +184 -0
- data/test/xlsx_reader_test.rb +13 -17
- data/test/zip_reader_test.rb +21 -22
- data/test/zip_writer_test.rb +40 -36
- metadata +41 -17
- data/lib/io_streams/csv/reader.rb +0 -21
- data/lib/io_streams/csv/writer.rb +0 -20
- data/lib/io_streams/delimited/writer.rb +0 -67
- data/test/csv_reader_test.rb +0 -34
- data/test/csv_writer_test.rb +0 -35
- data/test/delimited_reader_test.rb +0 -115
- data/test/delimited_writer_test.rb +0 -44
@@ -1,15 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
2
|
module File
|
3
3
|
class Reader
|
4
|
-
# Read from a file
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
block.call(file_name_or_io)
|
10
|
-
end
|
11
|
-
end
|
4
|
+
# Read from a named file
|
5
|
+
# TODO: Add support for mode (text / binary)
|
6
|
+
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
+
def self.open(file_name, _=nil, &block)
|
8
|
+
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
12
9
|
|
10
|
+
::File.open(file_name, 'rb', &block)
|
11
|
+
end
|
13
12
|
end
|
14
13
|
end
|
15
14
|
end
|
@@ -1,15 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
2
|
module File
|
3
3
|
class Writer
|
4
|
-
# Write to a file
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
block.call(file_name_or_io)
|
10
|
-
end
|
11
|
-
end
|
4
|
+
# Write to a named file
|
5
|
+
# TODO: Add support for mode (text / binary), permissions, buffering, append
|
6
|
+
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
+
def self.open(file_name, _=nil, &block)
|
8
|
+
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
12
9
|
|
10
|
+
::File.open(file_name, 'wb', &block)
|
11
|
+
end
|
13
12
|
end
|
14
13
|
end
|
15
14
|
end
|
@@ -1,72 +1,21 @@
|
|
1
1
|
require 'concurrent'
|
2
|
-
module IOStreams
|
3
|
-
# A registry to hold formats for processing files during upload or download
|
4
|
-
@extensions = Concurrent::Map.new
|
5
2
|
|
3
|
+
# Streaming library for Ruby
|
4
|
+
#
|
5
|
+
# Stream types / extensions supported:
|
6
|
+
# .zip Zip File [ :zip ]
|
7
|
+
# .gz, .gzip GZip File [ :gzip ]
|
8
|
+
# .enc File Encrypted using symmetric encryption [ :enc ]
|
9
|
+
# etc...
|
10
|
+
# other All other extensions will be returned as: []
|
11
|
+
#
|
12
|
+
# When a file is encrypted, it may also be compressed:
|
13
|
+
# .zip.enc [ :zip, :enc ]
|
14
|
+
# .gz.enc [ :gz, :enc ]
|
15
|
+
module IOStreams
|
6
16
|
UTF8_ENCODING = Encoding.find('UTF-8').freeze
|
7
17
|
BINARY_ENCODING = Encoding.find('BINARY').freeze
|
8
18
|
|
9
|
-
# Returns [Array] the formats required to process the file by looking at
|
10
|
-
# its extension(s)
|
11
|
-
#
|
12
|
-
# Extensions supported:
|
13
|
-
# .zip Zip File [ :zip ]
|
14
|
-
# .gz, .gzip GZip File [ :gzip ]
|
15
|
-
# .enc File Encrypted using symmetric encryption [ :enc ]
|
16
|
-
# other All other extensions will be returned as: [ :file ]
|
17
|
-
#
|
18
|
-
# When a file is encrypted, it may also be compressed:
|
19
|
-
# .zip.enc [ :zip, :enc ]
|
20
|
-
# .gz.enc [ :gz, :enc ]
|
21
|
-
#
|
22
|
-
# Example Zip file:
|
23
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.zip')
|
24
|
-
# => [ :zip ]
|
25
|
-
#
|
26
|
-
# Example Encrypted Gzip file:
|
27
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv.gz.enc')
|
28
|
-
# => [ :gz, :enc ]
|
29
|
-
#
|
30
|
-
# Example plain text / binary file:
|
31
|
-
# RocketJob::Formatter::Formats.streams_for_file_name('myfile.csv')
|
32
|
-
# => [ :file ]
|
33
|
-
def self.streams_for_file_name(file_name)
|
34
|
-
raise ArgumentError.new('File name cannot be nil') if file_name.nil?
|
35
|
-
raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
|
36
|
-
parts = file_name.split('.')
|
37
|
-
extensions = []
|
38
|
-
while extension = parts.pop
|
39
|
-
sym = extension.downcase.to_sym
|
40
|
-
break unless @extensions[sym]
|
41
|
-
extensions.unshift(sym)
|
42
|
-
end
|
43
|
-
extensions << :file if extensions.size == 0
|
44
|
-
extensions
|
45
|
-
end
|
46
|
-
|
47
|
-
Extension = Struct.new(:reader_class, :writer_class)
|
48
|
-
|
49
|
-
# Register a file extension and the reader and writer classes to use to format it
|
50
|
-
#
|
51
|
-
# Example:
|
52
|
-
# # MyXls::Reader and MyXls::Writer must implement .open
|
53
|
-
# register_extension(:xls, MyXls::Reader, MyXls::Writer)
|
54
|
-
def self.register_extension(extension, reader_class, writer_class)
|
55
|
-
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
56
|
-
@extensions[extension.to_sym] = Extension.new(reader_class, writer_class)
|
57
|
-
end
|
58
|
-
|
59
|
-
# De-Register a file extension
|
60
|
-
#
|
61
|
-
# Returns [Symbol] the extension removed, or nil if the extension was not registered
|
62
|
-
#
|
63
|
-
# Example:
|
64
|
-
# register_extension(:xls)
|
65
|
-
def self.deregister_extension(extension)
|
66
|
-
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
67
|
-
@extensions.delete(extension.to_sym)
|
68
|
-
end
|
69
|
-
|
70
19
|
# Returns a Reader for reading a file / stream
|
71
20
|
#
|
72
21
|
# Parameters
|
@@ -81,15 +30,11 @@ module IOStreams
|
|
81
30
|
# streams should be applied.
|
82
31
|
# Default: nil
|
83
32
|
#
|
84
|
-
#
|
85
|
-
#
|
86
|
-
#
|
87
|
-
#
|
88
|
-
#
|
89
|
-
#
|
90
|
-
# When a file is encrypted, it may also be compressed:
|
91
|
-
# .zip.enc [ :zip, :enc ]
|
92
|
-
# .gz.enc [ :gz, :enc ]
|
33
|
+
# file_name [String]
|
34
|
+
# When `streams` is not supplied, `file_name` can be used for determining the streams
|
35
|
+
# to apply to read the file/stream.
|
36
|
+
# This is particularly useful when `file_name_or_io` is a stream, or a temporary file name.
|
37
|
+
# Default: nil
|
93
38
|
#
|
94
39
|
# Example: Zip
|
95
40
|
# IOStreams.reader('myfile.zip') do |stream|
|
@@ -108,11 +53,47 @@ module IOStreams
|
|
108
53
|
#
|
109
54
|
# Example: Supply custom options
|
110
55
|
# # Encrypt the file and get Symmetric Encryption to also compress it
|
111
|
-
# IOStreams.reader('myfile.csv.enc',
|
56
|
+
# IOStreams.reader('myfile.csv.enc', streams: [enc: {compress: true}]) do |stream|
|
112
57
|
# puts stream.read
|
113
58
|
# end
|
114
|
-
|
115
|
-
|
59
|
+
#
|
60
|
+
# Note:
|
61
|
+
# * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
|
62
|
+
# no streams are passed in.
|
63
|
+
def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
|
64
|
+
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
|
65
|
+
end
|
66
|
+
|
67
|
+
# Iterate over a file / stream returning one line at a time.
|
68
|
+
def self.each_line(file_name_or_io, **args, &block)
|
69
|
+
line_reader(file_name_or_io, **args) do |line_stream|
|
70
|
+
line_stream.each(&block)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
# Iterate over a file / stream returning one row at a time.
|
75
|
+
def self.each_row(file_name_or_io, **args, &block)
|
76
|
+
row_reader(file_name_or_io, **args) do |row_stream|
|
77
|
+
row_stream.each(&block)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Returns [Hash] of every record in a file or stream with support for headers.
|
82
|
+
#
|
83
|
+
# Reading a delimited stream and converting to tabular form.
|
84
|
+
#
|
85
|
+
# Each record / line is returned one at a time so that very large files
|
86
|
+
# can be read without having to load the entire file into memory.
|
87
|
+
#
|
88
|
+
# Example:
|
89
|
+
# file_name = 'customer_data.csv.pgp'
|
90
|
+
# IOStreams.each_record(file_name) do |hash|
|
91
|
+
# p hash
|
92
|
+
# end
|
93
|
+
def self.each_record(file_name_or_io, **args, &block)
|
94
|
+
record_reader(file_name_or_io, **args) do |record_stream|
|
95
|
+
record_stream.each(&block)
|
96
|
+
end
|
116
97
|
end
|
117
98
|
|
118
99
|
# Returns a Writer for writing to a file / stream
|
@@ -163,41 +144,121 @@ module IOStreams
|
|
163
144
|
# IOStreams.writer('myfile.csv.zip', zip: { zip_file_name: 'myfile.csv' }) do |stream|
|
164
145
|
# stream.write(data)
|
165
146
|
# end
|
166
|
-
|
167
|
-
|
147
|
+
#
|
148
|
+
# Note:
|
149
|
+
# * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
|
150
|
+
# no streams are passed in.
|
151
|
+
def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
|
152
|
+
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
|
153
|
+
end
|
154
|
+
|
155
|
+
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
156
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
|
157
|
+
|
158
|
+
writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
159
|
+
IOStreams::Line::Writer.open(io, **args, &block)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
164
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
|
165
|
+
|
166
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
167
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
168
|
+
|
169
|
+
IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
|
170
|
+
end
|
168
171
|
end
|
169
172
|
|
170
|
-
|
173
|
+
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
174
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
|
175
|
+
|
176
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
177
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
178
|
+
|
179
|
+
IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
# Copies the source file/stream to the target file/stream.
|
171
184
|
# Returns [Integer] the number of bytes copied
|
172
185
|
#
|
186
|
+
# Example: Copy between 2 files
|
187
|
+
# IOStreams.copy('a.csv', 'b.csv')
|
188
|
+
# # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
|
189
|
+
#
|
190
|
+
# Example: Read content from a Xlsx file and write it out in CSV form.
|
191
|
+
# IOStreams.copy('a.xlsx', 'b.csv')
|
192
|
+
#
|
193
|
+
# Example:
|
194
|
+
# # Read content from a JSON file and write it out in CSV form.
|
195
|
+
# #
|
196
|
+
# # The output header for the CSV file is extracted from the first row in the JSON file.
|
197
|
+
# # If the first JSON row does not contain all the column names then they will be ignored
|
198
|
+
# # for the rest of the file.
|
199
|
+
# IOStreams.copy('a.json', 'b.csv')
|
200
|
+
#
|
201
|
+
# Example:
|
202
|
+
# # Read a PSV file and write out a CSV file from it.
|
203
|
+
# IOStreams.copy('a.psv', 'b.csv')
|
204
|
+
#
|
205
|
+
# Example:
|
206
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
207
|
+
# # Since the target file_name already includes `.enc` in the filename, it is automatically
|
208
|
+
# # encrypted.
|
209
|
+
# IOStreams.copy('a.csv', 'b.csv.enc')
|
210
|
+
#
|
211
|
+
# Example:
|
212
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
213
|
+
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
214
|
+
# # the encryption stream is added.
|
215
|
+
# IOStreams.copy('a.csv', 'b', target_options: {streams: [:enc]})
|
216
|
+
#
|
217
|
+
# Example:
|
218
|
+
# # Copy between 2 files, encrypting the target file with Symmetric Encryption
|
219
|
+
# # Since the target file_name does not include `.enc` in the filename, to encrypt it
|
220
|
+
# # the encryption stream is added, along with the optional compression option.
|
221
|
+
# IOStreams.copy('a.csv', 'b', target_options: {streams: [enc: { compress: true }]})
|
222
|
+
#
|
173
223
|
# Example:
|
224
|
+
# # Create a pgp encrypted file.
|
225
|
+
# # For PGP Encryption the recipients email address is required.
|
226
|
+
# IOStreams.copy('a.xlsx', 'b.csv.pgp', target_options: [:csv, pgp: { recipient_email: 'user@nospam.org' }])
|
227
|
+
#
|
228
|
+
# Example: Copy between 2 existing streams
|
174
229
|
# IOStreams.reader('a.csv') do |source_stream|
|
175
230
|
# IOStreams.writer('b.csv.enc') do |target_stream|
|
176
231
|
# IOStreams.copy(source_stream, target_stream)
|
177
232
|
# end
|
178
233
|
# end
|
179
|
-
def self.copy(source_stream, target_stream, buffer_size = 65536)
|
180
|
-
bytes = 0
|
181
|
-
while data = source_stream.read(buffer_size)
|
182
|
-
break if data.size == 0
|
183
|
-
bytes += data.size
|
184
|
-
target_stream.write(data)
|
185
|
-
end
|
186
|
-
bytes
|
187
|
-
end
|
188
|
-
|
189
|
-
# Copies the source file name to the target file name.
|
190
234
|
#
|
191
|
-
#
|
235
|
+
# Example:
|
236
|
+
# # Copy between 2 csv files, reducing the number of columns present and encrypting the
|
237
|
+
# # target file with Symmetric Encryption
|
238
|
+
# output_headers = %w[name address]
|
239
|
+
# IOStreams.copy(
|
240
|
+
# 'a.csv',
|
241
|
+
# 'b.csv.enc',
|
242
|
+
# target_options: [csv:{headers: output_headers}, enc: {compress: true}]
|
243
|
+
# )
|
192
244
|
#
|
193
245
|
# Example:
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
246
|
+
# # Copy a locally encrypted file to AWS S3.
|
247
|
+
# # Decrypts the file, then compresses it with gzip as it is being streamed into S3.
|
248
|
+
# # Useful for when the entire bucket is encrypted on S3.
|
249
|
+
# IOStreams.copy('a.csv.enc', 's3://my_bucket/b.csv.gz')
|
250
|
+
def self.copy(source_file_name_or_io, target_file_name_or_io, buffer_size: 65536, source_options: {}, target_options: {})
|
251
|
+
bytes = 0
|
252
|
+
reader(source_file_name_or_io, **source_options) do |source_stream|
|
253
|
+
writer(target_file_name_or_io, **target_options) do |target_stream|
|
254
|
+
while data = source_stream.read(buffer_size)
|
255
|
+
break if data.size == 0
|
256
|
+
bytes += data.size
|
257
|
+
target_stream.write(data)
|
258
|
+
end
|
199
259
|
end
|
200
260
|
end
|
261
|
+
bytes
|
201
262
|
end
|
202
263
|
|
203
264
|
# Returns [true|false] whether the supplied file_name_or_io is a reader stream
|
@@ -222,38 +283,149 @@ module IOStreams
|
|
222
283
|
!(file_name =~ /\.(enc|pgp|gpg)\z/i).nil?
|
223
284
|
end
|
224
285
|
|
225
|
-
#
|
226
|
-
#
|
227
|
-
|
228
|
-
|
286
|
+
# Returns [Array] the formats required to process the file by looking at
|
287
|
+
# its extension(s)
|
288
|
+
#
|
289
|
+
# Example Zip file:
|
290
|
+
# IOStreams.streams_for_file_name('myfile.zip')
|
291
|
+
# => [ :zip ]
|
292
|
+
#
|
293
|
+
# Example Encrypted Gzip file:
|
294
|
+
# IOStreams.streams_for_file_name('myfile.csv.gz.enc')
|
295
|
+
# => [ :gz, :enc ]
|
296
|
+
#
|
297
|
+
# Example plain text / binary file:
|
298
|
+
# IOStreams.streams_for_file_name('myfile.csv')
|
299
|
+
# => []
|
300
|
+
def self.streams_for_file_name(file_name)
|
301
|
+
raise ArgumentError.new('File name cannot be nil') if file_name.nil?
|
302
|
+
raise ArgumentError.new("File name must be a string: #{file_name.inspect}, class: #{file_name.class}") unless file_name.is_a?(String)
|
303
|
+
parts = file_name.split('.')
|
304
|
+
extensions = []
|
305
|
+
while extension = parts.pop
|
306
|
+
sym = extension.downcase.to_sym
|
307
|
+
break unless @extensions[sym]
|
308
|
+
extensions.unshift(sym)
|
309
|
+
end
|
310
|
+
extensions
|
311
|
+
end
|
312
|
+
|
313
|
+
# Iterate over a file / stream returning each record/line one at a time.
|
314
|
+
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
315
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
|
316
|
+
file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
|
317
|
+
file_name_or_io.is_a?(Array)
|
318
|
+
|
319
|
+
reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
320
|
+
IOStreams::Line::Reader.open(io, **args, &block)
|
321
|
+
end
|
322
|
+
end
|
323
|
+
|
324
|
+
# Iterate over a file / stream returning each row, one at a time.
|
325
|
+
def self.row_reader(file_name_or_io,
|
326
|
+
streams: nil,
|
327
|
+
delimiter: nil,
|
328
|
+
encoding: IOStreams::UTF8_ENCODING,
|
329
|
+
strip_non_printable: false,
|
330
|
+
file_name: nil,
|
331
|
+
**args,
|
332
|
+
&block)
|
333
|
+
|
334
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Reader)
|
335
|
+
|
336
|
+
line_reader(
|
337
|
+
file_name_or_io,
|
338
|
+
streams: streams,
|
339
|
+
delimiter: delimiter,
|
340
|
+
encoding: encoding,
|
341
|
+
strip_non_printable: strip_non_printable,
|
342
|
+
file_name: file_name) do |io|
|
343
|
+
|
344
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
345
|
+
IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
|
346
|
+
end
|
347
|
+
end
|
348
|
+
|
349
|
+
# Iterate over a file / stream returning each line as a hash, one at a time.
|
350
|
+
def self.record_reader(file_name_or_io,
|
351
|
+
streams: nil,
|
352
|
+
delimiter: nil,
|
353
|
+
encoding: IOStreams::UTF8_ENCODING,
|
354
|
+
strip_non_printable: false,
|
355
|
+
file_name: nil,
|
356
|
+
**args,
|
357
|
+
&block)
|
229
358
|
|
230
|
-
|
231
|
-
|
232
|
-
|
359
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader) || file_name_or_io.is_a?(IOStreams::Xlsx::Reader)
|
360
|
+
|
361
|
+
line_reader(
|
362
|
+
file_name_or_io,
|
363
|
+
streams: streams,
|
364
|
+
delimiter: delimiter,
|
365
|
+
encoding: encoding,
|
366
|
+
strip_non_printable: strip_non_printable,
|
367
|
+
file_name: file_name) do |io|
|
368
|
+
|
369
|
+
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
370
|
+
IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
|
233
371
|
end
|
234
372
|
end
|
235
373
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
374
|
+
Extension = Struct.new(:reader_class, :writer_class)
|
375
|
+
|
376
|
+
# Register a file extension and the reader and writer streaming classes
|
377
|
+
#
|
378
|
+
# Example:
|
379
|
+
# # MyXls::Reader and MyXls::Writer must implement .open
|
380
|
+
# register_extension(:xls, MyXls::Reader, MyXls::Writer)
|
381
|
+
def self.register_extension(extension, reader_class, writer_class)
|
382
|
+
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.nil? || extension.to_s =~ /\A\w+\Z/
|
383
|
+
@extensions[extension.nil? ? nil : extension.to_sym] = Extension.new(reader_class, writer_class)
|
384
|
+
end
|
385
|
+
|
386
|
+
# De-Register a file extension
|
387
|
+
#
|
388
|
+
# Returns [Symbol] the extension removed, or nil if the extension was not registered
|
389
|
+
#
|
390
|
+
# Example:
|
391
|
+
# deregister_extension(:xls)
|
392
|
+
def self.deregister_extension(extension)
|
393
|
+
raise(ArgumentError, "Invalid extension #{extension.inspect}") unless extension.to_s =~ /\A\w+\Z/
|
394
|
+
@extensions.delete(extension.to_sym)
|
395
|
+
end
|
240
396
|
|
241
|
-
|
242
|
-
|
397
|
+
# Helper method: Returns [true|false] whether the supplied value is blank.
|
398
|
+
def self.blank?(value)
|
399
|
+
if value.nil?
|
400
|
+
true
|
401
|
+
elsif value.is_a?(String)
|
402
|
+
value !~ /\S/
|
403
|
+
else
|
404
|
+
value.respond_to?(:empty?) ? value.empty? : !value
|
405
|
+
end
|
243
406
|
end
|
244
407
|
|
245
|
-
##########################################################################
|
246
408
|
private
|
247
409
|
|
410
|
+
# A registry to hold formats for processing files during upload or download
|
411
|
+
@extensions = {}
|
412
|
+
|
248
413
|
# Struct to hold the Stream and options if any
|
249
414
|
StreamStruct = Struct.new(:klass, :options)
|
250
415
|
|
251
416
|
# Returns a reader or writer stream
|
252
|
-
def self.stream(type, file_name_or_io, streams
|
253
|
-
|
254
|
-
|
255
|
-
|
417
|
+
def self.stream(type, file_name_or_io, streams:, file_name:, &block)
|
418
|
+
# TODO: Add support for different schemes, such as file://, s3://, sftp://
|
419
|
+
|
420
|
+
streams = streams_for_file_name(file_name) if streams.nil? && file_name
|
421
|
+
|
422
|
+
# Shortcut for when it is already a stream and no further streams need to be applied.
|
423
|
+
return block.call(file_name_or_io) if !file_name_or_io.is_a?(String) && (streams.nil? || streams.empty?)
|
424
|
+
|
425
|
+
if streams.nil?
|
426
|
+
streams = file_name_or_io.is_a?(String) ? streams_for_file_name(file_name_or_io) : [nil]
|
256
427
|
end
|
428
|
+
|
257
429
|
stream_structs = streams_for(type, streams)
|
258
430
|
if stream_structs.size == 1
|
259
431
|
stream_struct = stream_structs.first
|
@@ -270,6 +442,7 @@ module IOStreams
|
|
270
442
|
if params.is_a?(Symbol)
|
271
443
|
[stream_struct_for_stream(type, params)]
|
272
444
|
elsif params.is_a?(Array)
|
445
|
+
return [stream_struct_for_stream(type, nil)] if params.empty?
|
273
446
|
a = []
|
274
447
|
params.each do |stream|
|
275
448
|
if stream.is_a?(Hash)
|
@@ -288,24 +461,35 @@ module IOStreams
|
|
288
461
|
end
|
289
462
|
end
|
290
463
|
|
291
|
-
def self.stream_struct_for_stream(type, stream, options={})
|
292
|
-
ext = @extensions[stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
464
|
+
def self.stream_struct_for_stream(type, stream, options = {})
|
465
|
+
ext = @extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
293
466
|
klass = ext.send("#{type}_class")
|
294
467
|
StreamStruct.new(klass, options)
|
295
468
|
end
|
296
469
|
|
470
|
+
# Default reader/writer when no other streams need to be applied.
|
471
|
+
register_extension(nil, IOStreams::File::Reader, IOStreams::File::Writer)
|
472
|
+
|
297
473
|
# Register File extensions
|
298
|
-
|
299
|
-
register_extension(:
|
300
|
-
register_extension(:
|
301
|
-
register_extension(:
|
302
|
-
register_extension(:
|
303
|
-
register_extension(:
|
304
|
-
register_extension(:
|
305
|
-
register_extension(:
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
474
|
+
register_extension(:bz2, IOStreams::Bzip2::Reader, IOStreams::Bzip2::Writer)
|
475
|
+
register_extension(:gz, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
|
476
|
+
register_extension(:gzip, IOStreams::Gzip::Reader, IOStreams::Gzip::Writer)
|
477
|
+
register_extension(:zip, IOStreams::Zip::Reader, IOStreams::Zip::Writer)
|
478
|
+
register_extension(:pgp, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
|
479
|
+
register_extension(:gpg, IOStreams::Pgp::Reader, IOStreams::Pgp::Writer)
|
480
|
+
register_extension(:xlsx, IOStreams::Xlsx::Reader, nil)
|
481
|
+
register_extension(:xlsm, IOStreams::Xlsx::Reader, nil)
|
482
|
+
|
483
|
+
# Use Symmetric Encryption to encrypt or decrypt files with the `enc` extension
|
484
|
+
# when the gem `symmetric-encryption` has been loaded.
|
485
|
+
if defined?(SymmetricEncryption)
|
486
|
+
register_extension(:enc, SymmetricEncryption::Reader, SymmetricEncryption::Writer)
|
487
|
+
end
|
488
|
+
|
489
|
+
# register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
|
490
|
+
# register_scheme(:file, IOStreams::File::Reader, IOStreams::File::Writer)
|
491
|
+
# register_scheme(:http, IOStreams::HTTP::Reader, IOStreams::HTTP::Writer)
|
492
|
+
# register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
|
493
|
+
# register_scheme(:sftp, IOStreams::SFTP::Reader, IOStreams::SFTP::Writer)
|
494
|
+
# register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
|
311
495
|
end
|