iostreams 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -1
- data/lib/io_streams/builder.rb +20 -4
- data/lib/io_streams/errors.rb +12 -0
- data/lib/io_streams/io_streams.rb +0 -2
- data/lib/io_streams/line/reader.rb +23 -11
- data/lib/io_streams/path.rb +1 -1
- data/lib/io_streams/paths/s3.rb +25 -14
- data/lib/io_streams/paths/sftp.rb +93 -59
- data/lib/io_streams/pgp.rb +17 -17
- data/lib/io_streams/pgp/writer.rb +1 -2
- data/lib/io_streams/stream.rb +75 -10
- data/lib/io_streams/tabular.rb +23 -23
- data/lib/io_streams/tabular/parser/csv.rb +4 -2
- data/lib/io_streams/tabular/parser/fixed.rb +1 -1
- data/lib/io_streams/tabular/utility/csv_row.rb +1 -4
- data/lib/io_streams/utils.rb +3 -5
- data/lib/io_streams/version.rb +1 -1
- data/lib/iostreams.rb +8 -0
- data/test/builder_test.rb +29 -0
- data/test/deprecated_test.rb +2 -0
- data/test/files/test.psv +4 -0
- data/test/files/unclosed_quote_large_test.csv +1658 -0
- data/test/files/unclosed_quote_test2.csv +3 -0
- data/test/line_reader_test.rb +30 -4
- data/test/paths/file_test.rb +6 -8
- data/test/paths/sftp_test.rb +7 -1
- data/test/stream_test.rb +169 -3
- data/test/test_helper.rb +0 -3
- metadata +48 -43
data/lib/io_streams/pgp.rb
CHANGED
@@ -48,8 +48,8 @@ module IOStreams
|
|
48
48
|
# See `man gpg` for the remaining options
|
49
49
|
def self.generate_key(name:,
|
50
50
|
email:,
|
51
|
-
comment: nil,
|
52
51
|
passphrase:,
|
52
|
+
comment: nil,
|
53
53
|
key_type: "RSA",
|
54
54
|
key_length: 4096,
|
55
55
|
subkey_type: "RSA",
|
@@ -261,7 +261,7 @@ module IOStreams
|
|
261
261
|
|
262
262
|
import(key: key)
|
263
263
|
set_trust(email: email, key_id: key_id)
|
264
|
-
email
|
264
|
+
email || key_id
|
265
265
|
end
|
266
266
|
|
267
267
|
# Set the trust level for an existing key.
|
@@ -291,10 +291,8 @@ module IOStreams
|
|
291
291
|
version_check
|
292
292
|
Open3.popen2e("#{executable} --list-keys --fingerprint --with-colons #{email}") do |_stdin, out, waith_thr|
|
293
293
|
output = out.read.chomp
|
294
|
-
|
295
|
-
|
296
|
-
raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email}: #{output}")
|
297
|
-
end
|
294
|
+
if !waith_thr.value.success? && !(output !~ /(public key not found|No public key)/i)
|
295
|
+
raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email}: #{output}")
|
298
296
|
end
|
299
297
|
|
300
298
|
output.each_line do |line|
|
@@ -336,9 +334,11 @@ module IOStreams
|
|
336
334
|
match[1]
|
337
335
|
end
|
338
336
|
else
|
339
|
-
|
337
|
+
if err !~ /(key not found|No (public|secret) key)/i
|
338
|
+
raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email || key_id}: #{err}#{out}")
|
339
|
+
end
|
340
340
|
|
341
|
-
|
341
|
+
[]
|
342
342
|
end
|
343
343
|
end
|
344
344
|
end
|
@@ -350,7 +350,7 @@ module IOStreams
|
|
350
350
|
end
|
351
351
|
|
352
352
|
def self.version_check
|
353
|
-
return unless pgp_version.to_f >= 2.
|
353
|
+
return unless pgp_version.to_f >= 2.4
|
354
354
|
|
355
355
|
raise(
|
356
356
|
Pgp::UnsupportedVersion,
|
@@ -382,10 +382,10 @@ module IOStreams
|
|
382
382
|
key_length: match[3].to_s.to_i,
|
383
383
|
key_type: match[2],
|
384
384
|
date: (begin
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
385
|
+
Date.parse(match[4].to_s)
|
386
|
+
rescue StandardError
|
387
|
+
match[4]
|
388
|
+
end)
|
389
389
|
}
|
390
390
|
elsif (match = line.match(%r{(pub|sec)\s+(\d+)(.*)/(\w+)\s+(\d+-\d+-\d+)(\s+(.+)<(.+)>)?}))
|
391
391
|
# Matches: pub 2048R/C7F9D9CB 2016-10-26
|
@@ -396,10 +396,10 @@ module IOStreams
|
|
396
396
|
key_type: match[3],
|
397
397
|
key_id: match[4],
|
398
398
|
date: (begin
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
399
|
+
Date.parse(match[5].to_s)
|
400
|
+
rescue StandardError
|
401
|
+
match[5]
|
402
|
+
end)
|
403
403
|
}
|
404
404
|
# Prior to gpg v2.0.30
|
405
405
|
if match[7]
|
data/lib/io_streams/stream.rb
CHANGED
@@ -151,6 +151,9 @@ module IOStreams
|
|
151
151
|
# Whether to apply the stream conversions during the copy.
|
152
152
|
# Default: true
|
153
153
|
#
|
154
|
+
# :mode [:line, :array, :hash]
|
155
|
+
# When convert is `true` then use this mode to convert the contents of the file.
|
156
|
+
#
|
154
157
|
# Examples:
|
155
158
|
#
|
156
159
|
# # Copy and convert streams based on file extensions
|
@@ -162,11 +165,17 @@ module IOStreams
|
|
162
165
|
# # Advanced copy with custom stream conversions on source and target.
|
163
166
|
# source = IOStreams.path("source_file").stream(encoding: "BINARY")
|
164
167
|
# IOStreams.path("target_file.pgp").option(:pgp, passphrase: "hello").copy_from(source)
|
165
|
-
def copy_from(source, convert: true)
|
168
|
+
def copy_from(source, convert: true, mode: nil, **args)
|
166
169
|
if convert
|
167
170
|
stream = IOStreams.new(source)
|
168
|
-
|
169
|
-
|
171
|
+
if mode
|
172
|
+
writer(mode, **args) do |target|
|
173
|
+
stream.each(mode) { |row| target << row }
|
174
|
+
end
|
175
|
+
else
|
176
|
+
writer(**args) do |target|
|
177
|
+
stream.reader { |src| IO.copy_stream(src, target) }
|
178
|
+
end
|
170
179
|
end
|
171
180
|
else
|
172
181
|
stream = source.is_a?(Stream) ? source.dup : IOStreams.new(source)
|
@@ -176,9 +185,9 @@ module IOStreams
|
|
176
185
|
end
|
177
186
|
end
|
178
187
|
|
179
|
-
def copy_to(target,
|
188
|
+
def copy_to(target, **args)
|
180
189
|
target = IOStreams.new(target)
|
181
|
-
target.copy_from(self,
|
190
|
+
target.copy_from(self, **args)
|
182
191
|
end
|
183
192
|
|
184
193
|
# Set/get the original file_name
|
@@ -191,11 +200,41 @@ module IOStreams
|
|
191
200
|
end
|
192
201
|
end
|
193
202
|
|
194
|
-
# Set
|
203
|
+
# Set the original file_name
|
195
204
|
def file_name=(file_name)
|
196
205
|
builder.file_name = file_name
|
197
206
|
end
|
198
207
|
|
208
|
+
# Set/get the tabular format
|
209
|
+
def format(format = :none)
|
210
|
+
if format == :none
|
211
|
+
builder.format
|
212
|
+
else
|
213
|
+
builder.format = format
|
214
|
+
self
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Set the tabular format
|
219
|
+
def format=(format)
|
220
|
+
builder.format = format
|
221
|
+
end
|
222
|
+
|
223
|
+
# Set/get the tabular format options
|
224
|
+
def format_options(format_options = :none)
|
225
|
+
if format_options == :none
|
226
|
+
builder.format_options
|
227
|
+
else
|
228
|
+
builder.format_options = format_options
|
229
|
+
self
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
# Set the tabular format_options
|
234
|
+
def format_options=(format_options)
|
235
|
+
builder.format_options = format_options
|
236
|
+
end
|
237
|
+
|
199
238
|
# Returns [String] the last component of this path.
|
200
239
|
# Returns `nil` if no `file_name` was set.
|
201
240
|
#
|
@@ -293,14 +332,26 @@ module IOStreams
|
|
293
332
|
# Iterate over a file / stream returning each line as an array, one at a time.
|
294
333
|
def row_reader(delimiter: nil, embedded_within: nil, **args)
|
295
334
|
line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
|
296
|
-
yield IOStreams::Row::Reader.new(
|
335
|
+
yield IOStreams::Row::Reader.new(
|
336
|
+
io,
|
337
|
+
original_file_name: builder.file_name,
|
338
|
+
format: builder.format,
|
339
|
+
format_options: builder.format_options,
|
340
|
+
**args
|
341
|
+
)
|
297
342
|
end
|
298
343
|
end
|
299
344
|
|
300
345
|
# Iterate over a file / stream returning each line as a hash, one at a time.
|
301
346
|
def record_reader(delimiter: nil, embedded_within: nil, **args)
|
302
347
|
line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
|
303
|
-
yield IOStreams::Record::Reader.new(
|
348
|
+
yield IOStreams::Record::Reader.new(
|
349
|
+
io,
|
350
|
+
original_file_name: builder.file_name,
|
351
|
+
format: builder.format,
|
352
|
+
format_options: builder.format_options,
|
353
|
+
**args
|
354
|
+
)
|
304
355
|
end
|
305
356
|
end
|
306
357
|
|
@@ -320,7 +371,14 @@ module IOStreams
|
|
320
371
|
return block.call(io_stream) if io_stream&.is_a?(IOStreams::Row::Writer)
|
321
372
|
|
322
373
|
line_writer(delimiter: delimiter) do |io|
|
323
|
-
IOStreams::Row::Writer.stream(
|
374
|
+
IOStreams::Row::Writer.stream(
|
375
|
+
io,
|
376
|
+
original_file_name: builder.file_name,
|
377
|
+
format: builder.format,
|
378
|
+
format_options: builder.format_options,
|
379
|
+
**args,
|
380
|
+
&block
|
381
|
+
)
|
324
382
|
end
|
325
383
|
end
|
326
384
|
|
@@ -328,7 +386,14 @@ module IOStreams
|
|
328
386
|
return block.call(io_stream) if io_stream&.is_a?(IOStreams::Record::Writer)
|
329
387
|
|
330
388
|
line_writer(delimiter: delimiter) do |io|
|
331
|
-
IOStreams::Record::Writer.stream(
|
389
|
+
IOStreams::Record::Writer.stream(
|
390
|
+
io,
|
391
|
+
original_file_name: builder.file_name,
|
392
|
+
format: builder.format,
|
393
|
+
format_options: builder.format_options,
|
394
|
+
**args,
|
395
|
+
&block
|
396
|
+
)
|
332
397
|
end
|
333
398
|
end
|
334
399
|
end
|
data/lib/io_streams/tabular.rb
CHANGED
@@ -52,7 +52,7 @@ module IOStreams
|
|
52
52
|
# format: [Symbol]
|
53
53
|
# :csv, :hash, :array, :json, :psv, :fixed
|
54
54
|
#
|
55
|
-
# file_name: [String]
|
55
|
+
# file_name: [IOStreams::Path | String]
|
56
56
|
# When `:format` is not supplied the file name can be used to infer the required format.
|
57
57
|
# Optional. Default: nil
|
58
58
|
#
|
@@ -81,14 +81,19 @@ module IOStreams
|
|
81
81
|
# #as_hash will skip these additional columns entirely as if they were not in the file at all.
|
82
82
|
# false:
|
83
83
|
# Raises Tabular::InvalidHeader when a column is supplied that is not in the whitelist.
|
84
|
-
|
84
|
+
#
|
85
|
+
# default_format: [Symbol]
|
86
|
+
# When the format is not supplied, and the format cannot be inferred from the supplied file name
|
87
|
+
# then this default format will be used.
|
88
|
+
# Default: :csv
|
89
|
+
# Set to nil to force it to raise an exception when the format is undefined.
|
90
|
+
def initialize(format: nil, file_name: nil, format_options: nil, default_format: :csv, **args)
|
85
91
|
@header = Header.new(**args)
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
end
|
92
|
+
@format = file_name && format.nil? ? self.class.format_from_file_name(file_name) : format
|
93
|
+
@format ||= default_format
|
94
|
+
raise(UnknownFormat, "The format cannot be inferred from the file name: #{file_name}") unless @format
|
95
|
+
|
96
|
+
klass = self.class.parser_class(@format)
|
92
97
|
@parser = format_options ? klass.new(**format_options) : klass.new
|
93
98
|
end
|
94
99
|
|
@@ -162,9 +167,9 @@ module IOStreams
|
|
162
167
|
# Example:
|
163
168
|
# register_format(:csv, IOStreams::Tabular::Parser::Csv)
|
164
169
|
def self.register_format(format, parser)
|
165
|
-
raise(ArgumentError, "Invalid format #{format.inspect}") unless format.
|
170
|
+
raise(ArgumentError, "Invalid format #{format.inspect}") unless format.to_s =~ /\A\w+\Z/
|
166
171
|
|
167
|
-
@formats[format.
|
172
|
+
@formats[format.to_sym] = parser
|
168
173
|
end
|
169
174
|
|
170
175
|
# De-Register a file format
|
@@ -187,23 +192,18 @@ module IOStreams
|
|
187
192
|
# A registry to hold formats for processing files during upload or download
|
188
193
|
@formats = {}
|
189
194
|
|
190
|
-
|
191
|
-
|
195
|
+
# Returns the registered format that will be used for the supplied file name.
|
196
|
+
def self.format_from_file_name(file_name)
|
197
|
+
file_name.to_s.split(".").reverse_each { |ext| return ext.to_sym if @formats.include?(ext.to_sym) }
|
198
|
+
nil
|
192
199
|
end
|
193
200
|
|
194
|
-
# Returns the parser
|
195
|
-
def self.
|
196
|
-
format
|
197
|
-
|
198
|
-
if @formats.include?(ext.to_sym)
|
199
|
-
format = ext.to_sym
|
200
|
-
break
|
201
|
-
end
|
202
|
-
end
|
203
|
-
parser_class(format)
|
201
|
+
# Returns the parser class for the registered format.
|
202
|
+
def self.parser_class(format)
|
203
|
+
@formats[format.nil? ? nil : format.to_sym] ||
|
204
|
+
raise(ArgumentError, "Unknown Tabular Format: #{format.inspect}")
|
204
205
|
end
|
205
206
|
|
206
|
-
register_format(nil, IOStreams::Tabular::Parser::Csv)
|
207
207
|
register_format(:array, IOStreams::Tabular::Parser::Array)
|
208
208
|
register_format(:csv, IOStreams::Tabular::Parser::Csv)
|
209
209
|
register_format(:fixed, IOStreams::Tabular::Parser::Fixed)
|
@@ -5,8 +5,10 @@ module IOStreams
|
|
5
5
|
class Csv < Base
|
6
6
|
attr_reader :csv_parser
|
7
7
|
|
8
|
-
|
9
|
-
|
8
|
+
unless RUBY_VERSION.to_f >= 2.6
|
9
|
+
def initialize
|
10
|
+
@csv_parser = Utility::CSVRow.new
|
11
|
+
end
|
10
12
|
end
|
11
13
|
|
12
14
|
# Returns [Array<String>] the header row.
|
@@ -146,7 +146,7 @@ module IOStreams
|
|
146
146
|
|
147
147
|
attr_reader :key, :size, :type, :decimals
|
148
148
|
|
149
|
-
def initialize(key: nil,
|
149
|
+
def initialize(size:, key: nil, type: :string, decimals: 2)
|
150
150
|
@key = key
|
151
151
|
@size = size == :remainder ? -1 : size.to_i
|
152
152
|
@type = type.to_sym
|
@@ -6,10 +6,7 @@ module IOStreams
|
|
6
6
|
# 2 to 3 times better performance than CSV.parse_line and considerably less
|
7
7
|
# garbage collection required.
|
8
8
|
#
|
9
|
-
# Note:
|
10
|
-
# This parser does not support line feeds embedded in quoted fields since
|
11
|
-
# the file is broken apart based on line feeds during the upload process and
|
12
|
-
# is then processed by each worker on a line by line basis.
|
9
|
+
# Note: Only used prior to Ruby 2.6
|
13
10
|
class CSVRow < ::CSV
|
14
11
|
UTF8_ENCODING = Encoding.find("UTF-8").freeze
|
15
12
|
|
data/lib/io_streams/utils.rb
CHANGED
@@ -28,11 +28,9 @@ module IOStreams
|
|
28
28
|
def self.temp_file_name(basename, extension = "")
|
29
29
|
result = nil
|
30
30
|
::Dir::Tmpname.create([basename, extension], IOStreams.temp_dir, max_try: MAX_TEMP_FILE_NAME_ATTEMPTS) do |tmpname|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
::File.unlink(tmpname) if ::File.exist?(tmpname)
|
35
|
-
end
|
31
|
+
result = yield(tmpname)
|
32
|
+
ensure
|
33
|
+
::File.unlink(tmpname) if ::File.exist?(tmpname)
|
36
34
|
end
|
37
35
|
result
|
38
36
|
end
|
data/lib/io_streams/version.rb
CHANGED
data/lib/iostreams.rb
CHANGED
@@ -23,33 +23,41 @@ module IOStreams
|
|
23
23
|
autoload :Reader, "io_streams/bzip2/reader"
|
24
24
|
autoload :Writer, "io_streams/bzip2/writer"
|
25
25
|
end
|
26
|
+
|
26
27
|
module Encode
|
27
28
|
autoload :Reader, "io_streams/encode/reader"
|
28
29
|
autoload :Writer, "io_streams/encode/writer"
|
29
30
|
end
|
31
|
+
|
30
32
|
module Gzip
|
31
33
|
autoload :Reader, "io_streams/gzip/reader"
|
32
34
|
autoload :Writer, "io_streams/gzip/writer"
|
33
35
|
end
|
36
|
+
|
34
37
|
module Line
|
35
38
|
autoload :Reader, "io_streams/line/reader"
|
36
39
|
autoload :Writer, "io_streams/line/writer"
|
37
40
|
end
|
41
|
+
|
38
42
|
module Record
|
39
43
|
autoload :Reader, "io_streams/record/reader"
|
40
44
|
autoload :Writer, "io_streams/record/writer"
|
41
45
|
end
|
46
|
+
|
42
47
|
module Row
|
43
48
|
autoload :Reader, "io_streams/row/reader"
|
44
49
|
autoload :Writer, "io_streams/row/writer"
|
45
50
|
end
|
51
|
+
|
46
52
|
module SymmetricEncryption
|
47
53
|
autoload :Reader, "io_streams/symmetric_encryption/reader"
|
48
54
|
autoload :Writer, "io_streams/symmetric_encryption/writer"
|
49
55
|
end
|
56
|
+
|
50
57
|
module Xlsx
|
51
58
|
autoload :Reader, "io_streams/xlsx/reader"
|
52
59
|
end
|
60
|
+
|
53
61
|
module Zip
|
54
62
|
autoload :Reader, "io_streams/zip/reader"
|
55
63
|
autoload :Writer, "io_streams/zip/writer"
|
data/test/builder_test.rb
CHANGED
@@ -41,6 +41,35 @@ class BuilderTest < Minitest::Test
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
describe "#format" do
|
45
|
+
it "detects the format from the file name" do
|
46
|
+
streams = IOStreams::Builder.new("abc.json")
|
47
|
+
assert_equal :json, streams.format
|
48
|
+
end
|
49
|
+
|
50
|
+
it "is nil if the file name has no meaningful format" do
|
51
|
+
assert_nil streams.format
|
52
|
+
end
|
53
|
+
|
54
|
+
it "returns set format with no file_name" do
|
55
|
+
streams = IOStreams::Builder.new
|
56
|
+
streams.format = :csv
|
57
|
+
assert_equal :csv, streams.format
|
58
|
+
end
|
59
|
+
|
60
|
+
it "returns set format with file_name" do
|
61
|
+
streams = IOStreams::Builder.new("abc.json")
|
62
|
+
streams.format = :csv
|
63
|
+
assert_equal :csv, streams.format
|
64
|
+
end
|
65
|
+
|
66
|
+
it "validates bad format" do
|
67
|
+
assert_raises ArgumentError do
|
68
|
+
streams.format = :blah
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
44
73
|
describe "#stream" do
|
45
74
|
it "adds one stream" do
|
46
75
|
streams.stream(:pgp, passphrase: "unlock-me")
|