iostreams 1.10.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -2
- data/Rakefile +7 -0
- data/lib/io_streams/builder.rb +10 -10
- data/lib/io_streams/bzip2/writer.rb +1 -1
- data/lib/io_streams/encode/reader.rb +2 -2
- data/lib/io_streams/encode/writer.rb +5 -5
- data/lib/io_streams/gzip/reader.rb +1 -1
- data/lib/io_streams/gzip/writer.rb +1 -1
- data/lib/io_streams/io_streams.rb +47 -21
- data/lib/io_streams/line/reader.rb +2 -2
- data/lib/io_streams/line/writer.rb +1 -1
- data/lib/io_streams/path.rb +2 -2
- data/lib/io_streams/paths/file.rb +25 -11
- data/lib/io_streams/paths/http.rb +80 -7
- data/lib/io_streams/paths/matcher.rb +3 -3
- data/lib/io_streams/paths/s3.rb +22 -3
- data/lib/io_streams/paths/sftp.rb +9 -10
- data/lib/io_streams/pgp/reader.rb +25 -7
- data/lib/io_streams/pgp/writer.rb +95 -29
- data/lib/io_streams/pgp.rb +289 -87
- data/lib/io_streams/reader.rb +4 -4
- data/lib/io_streams/record/reader.rb +3 -4
- data/lib/io_streams/record/writer.rb +3 -4
- data/lib/io_streams/row/reader.rb +1 -1
- data/lib/io_streams/row/writer.rb +1 -1
- data/lib/io_streams/stream.rb +36 -30
- data/lib/io_streams/symmetric_encryption/reader.rb +2 -2
- data/lib/io_streams/symmetric_encryption/writer.rb +4 -4
- data/lib/io_streams/tabular/header.rb +18 -6
- data/lib/io_streams/tabular/parser/array.rb +0 -10
- data/lib/io_streams/tabular/parser/csv.rb +6 -38
- data/lib/io_streams/tabular/parser/fixed.rb +5 -5
- data/lib/io_streams/tabular/parser/psv.rb +0 -12
- data/lib/io_streams/tabular.rb +5 -10
- data/lib/io_streams/utils.rb +6 -8
- data/lib/io_streams/version.rb +1 -1
- data/lib/io_streams/writer.rb +6 -6
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +22 -10
- data/lib/iostreams.rb +0 -1
- metadata +28 -113
- data/lib/io_streams/deprecated.rb +0 -216
- data/lib/io_streams/tabular/utility/csv_row.rb +0 -105
- data/test/builder_test.rb +0 -311
- data/test/bzip2_reader_test.rb +0 -27
- data/test/bzip2_writer_test.rb +0 -56
- data/test/deprecated_test.rb +0 -121
- data/test/encode_reader_test.rb +0 -51
- data/test/encode_writer_test.rb +0 -90
- data/test/files/embedded_lines_test.csv +0 -7
- data/test/files/multiple_files.zip +0 -0
- data/test/files/spreadsheet.xlsx +0 -0
- data/test/files/test.csv +0 -4
- data/test/files/test.json +0 -3
- data/test/files/test.psv +0 -4
- data/test/files/text file.txt +0 -3
- data/test/files/text.txt +0 -3
- data/test/files/text.txt.bz2 +0 -0
- data/test/files/text.txt.gz +0 -0
- data/test/files/text.txt.gz.zip +0 -0
- data/test/files/text.zip +0 -0
- data/test/files/text.zip.gz +0 -0
- data/test/files/unclosed_quote_large_test.csv +0 -1658
- data/test/files/unclosed_quote_test.csv +0 -4
- data/test/files/unclosed_quote_test2.csv +0 -3
- data/test/files/utf16_test.csv +0 -0
- data/test/gzip_reader_test.rb +0 -27
- data/test/gzip_writer_test.rb +0 -52
- data/test/io_streams_test.rb +0 -132
- data/test/line_reader_test.rb +0 -325
- data/test/line_writer_test.rb +0 -59
- data/test/minimal_file_reader.rb +0 -25
- data/test/path_test.rb +0 -55
- data/test/paths/file_test.rb +0 -202
- data/test/paths/http_test.rb +0 -34
- data/test/paths/matcher_test.rb +0 -120
- data/test/paths/s3_test.rb +0 -220
- data/test/paths/sftp_test.rb +0 -106
- data/test/pgp_reader_test.rb +0 -46
- data/test/pgp_test.rb +0 -254
- data/test/pgp_writer_test.rb +0 -130
- data/test/record_reader_test.rb +0 -60
- data/test/record_writer_test.rb +0 -82
- data/test/row_reader_test.rb +0 -35
- data/test/row_writer_test.rb +0 -56
- data/test/stream_test.rb +0 -574
- data/test/tabular_test.rb +0 -338
- data/test/test_helper.rb +0 -40
- data/test/utils_test.rb +0 -20
- data/test/xlsx_reader_test.rb +0 -37
- data/test/zip_reader_test.rb +0 -53
- data/test/zip_writer_test.rb +0 -48
data/lib/io_streams/stream.rb
CHANGED
|
@@ -18,8 +18,8 @@ module IOStreams
|
|
|
18
18
|
# Example:
|
|
19
19
|
#
|
|
20
20
|
# IOStreams.path("tempfile2527").stream(:zip).stream(:pgp, passphrase: "receiver_passphrase").read
|
|
21
|
-
def stream(stream, **
|
|
22
|
-
builder.stream(stream, **
|
|
21
|
+
def stream(stream, **)
|
|
22
|
+
builder.stream(stream, **)
|
|
23
23
|
self
|
|
24
24
|
end
|
|
25
25
|
|
|
@@ -33,15 +33,15 @@ module IOStreams
|
|
|
33
33
|
# IOStreams.path("keep_safe.enc").option(:pgp, passphrase: "receiver_passphrase").read
|
|
34
34
|
#
|
|
35
35
|
# IOStreams.path(output_file_name).option(:pgp, passphrase: "receiver_passphrase").read
|
|
36
|
-
def option(stream, **
|
|
37
|
-
builder.option(stream, **
|
|
36
|
+
def option(stream, **)
|
|
37
|
+
builder.option(stream, **)
|
|
38
38
|
self
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
# Adds the options for the specified stream as an option,
|
|
42
42
|
# but if streams have already been added it is instead added as a stream.
|
|
43
|
-
def option_or_stream(stream, **
|
|
44
|
-
builder.option_or_stream(stream, **
|
|
43
|
+
def option_or_stream(stream, **)
|
|
44
|
+
builder.option_or_stream(stream, **)
|
|
45
45
|
self
|
|
46
46
|
end
|
|
47
47
|
|
|
@@ -93,21 +93,26 @@ module IOStreams
|
|
|
93
93
|
def each(mode = :line, **args, &block)
|
|
94
94
|
raise(ArgumentError, "Invalid mode: #{mode.inspect}") if mode == :stream
|
|
95
95
|
|
|
96
|
-
#
|
|
96
|
+
# Deliberately not returning an Enumerator when no block is given.
|
|
97
|
+
# The stream pipeline manages resources via block scope: every stream is opened with an
|
|
98
|
+
# `ensure` that closes the file handle, reaps the gpg subprocess, deletes temp files, etc.
|
|
99
|
+
# A Fiber-backed Enumerator (e.g. `to_enum(__method__, mode, **args)`) would leave that block
|
|
100
|
+
# suspended; if the caller abandons a partially-consumed enumerator, none of the cleanup runs
|
|
101
|
+
# until GC collects the Fiber, leaking file descriptors, gpg processes, and temp files.
|
|
97
102
|
reader(mode, **args) { |stream| stream.each(&block) }
|
|
98
103
|
end
|
|
99
104
|
|
|
100
105
|
# Returns a Reader for reading a file / stream
|
|
101
|
-
def reader(mode = :stream, **args, &
|
|
106
|
+
def reader(mode = :stream, **args, &)
|
|
102
107
|
case mode
|
|
103
108
|
when :stream
|
|
104
|
-
stream_reader(&
|
|
109
|
+
stream_reader(&)
|
|
105
110
|
when :line
|
|
106
|
-
line_reader(**args, &
|
|
111
|
+
line_reader(**args, &)
|
|
107
112
|
when :array
|
|
108
|
-
row_reader(**args, &
|
|
113
|
+
row_reader(**args, &)
|
|
109
114
|
when :hash
|
|
110
|
-
record_reader(**args, &
|
|
115
|
+
record_reader(**args, &)
|
|
111
116
|
else
|
|
112
117
|
raise(ArgumentError, "Invalid mode: #{mode.inspect}")
|
|
113
118
|
end
|
|
@@ -124,16 +129,16 @@ module IOStreams
|
|
|
124
129
|
end
|
|
125
130
|
|
|
126
131
|
# Returns a Writer for writing to a file / stream
|
|
127
|
-
def writer(mode = :stream, **args, &
|
|
132
|
+
def writer(mode = :stream, **args, &)
|
|
128
133
|
case mode
|
|
129
134
|
when :stream
|
|
130
|
-
stream_writer(&
|
|
135
|
+
stream_writer(&)
|
|
131
136
|
when :line
|
|
132
|
-
line_writer(**args, &
|
|
137
|
+
line_writer(**args, &)
|
|
133
138
|
when :array
|
|
134
|
-
row_writer(**args, &
|
|
139
|
+
row_writer(**args, &)
|
|
135
140
|
when :hash
|
|
136
|
-
record_writer(**args, &
|
|
141
|
+
record_writer(**args, &)
|
|
137
142
|
else
|
|
138
143
|
raise(ArgumentError, "Invalid mode: #{mode.inspect}")
|
|
139
144
|
end
|
|
@@ -171,7 +176,7 @@ module IOStreams
|
|
|
171
176
|
# IOStreams.path("target_file.json").copy_from("source_file_name.csv.gz", convert: false)
|
|
172
177
|
#
|
|
173
178
|
# # Advanced copy with custom stream conversions on source and target.
|
|
174
|
-
# source = IOStreams.path("source_file").stream(encoding: "BINARY")
|
|
179
|
+
# source = IOStreams.path("source_file").stream(:encode, encoding: "BINARY")
|
|
175
180
|
# IOStreams.path("target_file.pgp").option(:pgp, passphrase: "hello").copy_from(source)
|
|
176
181
|
def copy_from(source, convert: true, mode: nil, **args)
|
|
177
182
|
if convert
|
|
@@ -322,18 +327,19 @@ module IOStreams
|
|
|
322
327
|
@builder ||= IOStreams::Builder.new
|
|
323
328
|
end
|
|
324
329
|
|
|
325
|
-
def stream_reader(&
|
|
326
|
-
builder.reader(io_stream, &
|
|
330
|
+
def stream_reader(&)
|
|
331
|
+
builder.reader(io_stream, &)
|
|
327
332
|
end
|
|
328
333
|
|
|
329
334
|
def line_reader(embedded_within: nil, **args)
|
|
330
335
|
embedded_within = '"' if embedded_within.nil? && builder.file_name&.include?(".csv")
|
|
331
336
|
|
|
332
337
|
stream_reader do |io|
|
|
333
|
-
yield IOStreams::Line::Reader.new(
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
338
|
+
yield IOStreams::Line::Reader.new(
|
|
339
|
+
io,
|
|
340
|
+
embedded_within: embedded_within,
|
|
341
|
+
**args
|
|
342
|
+
)
|
|
337
343
|
end
|
|
338
344
|
end
|
|
339
345
|
|
|
@@ -363,20 +369,20 @@ module IOStreams
|
|
|
363
369
|
end
|
|
364
370
|
end
|
|
365
371
|
|
|
366
|
-
def stream_writer(&
|
|
367
|
-
builder.writer(io_stream, &
|
|
372
|
+
def stream_writer(&)
|
|
373
|
+
builder.writer(io_stream, &)
|
|
368
374
|
end
|
|
369
375
|
|
|
370
376
|
def line_writer(**args, &block)
|
|
371
|
-
return block.call(io_stream) if io_stream
|
|
377
|
+
return block.call(io_stream) if io_stream.is_a?(IOStreams::Line::Writer)
|
|
372
378
|
|
|
373
379
|
writer do |io|
|
|
374
|
-
IOStreams::Line::Writer.stream(io,
|
|
380
|
+
IOStreams::Line::Writer.stream(io, **args, &block)
|
|
375
381
|
end
|
|
376
382
|
end
|
|
377
383
|
|
|
378
384
|
def row_writer(delimiter: $/, **args, &block)
|
|
379
|
-
return block.call(io_stream) if io_stream
|
|
385
|
+
return block.call(io_stream) if io_stream.is_a?(IOStreams::Row::Writer)
|
|
380
386
|
|
|
381
387
|
line_writer(delimiter: delimiter) do |io|
|
|
382
388
|
IOStreams::Row::Writer.stream(
|
|
@@ -391,7 +397,7 @@ module IOStreams
|
|
|
391
397
|
end
|
|
392
398
|
|
|
393
399
|
def record_writer(delimiter: $/, **args, &block)
|
|
394
|
-
return block.call(io_stream) if io_stream
|
|
400
|
+
return block.call(io_stream) if io_stream.is_a?(IOStreams::Record::Writer)
|
|
395
401
|
|
|
396
402
|
line_writer(delimiter: delimiter) do |io|
|
|
397
403
|
IOStreams::Record::Writer.stream(
|
|
@@ -2,10 +2,10 @@ module IOStreams
|
|
|
2
2
|
module SymmetricEncryption
|
|
3
3
|
class Reader < IOStreams::Reader
|
|
4
4
|
# read from a file/stream using Symmetric Encryption
|
|
5
|
-
def self.stream(input_stream, **args, &
|
|
5
|
+
def self.stream(input_stream, **args, &)
|
|
6
6
|
Utils.load_soft_dependency("symmetric-encryption", ".enc streaming") unless defined?(SymmetricEncryption)
|
|
7
7
|
|
|
8
|
-
::SymmetricEncryption::Reader.open(input_stream, **args, &
|
|
8
|
+
::SymmetricEncryption::Reader.open(input_stream, **args, &)
|
|
9
9
|
end
|
|
10
10
|
end
|
|
11
11
|
end
|
|
@@ -4,18 +4,18 @@ module IOStreams
|
|
|
4
4
|
# Write to stream using Symmetric Encryption
|
|
5
5
|
# By default the output stream is compressed.
|
|
6
6
|
# If the input_stream is already compressed consider setting compress: false.
|
|
7
|
-
def self.stream(input_stream, compress: true, **args, &
|
|
7
|
+
def self.stream(input_stream, compress: true, **args, &)
|
|
8
8
|
Utils.load_soft_dependency("symmetric-encryption", ".enc streaming") unless defined?(SymmetricEncryption)
|
|
9
9
|
|
|
10
|
-
::SymmetricEncryption::Writer.open(input_stream, compress: compress, **args, &
|
|
10
|
+
::SymmetricEncryption::Writer.open(input_stream, compress: compress, **args, &)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
# Write to stream using Symmetric Encryption
|
|
14
14
|
# By default the output stream is compressed unless the file_name extension indicates the file is already compressed.
|
|
15
|
-
def self.file(file_name, compress: nil, **args, &
|
|
15
|
+
def self.file(file_name, compress: nil, **args, &)
|
|
16
16
|
Utils.load_soft_dependency("symmetric-encryption", ".enc streaming") unless defined?(SymmetricEncryption)
|
|
17
17
|
|
|
18
|
-
::SymmetricEncryption::Writer.open(file_name, compress: compress, **args, &
|
|
18
|
+
::SymmetricEncryption::Writer.open(file_name, compress: compress, **args, &)
|
|
19
19
|
end
|
|
20
20
|
end
|
|
21
21
|
end
|
|
@@ -5,16 +5,16 @@ module IOStreams
|
|
|
5
5
|
# Column names that begin with this prefix have been rejected and should be ignored.
|
|
6
6
|
IGNORE_PREFIX = "__rejected__".freeze
|
|
7
7
|
|
|
8
|
-
attr_accessor :
|
|
8
|
+
attr_accessor :allowed_columns, :required_columns, :skip_unknown
|
|
9
|
+
attr_reader :columns
|
|
9
10
|
|
|
10
11
|
# Header
|
|
11
12
|
#
|
|
12
13
|
# Parameters
|
|
13
|
-
# columns [Array<String>]
|
|
14
|
+
# columns [Array<String|Symbol>]
|
|
14
15
|
# Columns in this header.
|
|
15
16
|
# Note:
|
|
16
|
-
#
|
|
17
|
-
# with MongoDB when it converts symbol keys to strings.
|
|
17
|
+
# Column names are converted to strings.
|
|
18
18
|
#
|
|
19
19
|
# allowed_columns [Array<String>]
|
|
20
20
|
# List of columns to allow.
|
|
@@ -33,12 +33,17 @@ module IOStreams
|
|
|
33
33
|
# false:
|
|
34
34
|
# Raises Tabular::InvalidHeader when a column is supplied that is not in the whitelist.
|
|
35
35
|
def initialize(columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
|
|
36
|
-
@columns = columns
|
|
36
|
+
@columns = stringify(columns)
|
|
37
37
|
@required_columns = required_columns
|
|
38
38
|
@allowed_columns = allowed_columns
|
|
39
39
|
@skip_unknown = skip_unknown
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
+
# Set the columns in this header, converting the column names to strings.
|
|
43
|
+
def columns=(columns)
|
|
44
|
+
@columns = stringify(columns)
|
|
45
|
+
end
|
|
46
|
+
|
|
42
47
|
# Returns [Array<String>] list of columns that were ignored during cleansing.
|
|
43
48
|
#
|
|
44
49
|
# Each column is cleansed as follows:
|
|
@@ -127,13 +132,16 @@ module IOStreams
|
|
|
127
132
|
|
|
128
133
|
def array_to_hash(row)
|
|
129
134
|
h = {}
|
|
130
|
-
columns.each_with_index
|
|
135
|
+
columns.each_with_index do |col, i|
|
|
136
|
+
h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX)
|
|
137
|
+
end
|
|
131
138
|
h
|
|
132
139
|
end
|
|
133
140
|
|
|
134
141
|
# Perform cleansing on returned Hash keys during the narrowing process.
|
|
135
142
|
# For example, avoids issues with case etc.
|
|
136
143
|
def cleanse_hash(hash)
|
|
144
|
+
hash = hash.transform_keys(&:to_s) unless hash.keys.all?(String)
|
|
137
145
|
unmatched = columns - hash.keys
|
|
138
146
|
unless unmatched.empty?
|
|
139
147
|
hash = hash.dup
|
|
@@ -149,6 +157,10 @@ module IOStreams
|
|
|
149
157
|
cleansed.gsub!(/\W+/, "")
|
|
150
158
|
cleansed
|
|
151
159
|
end
|
|
160
|
+
|
|
161
|
+
def stringify(columns)
|
|
162
|
+
columns&.collect { |column| column&.to_s }
|
|
163
|
+
end
|
|
152
164
|
end
|
|
153
165
|
end
|
|
154
166
|
end
|
|
@@ -3,16 +3,6 @@ module IOStreams
|
|
|
3
3
|
class Tabular
|
|
4
4
|
module Parser
|
|
5
5
|
class Array < Base
|
|
6
|
-
# Returns [Array<String>] the header row.
|
|
7
|
-
# Returns nil if the row is blank.
|
|
8
|
-
def parse_header(row)
|
|
9
|
-
unless row.is_a?(::Array)
|
|
10
|
-
raise(IOStreams::Errors::InvalidHeader, "Format is :array. Invalid input header: #{row.class.name}")
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
row
|
|
14
|
-
end
|
|
15
|
-
|
|
16
6
|
# Returns Array
|
|
17
7
|
def parse(row)
|
|
18
8
|
raise(IOStreams::Errors::TypeMismatch, "Format is :array. Invalid input: #{row.class.name}") unless row.is_a?(::Array)
|
|
@@ -3,26 +3,6 @@ module IOStreams
|
|
|
3
3
|
class Tabular
|
|
4
4
|
module Parser
|
|
5
5
|
class Csv < Base
|
|
6
|
-
attr_reader :csv_parser
|
|
7
|
-
|
|
8
|
-
unless RUBY_VERSION.to_f >= 2.6
|
|
9
|
-
def initialize
|
|
10
|
-
@csv_parser = Utility::CSVRow.new
|
|
11
|
-
end
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
# Returns [Array<String>] the header row.
|
|
15
|
-
# Returns nil if the row is blank.
|
|
16
|
-
def parse_header(row)
|
|
17
|
-
return row if row.is_a?(::Array)
|
|
18
|
-
|
|
19
|
-
unless row.is_a?(String)
|
|
20
|
-
raise(IOStreams::Errors::InvalidHeader, "Format is :csv. Invalid input header: #{row.class.name}")
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
parse_line(row)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
6
|
# Returns [Array] the parsed CSV line
|
|
27
7
|
def parse(row)
|
|
28
8
|
return row if row.is_a?(::Array)
|
|
@@ -40,26 +20,14 @@ module IOStreams
|
|
|
40
20
|
|
|
41
21
|
private
|
|
42
22
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
# but at least it works on Ruby 2.6 and above.
|
|
46
|
-
def parse_line(line)
|
|
47
|
-
return if IOStreams::Utils.blank?(line)
|
|
23
|
+
def parse_line(line)
|
|
24
|
+
return if IOStreams::Utils.blank?(line)
|
|
48
25
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def render_array(array)
|
|
53
|
-
CSV.generate_line(array, encoding: "UTF-8", row_sep: "")
|
|
54
|
-
end
|
|
55
|
-
else
|
|
56
|
-
def parse_line(line)
|
|
57
|
-
csv_parser.parse(line)
|
|
58
|
-
end
|
|
26
|
+
CSV.parse_line(line)
|
|
27
|
+
end
|
|
59
28
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
end
|
|
29
|
+
def render_array(array)
|
|
30
|
+
CSV.generate_line(array, encoding: "UTF-8", row_sep: "")
|
|
63
31
|
end
|
|
64
32
|
end
|
|
65
33
|
end
|
|
@@ -71,7 +71,7 @@ module IOStreams
|
|
|
71
71
|
def render(row, header)
|
|
72
72
|
hash = header.to_hash(row)
|
|
73
73
|
|
|
74
|
-
result = ""
|
|
74
|
+
result = +""
|
|
75
75
|
layout.columns.each do |column|
|
|
76
76
|
result << column.render(hash[column.key], truncate)
|
|
77
77
|
end
|
|
@@ -93,7 +93,7 @@ module IOStreams
|
|
|
93
93
|
index = 0
|
|
94
94
|
layout.columns.each do |column|
|
|
95
95
|
if column.size == -1
|
|
96
|
-
hash[column.key] = column.parse(line[index
|
|
96
|
+
hash[column.key] = column.parse(line[index..]) if column.key
|
|
97
97
|
break
|
|
98
98
|
end
|
|
99
99
|
|
|
@@ -148,7 +148,7 @@ module IOStreams
|
|
|
148
148
|
|
|
149
149
|
def initialize(size:, key: nil, type: :string, decimals: 2)
|
|
150
150
|
@key = key
|
|
151
|
-
@size =
|
|
151
|
+
@size = [:remainder, "remainder"].include?(size) ? -1 : size.to_i
|
|
152
152
|
@type = type.to_sym
|
|
153
153
|
@decimals = decimals
|
|
154
154
|
|
|
@@ -167,9 +167,9 @@ module IOStreams
|
|
|
167
167
|
when :string
|
|
168
168
|
stripped_value
|
|
169
169
|
when :integer
|
|
170
|
-
stripped_value.
|
|
170
|
+
stripped_value.empty? ? nil : value.to_i
|
|
171
171
|
when :float
|
|
172
|
-
stripped_value.
|
|
172
|
+
stripped_value.empty? ? nil : value.to_f
|
|
173
173
|
else
|
|
174
174
|
raise(Errors::InvalidLayout, "Unsupported type: #{type.inspect}")
|
|
175
175
|
end
|
|
@@ -3,18 +3,6 @@ module IOStreams
|
|
|
3
3
|
module Parser
|
|
4
4
|
# For parsing a single line of Pipe-separated values
|
|
5
5
|
class Psv < Base
|
|
6
|
-
# Returns [Array<String>] the header row.
|
|
7
|
-
# Returns nil if the row is blank.
|
|
8
|
-
def parse_header(row)
|
|
9
|
-
return row if row.is_a?(::Array)
|
|
10
|
-
|
|
11
|
-
unless row.is_a?(String)
|
|
12
|
-
raise(IOStreams::Errors::InvalidHeader, "Format is :psv. Invalid input header: #{row.class.name}")
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
row.split("|")
|
|
16
|
-
end
|
|
17
|
-
|
|
18
6
|
# Returns [Array] the parsed PSV line
|
|
19
7
|
def parse(row)
|
|
20
8
|
return row if row.is_a?(::Array)
|
data/lib/io_streams/tabular.rb
CHANGED
|
@@ -40,10 +40,6 @@ module IOStreams
|
|
|
40
40
|
autoload :Psv, "io_streams/tabular/parser/psv"
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
-
module Utility
|
|
44
|
-
autoload :CSVRow, "io_streams/tabular/utility/csv_row"
|
|
45
|
-
end
|
|
46
|
-
|
|
47
43
|
attr_reader :format, :header, :parser
|
|
48
44
|
|
|
49
45
|
# Parse a delimited data source.
|
|
@@ -59,11 +55,10 @@ module IOStreams
|
|
|
59
55
|
# format_options: [Hash]
|
|
60
56
|
# Any specialized format specific options. For example, `:fixed` format requires the file definition.
|
|
61
57
|
#
|
|
62
|
-
# columns [Array<String>]
|
|
58
|
+
# columns [Array<String|Symbol>]
|
|
63
59
|
# The header columns when the file does not include a header row.
|
|
64
60
|
# Note:
|
|
65
|
-
#
|
|
66
|
-
# with MongoDB when it converts symbol keys to strings.
|
|
61
|
+
# Column names are converted to strings.
|
|
67
62
|
#
|
|
68
63
|
# allowed_columns [Array<String>]
|
|
69
64
|
# List of columns to allow.
|
|
@@ -91,7 +86,7 @@ module IOStreams
|
|
|
91
86
|
@header = Header.new(**args)
|
|
92
87
|
@format = file_name && format.nil? ? self.class.format_from_file_name(file_name) : format
|
|
93
88
|
@format ||= default_format
|
|
94
|
-
raise(UnknownFormat, "The format cannot be inferred from the file name: #{file_name}") unless @format
|
|
89
|
+
raise(Errors::UnknownFormat, "The format cannot be inferred from the file name: #{file_name}") unless @format
|
|
95
90
|
|
|
96
91
|
klass = self.class.parser_class(@format)
|
|
97
92
|
@parser = format_options ? klass.new(**format_options) : klass.new
|
|
@@ -177,7 +172,7 @@ module IOStreams
|
|
|
177
172
|
# Returns [Symbol] the format removed, or nil if the format was not registered
|
|
178
173
|
#
|
|
179
174
|
# Example:
|
|
180
|
-
#
|
|
175
|
+
# deregister_format(:psv)
|
|
181
176
|
def self.deregister_format(format)
|
|
182
177
|
raise(ArgumentError, "Invalid format #{format.inspect}") unless format.to_s =~ /\A\w+\Z/
|
|
183
178
|
|
|
@@ -200,7 +195,7 @@ module IOStreams
|
|
|
200
195
|
|
|
201
196
|
# Returns the parser class for the registered format.
|
|
202
197
|
def self.parser_class(format)
|
|
203
|
-
@formats[format
|
|
198
|
+
@formats[format&.to_sym] ||
|
|
204
199
|
raise(ArgumentError, "Unknown Tabular Format: #{format.inspect}")
|
|
205
200
|
end
|
|
206
201
|
|
data/lib/io_streams/utils.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
require "cgi"
|
|
1
2
|
require "uri"
|
|
2
3
|
require "tmpdir"
|
|
3
4
|
module IOStreams
|
|
@@ -13,13 +14,10 @@ module IOStreams
|
|
|
13
14
|
|
|
14
15
|
# Helper method: Returns [true|false] if a value is blank?
|
|
15
16
|
def self.blank?(value)
|
|
16
|
-
if value.nil?
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
else
|
|
21
|
-
value.respond_to?(:empty?) ? value.empty? : !value
|
|
22
|
-
end
|
|
17
|
+
return true if value.nil?
|
|
18
|
+
return value !~ /\S/ if value.is_a?(String)
|
|
19
|
+
|
|
20
|
+
value.respond_to?(:empty?) ? value.empty? : !value
|
|
23
21
|
end
|
|
24
22
|
|
|
25
23
|
# Yields the path to a temporary file_name.
|
|
@@ -30,7 +28,7 @@ module IOStreams
|
|
|
30
28
|
::Dir::Tmpname.create([basename, extension], IOStreams.temp_dir, max_try: MAX_TEMP_FILE_NAME_ATTEMPTS) do |tmpname|
|
|
31
29
|
result = yield(tmpname)
|
|
32
30
|
ensure
|
|
33
|
-
::
|
|
31
|
+
::FileUtils.rm_f(tmpname)
|
|
34
32
|
end
|
|
35
33
|
result
|
|
36
34
|
end
|
data/lib/io_streams/version.rb
CHANGED
data/lib/io_streams/writer.rb
CHANGED
|
@@ -2,22 +2,22 @@ module IOStreams
|
|
|
2
2
|
class Writer
|
|
3
3
|
# When a Writer does not support streams, we write to a local temp file
|
|
4
4
|
# and then copy the contents of that file to the output stream.
|
|
5
|
-
def self.stream(output_stream,
|
|
5
|
+
def self.stream(output_stream, **args, &block)
|
|
6
6
|
Utils.temp_file_name("iostreams_writer") do |file_name|
|
|
7
|
-
count = file(file_name,
|
|
7
|
+
count = file(file_name, **args, &block)
|
|
8
8
|
::File.open(file_name, "rb") { |source| ::IO.copy_stream(source, output_stream) }
|
|
9
9
|
count
|
|
10
10
|
end
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
# When a Writer supports streams, also allow it to simply support a file
|
|
14
|
-
def self.file(file_name,
|
|
15
|
-
::File.open(file_name, "wb") { |file| stream(file,
|
|
14
|
+
def self.file(file_name, **args, &block)
|
|
15
|
+
::File.open(file_name, "wb") { |file| stream(file, **args, &block) }
|
|
16
16
|
end
|
|
17
17
|
|
|
18
18
|
# For processing by either a file name or an open IO stream.
|
|
19
|
-
def self.open(file_name_or_io, **args, &
|
|
20
|
-
file_name_or_io.is_a?(String) ? file(file_name_or_io, **args, &
|
|
19
|
+
def self.open(file_name_or_io, **args, &)
|
|
20
|
+
file_name_or_io.is_a?(String) ? file(file_name_or_io, **args, &) : stream(file_name_or_io, **args, &)
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
attr_reader :output_stream
|
|
@@ -4,7 +4,7 @@ module IOStreams
|
|
|
4
4
|
module Xlsx
|
|
5
5
|
class Reader < IOStreams::Reader
|
|
6
6
|
# Convert an xlsx or xlsm file into CSV format.
|
|
7
|
-
def self.file(file_name,
|
|
7
|
+
def self.file(file_name, &block)
|
|
8
8
|
# Stream into a temp file as csv
|
|
9
9
|
Utils.temp_file_name("iostreams_csv") do |temp_file_name|
|
|
10
10
|
::File.open(temp_file_name, "wb") { |io| new(file_name).each { |lines| io << lines.to_csv } }
|
|
@@ -1,30 +1,42 @@
|
|
|
1
1
|
module IOStreams
|
|
2
2
|
module Zip
|
|
3
3
|
class Writer < IOStreams::Writer
|
|
4
|
+
# When writing to a file, default the entry name within the zip to the file name
|
|
5
|
+
# without the `.zip` extension, unless an entry name was explicitly supplied.
|
|
6
|
+
def self.file(file_name, zip_file_name: nil, entry_file_name: zip_file_name, &)
|
|
7
|
+
entry_file_name = file_name.to_s[0..-5] if entry_file_name.nil? && file_name.to_s =~ /\.zip\z/i
|
|
8
|
+
|
|
9
|
+
super(file_name, entry_file_name: entry_file_name, &)
|
|
10
|
+
end
|
|
11
|
+
|
|
4
12
|
# Write a single file in Zip format to the supplied output stream
|
|
5
13
|
#
|
|
6
14
|
# Parameters
|
|
7
15
|
# output_stream [IO]
|
|
8
16
|
# Output stream to write to
|
|
9
17
|
#
|
|
10
|
-
# original_file_name [String]
|
|
11
|
-
# Since this is a stream the original file name is used to create the entry_file_name if not supplied
|
|
12
|
-
#
|
|
13
18
|
# entry_file_name: [String]
|
|
14
19
|
# Name of the file entry within the Zip file.
|
|
20
|
+
# Default: "file"
|
|
15
21
|
#
|
|
16
22
|
# The stream supplied to the block only responds to #write
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
23
|
+
#
|
|
24
|
+
# Note:
|
|
25
|
+
# This writer uses `zip_kit` rather than `rubyzip` on purpose. `rubyzip`'s
|
|
26
|
+
# `Zip::OutputStream` requires a seekable output: it seeks back to rewrite each
|
|
27
|
+
# entry's local header with the CRC and sizes once the entry is finished. That
|
|
28
|
+
# means it cannot write directly to a non-seekable destination (S3, SFTP, HTTP,
|
|
29
|
+
# a socket); the output would first have to be spooled to a temporary file and
|
|
30
|
+
# then copied across. `zip_kit` streams to non-seekable outputs by emitting
|
|
31
|
+
# data descriptors instead, so we can write straight to the output stream and
|
|
32
|
+
# avoid the temp file round-trip.
|
|
33
|
+
def self.stream(output_stream, zip_file_name: nil, entry_file_name: zip_file_name)
|
|
22
34
|
entry_file_name ||= "file"
|
|
23
35
|
|
|
24
|
-
Utils.load_soft_dependency("
|
|
36
|
+
Utils.load_soft_dependency("zip_kit", "Zip") unless defined?(ZipKit::Streamer)
|
|
25
37
|
|
|
26
38
|
result = nil
|
|
27
|
-
|
|
39
|
+
ZipKit::Streamer.open(output_stream) do |zip|
|
|
28
40
|
zip.write_deflated_file(entry_file_name) { |io| result = yield(io) }
|
|
29
41
|
end
|
|
30
42
|
result
|