iostreams 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/io_streams/bzip2/reader.rb +1 -1
- data/lib/io_streams/bzip2/writer.rb +1 -1
- data/lib/io_streams/encode/reader.rb +102 -0
- data/lib/io_streams/encode/writer.rb +78 -0
- data/lib/io_streams/errors.rb +19 -0
- data/lib/io_streams/file/reader.rb +1 -1
- data/lib/io_streams/file/writer.rb +1 -3
- data/lib/io_streams/gzip/reader.rb +1 -1
- data/lib/io_streams/gzip/writer.rb +1 -1
- data/lib/io_streams/io_streams.rb +57 -38
- data/lib/io_streams/line/reader.rb +125 -69
- data/lib/io_streams/line/writer.rb +11 -35
- data/lib/io_streams/pgp.rb +1 -1
- data/lib/io_streams/record/reader.rb +12 -14
- data/lib/io_streams/record/writer.rb +12 -14
- data/lib/io_streams/row/reader.rb +15 -16
- data/lib/io_streams/row/writer.rb +14 -12
- data/lib/io_streams/tabular.rb +50 -30
- data/lib/io_streams/tabular/header.rb +6 -6
- data/lib/io_streams/tabular/parser/array.rb +2 -2
- data/lib/io_streams/tabular/parser/csv.rb +6 -2
- data/lib/io_streams/tabular/parser/fixed.rb +18 -37
- data/lib/io_streams/tabular/parser/hash.rb +1 -1
- data/lib/io_streams/tabular/parser/json.rb +3 -1
- data/lib/io_streams/tabular/parser/psv.rb +6 -2
- data/lib/io_streams/version.rb +1 -1
- data/lib/io_streams/xlsx/reader.rb +22 -32
- data/lib/iostreams.rb +6 -0
- data/test/encode_reader_test.rb +54 -0
- data/test/encode_writer_test.rb +82 -0
- data/test/io_streams_test.rb +0 -65
- data/test/line_reader_test.rb +180 -37
- data/test/tabular_test.rb +79 -3
- data/test/test_helper.rb +1 -1
- data/test/xlsx_reader_test.rb +7 -10
- metadata +10 -4
- data/lib/io_streams/tabular/errors.rb +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9392af579f83fb26f1ea2cfeefae497362c1232ff1226adb4220cc54805b93c2
|
4
|
+
data.tar.gz: 8db9dd03113c32ebe44b54610509dc86626f44deb6900fae65ac7cdd34a3abee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2810231db04f7a98086bb47a7657adb7888dc3f16e03f48e9824170bf36a8df65ca006cfbcaca7360e42f609f5444be189fa24ba1ae4610c5c727f3c18c6bb85
|
7
|
+
data.tar.gz: 2e247fb89ad090d96cd6e0b20aaf9afd0b71cbbef7dc6cb7e84bd60182c51c6ce9cb461dfcdb4d35d8e5b0cdfb8822667cb35f42a8e54ed6d67b4575d02bffdb
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Bzip2
|
3
3
|
class Reader
|
4
4
|
# Read from a Bzip2 file or stream, decompressing the contents as it is read
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
begin
|
7
7
|
require 'rbzip2' unless defined?(RBzip2)
|
8
8
|
rescue LoadError => e
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Bzip2
|
3
3
|
class Writer
|
4
4
|
# Write to a file / stream, compressing with Bzip2
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
begin
|
7
7
|
require 'rbzip2' unless defined?(RBzip2)
|
8
8
|
rescue LoadError => e
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module IOStreams
  module Encode
    # Wraps an input stream and converts every block of data read from it to the
    # requested encoding, optionally cleansing the converted data afterwards.
    class Reader
      attr_reader :encoding, :cleaner

      # Matches every character that is neither printable nor a line terminator (\r or \n).
      NOT_PRINTABLE = /[^[:print:]\r\n]/.freeze

      # Builtin cleansing rules that can be selected by name with `encode_cleaner:`.
      CLEANSE_RULES = {
        # Strips all non printable characters.
        # `gsub!` returns nil when nothing was replaced, hence the fallback to `data`.
        printable: ->(data) { data.gsub!(NOT_PRINTABLE, '') || data }
      }

      # Open a file or stream for reading, yielding an encoding reader to the block.
      #
      # Parameters
      #   file_name_or_io: [String|IO]
      #     String: Name of the file to open with IOStreams::File::Reader.
      #     Otherwise: An input stream that implements #read.
      #
      #   args
      #     Passed through to #initialize.
      def self.open(file_name_or_io, **args)
        if file_name_or_io.is_a?(String)
          IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
        else
          yield new(file_name_or_io, **args)
        end
      end

      # Returns the callable to cleanse data with, or nil when no cleansing was requested.
      #
      # Note: This method is deliberately public. It is called with an explicit
      # receiver from #initialize and from IOStreams::Encode::Writer.
      #
      # Parameters
      #   cleaner: [nil|Symbol|#call]
      #     nil:    No cleansing.
      #     Symbol: Name of one of the CLEANSE_RULES.
      #     #call:  Proc, lambda or any other object that responds to #call.
      #
      # Raises ArgumentError when the rule name is unknown, or the cleaner is not callable.
      def self.extract_cleaner(cleaner)
        return unless cleaner

        if cleaner.is_a?(Symbol)
          CLEANSE_RULES[cleaner] || raise(ArgumentError, "Invalid cleansing rule #{cleaner.inspect}")
        elsif cleaner.respond_to?(:call)
          cleaner
        else
          raise(ArgumentError, "Invalid cleaner #{cleaner.inspect}, must be a Symbol or respond to #call")
        end
      end

      # Apply encoding conversion when reading a stream.
      #
      # Parameters
      #   input_stream
      #     The input stream that implements #read
      #
      #   encoding: [String|Encoding]
      #     Encode returned data with this encoding.
      #     'US-ASCII':   Original 7 bit ASCII Format
      #     'ASCII-8BIT': 8-bit ASCII Format
      #     'UTF-8':      UTF-8 Format
      #     Etc.
      #     nil:          Return the data as read, without any encoding conversion.
      #     Default: 'UTF-8'
      #
      #   encode_replace: [String]
      #     The character to replace with when a character cannot be converted to the target encoding.
      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
      #     Default: nil
      #
      #   encode_cleaner: [nil|symbol|Proc]
      #     Cleanse data read from the input stream.
      #     nil:           No cleansing
      #     :printable     Cleanse all non-printable characters except \r and \n
      #     Proc/lambda    Proc to call after every read to cleanse the data
      #     Default: nil
      def initialize(input_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil)
        @input_stream     = input_stream
        @cleaner          = self.class.extract_cleaner(encode_cleaner)
        @encoding         = encoding.nil? || encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
        @encoding_options = encode_replace.nil? ? {} : {invalid: :replace, undef: :replace, replace: encode_replace}

        # More efficient read buffering only supported when the input stream `#read` method supports it.
        @read_cache_buffer =
          if encode_replace.nil? && !@input_stream.method(:read).arity.between?(0, 1)
            @encoding ? ''.encode(@encoding) : ''.dup
          end
      end

      # Returns [String] data returned from the input stream.
      # Returns [nil] if end of file and no further data was read.
      def read(size = nil)
        block =
          if @read_cache_buffer
            begin
              @input_stream.read(size, @read_cache_buffer)
            rescue ArgumentError
              # An arity of -1 does not guarantee that a second (buffer) argument is accepted.
              # Stop using the buffer for all subsequent reads.
              @read_cache_buffer = nil
              @input_stream.read(size)
            end
          else
            @input_stream.read(size)
          end

        # EOF reached?
        return unless block

        # The options must be passed as keywords: a positional Hash is not treated as options on Ruby 3.
        block = block.encode(@encoding, **@encoding_options) if @encoding && block.encoding != @encoding
        block = @cleaner.call(block) if @cleaner
        block
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module IOStreams
  module Encode
    # Wraps an output stream, converting all data to the requested encoding,
    # and optionally cleansing it, before it is written.
    class Writer
      attr_reader :encoding, :cleaner

      # Open a file or stream for writing, yielding an encoding writer to the block.
      #
      # Parameters
      #   file_name_or_io: [String|IO]
      #     String: Name of the file to open with IOStreams::File::Writer.
      #     Otherwise: An output stream that implements #write.
      #
      #   args
      #     Passed through to #initialize.
      def self.open(file_name_or_io, **args)
        if file_name_or_io.is_a?(String)
          IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
        else
          yield new(file_name_or_io, **args)
        end
      end

      # A stream writer that encodes the supplied data prior to writing it
      # to the supplied output stream.
      #
      # Parameters
      #   output_stream
      #     The output stream that implements #write
      #
      #   encoding: [String|Encoding]
      #     Encode written data with this encoding.
      #     'US-ASCII':   Original 7 bit ASCII Format
      #     'ASCII-8BIT': 8-bit ASCII Format
      #     'UTF-8':      UTF-8 Format
      #     Etc.
      #     nil:          Write the data as supplied, without any encoding conversion.
      #     Default: 'UTF-8'
      #
      #   encode_replace: [String]
      #     The character to replace with when a character cannot be converted to the target encoding.
      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
      #     Default: nil
      #
      #   encode_cleaner: [nil|symbol|Proc]
      #     Cleanse data before it is written to the output stream.
      #     nil:           No cleansing
      #     :printable     Cleanse all non-printable characters except \r and \n
      #     Proc/lambda    Proc to call before every write to cleanse the data
      #     Default: nil
      def initialize(output_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil)
        @output_stream    = output_stream
        # The cleaner lookup is shared with the reader so that both support the same rules.
        @cleaner          = ::IOStreams::Encode::Reader.send(:extract_cleaner, encode_cleaner)
        @encoding         = encoding.nil? || encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
        @encoding_options = encode_replace.nil? ? {} : {invalid: :replace, undef: :replace, replace: encode_replace}
      end

      # Write data to the output stream.
      # Returns self so that calls can be chained.
      #
      # Example:
      #   IOStreams.writer('a.txt', encoding: 'UTF-8') do |stream|
      #     stream << 'first line' << 'second line'
      #   end
      def <<(record)
        write(record)
        self
      end

      # Encode, cleanse, and then write the data to the output stream.
      # No delimiter is added.
      #
      # Returns whatever the output stream's #write returns,
      # which for IO streams is the number of bytes written.
      # Returns 0 when data is nil.
      #
      # Example:
      #   IOStreams.writer('a.txt', encoding: 'UTF-8') do |stream|
      #     count = stream.write('first line')
      #     puts "Wrote #{count} bytes to the output file"
      #   end
      def write(data)
        return 0 if data.nil?

        text = data.to_s
        # The options must be passed as keywords: a positional Hash is not treated as options on Ruby 3.
        block = @encoding.nil? || text.encoding == @encoding ? text : text.encode(@encoding, **@encoding_options)

        if @cleaner
          # Cleaners may modify the string in place, never modify (or fail on a frozen) caller's string.
          block = block.dup if block.equal?(data)
          block = @cleaner.call(block)
        end

        @output_stream.write(block)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module IOStreams
  # All exceptions declared by IOStreams.
  # Rescue IOStreams::Errors::Error to handle any of them.
  module Errors
    # Base class for every IOStreams specific exception.
    class Error < StandardError; end

    # Header related failure: the header is invalid.
    class InvalidHeader < Error; end

    # Header related failure: a required header is missing.
    class MissingHeader < Error; end

    # A value is not of the expected type.
    class TypeMismatch < Error; end

    # When the specified delimiter is not found in the supplied stream / file
    class DelimiterNotFound < Error; end
  end
end
|
@@ -4,7 +4,7 @@ module IOStreams
|
|
4
4
|
# Read from a named file
|
5
5
|
# TODO: Add support for mode (text / binary)
|
6
6
|
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
-
def self.open(file_name,
|
7
|
+
def self.open(file_name, **args, &block)
|
8
8
|
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
9
9
|
|
10
10
|
::File.open(file_name, 'rb', &block)
|
@@ -2,9 +2,7 @@ module IOStreams
|
|
2
2
|
module File
|
3
3
|
class Writer
|
4
4
|
# Write to a named file
|
5
|
-
|
6
|
-
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
-
def self.open(file_name, _=nil, &block)
|
5
|
+
def self.open(file_name, **args, &block)
|
8
6
|
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
9
7
|
|
10
8
|
::File.open(file_name, 'wb', &block)
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Gzip
|
3
3
|
class Reader
|
4
4
|
# Read from a gzip file or stream, decompressing the contents as it is read
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
unless IOStreams.reader_stream?(file_name_or_io)
|
7
7
|
::Zlib::GzipReader.open(file_name_or_io, &block)
|
8
8
|
else
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Gzip
|
3
3
|
class Writer
|
4
4
|
# Write to a file / stream, compressing with GZip
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
unless IOStreams.writer_stream?(file_name_or_io)
|
7
7
|
Zlib::GzipWriter.open(file_name_or_io, &block)
|
8
8
|
else
|
@@ -1,4 +1,9 @@
|
|
1
1
|
require 'concurrent'
|
2
|
+
# Load Symmetric Encryption if present so that its reader and writer can be registered
|
3
|
+
begin
|
4
|
+
require 'symmetric-encryption'
|
5
|
+
rescue LoadError
|
6
|
+
end
|
2
7
|
|
3
8
|
# Streaming library for Ruby
|
4
9
|
#
|
@@ -60,20 +65,20 @@ module IOStreams
|
|
60
65
|
# Note:
|
61
66
|
# * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
|
62
67
|
# no streams are passed in.
|
63
|
-
def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
|
64
|
-
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
|
68
|
+
def self.reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
69
|
+
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
|
65
70
|
end
|
66
71
|
|
67
72
|
# Iterate over a file / stream returning one line at a time.
|
68
|
-
def self.each_line(file_name_or_io, **args, &block)
|
69
|
-
line_reader(file_name_or_io, **args) do |line_stream|
|
73
|
+
def self.each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
74
|
+
line_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |line_stream|
|
70
75
|
line_stream.each(&block)
|
71
76
|
end
|
72
77
|
end
|
73
78
|
|
74
79
|
# Iterate over a file / stream returning one row (an array of values) at a time.
|
75
|
-
def self.each_row(file_name_or_io, **args, &block)
|
76
|
-
row_reader(file_name_or_io, **args) do |row_stream|
|
80
|
+
def self.each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
81
|
+
row_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |row_stream|
|
77
82
|
row_stream.each(&block)
|
78
83
|
end
|
79
84
|
end
|
@@ -90,8 +95,8 @@ module IOStreams
|
|
90
95
|
# IOStreams.each_record(file_name) do |hash|
|
91
96
|
# p hash
|
92
97
|
# end
|
93
|
-
def self.each_record(file_name_or_io, **args, &block)
|
94
|
-
record_reader(file_name_or_io, **args) do |record_stream|
|
98
|
+
def self.each_record(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
99
|
+
record_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |record_stream|
|
95
100
|
record_stream.each(&block)
|
96
101
|
end
|
97
102
|
end
|
@@ -148,32 +153,32 @@ module IOStreams
|
|
148
153
|
# Note:
|
149
154
|
# * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
|
150
155
|
# no streams are passed in.
|
151
|
-
def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
|
152
|
-
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
|
156
|
+
def self.writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
157
|
+
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
|
153
158
|
end
|
154
159
|
|
155
|
-
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
160
|
+
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
156
161
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
|
157
162
|
|
158
|
-
writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
163
|
+
writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
159
164
|
IOStreams::Line::Writer.open(io, **args, &block)
|
160
165
|
end
|
161
166
|
end
|
162
167
|
|
163
|
-
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
168
|
+
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
164
169
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
|
165
170
|
|
166
|
-
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
171
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
167
172
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
168
173
|
|
169
174
|
IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
|
170
175
|
end
|
171
176
|
end
|
172
177
|
|
173
|
-
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
178
|
+
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
174
179
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
|
175
180
|
|
176
|
-
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
181
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
177
182
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
178
183
|
|
179
184
|
IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
|
@@ -185,7 +190,6 @@ module IOStreams
|
|
185
190
|
#
|
186
191
|
# Example: Copy between 2 files
|
187
192
|
# IOStreams.copy('a.csv', 'b.csv')
|
188
|
-
# # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
|
189
193
|
#
|
190
194
|
# Example: Read content from a Xlsx file and write it out in CSV form.
|
191
195
|
# IOStreams.copy('a.xlsx', 'b.csv')
|
@@ -311,23 +315,23 @@ module IOStreams
|
|
311
315
|
end
|
312
316
|
|
313
317
|
# Iterate over a file / stream returning each record/line one at a time.
|
314
|
-
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
318
|
+
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
315
319
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
|
316
|
-
file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
|
317
320
|
file_name_or_io.is_a?(Array)
|
318
321
|
|
319
|
-
reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
322
|
+
reader(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
320
323
|
IOStreams::Line::Reader.open(io, **args, &block)
|
321
324
|
end
|
322
325
|
end
|
323
326
|
|
324
|
-
# Iterate over a file / stream returning each line as
|
327
|
+
# Iterate over a file / stream returning each line as an array, one at a time.
|
325
328
|
def self.row_reader(file_name_or_io,
|
326
329
|
streams: nil,
|
327
330
|
delimiter: nil,
|
328
|
-
encoding: IOStreams::UTF8_ENCODING,
|
329
|
-
strip_non_printable: false,
|
330
331
|
file_name: nil,
|
332
|
+
encoding: nil,
|
333
|
+
encode_cleaner: nil,
|
334
|
+
encode_replace: nil,
|
331
335
|
**args,
|
332
336
|
&block)
|
333
337
|
|
@@ -335,12 +339,13 @@ module IOStreams
|
|
335
339
|
|
336
340
|
line_reader(
|
337
341
|
file_name_or_io,
|
338
|
-
streams:
|
339
|
-
delimiter:
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
342
|
+
streams: streams,
|
343
|
+
delimiter: delimiter,
|
344
|
+
file_name: file_name,
|
345
|
+
encoding: encoding,
|
346
|
+
encode_cleaner: encode_cleaner,
|
347
|
+
encode_replace: encode_replace
|
348
|
+
) do |io|
|
344
349
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
345
350
|
IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
|
346
351
|
end
|
@@ -350,21 +355,24 @@ module IOStreams
|
|
350
355
|
def self.record_reader(file_name_or_io,
|
351
356
|
streams: nil,
|
352
357
|
delimiter: nil,
|
353
|
-
encoding: IOStreams::UTF8_ENCODING,
|
354
|
-
strip_non_printable: false,
|
355
358
|
file_name: nil,
|
359
|
+
encoding: nil,
|
360
|
+
encode_cleaner: nil,
|
361
|
+
encode_replace: nil,
|
356
362
|
**args,
|
357
363
|
&block)
|
358
364
|
|
359
|
-
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
|
365
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
|
360
366
|
|
361
367
|
line_reader(
|
362
368
|
file_name_or_io,
|
363
|
-
streams:
|
364
|
-
delimiter:
|
365
|
-
|
366
|
-
|
367
|
-
|
369
|
+
streams: streams,
|
370
|
+
delimiter: delimiter,
|
371
|
+
file_name: file_name,
|
372
|
+
encoding: encoding,
|
373
|
+
encode_cleaner: encode_cleaner,
|
374
|
+
encode_replace: encode_replace
|
375
|
+
) do |io|
|
368
376
|
|
369
377
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
370
378
|
IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
|
@@ -414,7 +422,7 @@ module IOStreams
|
|
414
422
|
StreamStruct = Struct.new(:klass, :options)
|
415
423
|
|
416
424
|
# Returns a reader or writer stream
|
417
|
-
def self.stream(type, file_name_or_io, streams:, file_name:, &block)
|
425
|
+
def self.stream(type, file_name_or_io, streams:, file_name:, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
418
426
|
# TODO: Add support for different schemes, such as file://, s3://, sftp://
|
419
427
|
|
420
428
|
streams = streams_for_file_name(file_name) if streams.nil? && file_name
|
@@ -427,6 +435,17 @@ module IOStreams
|
|
427
435
|
end
|
428
436
|
|
429
437
|
stream_structs = streams_for(type, streams)
|
438
|
+
|
439
|
+
# Add encoding stream if any of its options are present
|
440
|
+
if encoding || encode_cleaner || encode_replace
|
441
|
+
klass = type == :reader ? IOStreams::Encode::Reader : IOStreams::Encode::Writer
|
442
|
+
options = {}
|
443
|
+
options[:encoding] = encoding if encoding
|
444
|
+
options[:encode_cleaner] = encode_cleaner if encode_cleaner
|
445
|
+
options[:encode_replace] = encode_replace if encode_replace
|
446
|
+
stream_structs.unshift(StreamStruct.new(klass, options))
|
447
|
+
end
|
448
|
+
|
430
449
|
if stream_structs.size == 1
|
431
450
|
stream_struct = stream_structs.first
|
432
451
|
stream_struct.klass.open(file_name_or_io, stream_struct.options, &block)
|