iostreams 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/io_streams/bzip2/reader.rb +1 -1
- data/lib/io_streams/bzip2/writer.rb +1 -1
- data/lib/io_streams/encode/reader.rb +102 -0
- data/lib/io_streams/encode/writer.rb +78 -0
- data/lib/io_streams/errors.rb +19 -0
- data/lib/io_streams/file/reader.rb +1 -1
- data/lib/io_streams/file/writer.rb +1 -3
- data/lib/io_streams/gzip/reader.rb +1 -1
- data/lib/io_streams/gzip/writer.rb +1 -1
- data/lib/io_streams/io_streams.rb +57 -38
- data/lib/io_streams/line/reader.rb +125 -69
- data/lib/io_streams/line/writer.rb +11 -35
- data/lib/io_streams/pgp.rb +1 -1
- data/lib/io_streams/record/reader.rb +12 -14
- data/lib/io_streams/record/writer.rb +12 -14
- data/lib/io_streams/row/reader.rb +15 -16
- data/lib/io_streams/row/writer.rb +14 -12
- data/lib/io_streams/tabular.rb +50 -30
- data/lib/io_streams/tabular/header.rb +6 -6
- data/lib/io_streams/tabular/parser/array.rb +2 -2
- data/lib/io_streams/tabular/parser/csv.rb +6 -2
- data/lib/io_streams/tabular/parser/fixed.rb +18 -37
- data/lib/io_streams/tabular/parser/hash.rb +1 -1
- data/lib/io_streams/tabular/parser/json.rb +3 -1
- data/lib/io_streams/tabular/parser/psv.rb +6 -2
- data/lib/io_streams/version.rb +1 -1
- data/lib/io_streams/xlsx/reader.rb +22 -32
- data/lib/iostreams.rb +6 -0
- data/test/encode_reader_test.rb +54 -0
- data/test/encode_writer_test.rb +82 -0
- data/test/io_streams_test.rb +0 -65
- data/test/line_reader_test.rb +180 -37
- data/test/tabular_test.rb +79 -3
- data/test/test_helper.rb +1 -1
- data/test/xlsx_reader_test.rb +7 -10
- metadata +10 -4
- data/lib/io_streams/tabular/errors.rb +0 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9392af579f83fb26f1ea2cfeefae497362c1232ff1226adb4220cc54805b93c2
|
4
|
+
data.tar.gz: 8db9dd03113c32ebe44b54610509dc86626f44deb6900fae65ac7cdd34a3abee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2810231db04f7a98086bb47a7657adb7888dc3f16e03f48e9824170bf36a8df65ca006cfbcaca7360e42f609f5444be189fa24ba1ae4610c5c727f3c18c6bb85
|
7
|
+
data.tar.gz: 2e247fb89ad090d96cd6e0b20aaf9afd0b71cbbef7dc6cb7e84bd60182c51c6ce9cb461dfcdb4d35d8e5b0cdfb8822667cb35f42a8e54ed6d67b4575d02bffdb
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Bzip2
|
3
3
|
class Reader
|
4
4
|
# Read from a Bzip2 file or stream, decompressing the contents as it is read
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
begin
|
7
7
|
require 'rbzip2' unless defined?(RBzip2)
|
8
8
|
rescue LoadError => e
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Bzip2
|
3
3
|
class Writer
|
4
4
|
# Write to a file / stream, compressing with Bzip2
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
begin
|
7
7
|
require 'rbzip2' unless defined?(RBzip2)
|
8
8
|
rescue LoadError => e
|
@@ -0,0 +1,102 @@
|
|
1
|
+
module IOStreams
  module Encode
    # Input stream wrapper that converts every block read from the underlying
    # stream to the requested encoding and optionally cleanses the result.
    class Reader
      attr_reader :encoding, :cleaner

      # Matches every character that is neither printable nor a \r or \n.
      NOT_PRINTABLE = /[^[:print:]\r\n]/.freeze

      # Builtin cleansing rules that can be applied after encoding the read data.
      CLEANSE_RULES = {
        # Strips all non printable characters.
        # `gsub!` returns nil when nothing was replaced, hence the fallback to the data itself.
        printable: ->(data) { data.gsub!(NOT_PRINTABLE, '') || data }
      }

      # Yields an encoding reader for the supplied file name or input stream.
      #
      # When a file name (String) is supplied the file is opened with
      # IOStreams::File::Reader for the duration of the block.
      # All keyword arguments are passed through to #initialize.
      def self.open(file_name_or_io, **args)
        if file_name_or_io.is_a?(String)
          IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
        else
          yield new(file_name_or_io, **args)
        end
      end

      # Returns the Proc to use for cleansing, or nil when no cleansing is required.
      #
      # A Symbol is looked up in CLEANSE_RULES, a Proc is returned as-is.
      # Raises ArgumentError when the Symbol is not a known cleansing rule.
      #
      # Note: This method is public. It is called with an explicit receiver from
      # #initialize, and IOStreams::Encode::Writer invokes it via `send`.
      def self.extract_cleaner(cleaner)
        case cleaner
        when Symbol
          CLEANSE_RULES.fetch(cleaner) { raise(ArgumentError, "Invalid cleansing rule #{cleaner.inspect}") }
        when Proc
          cleaner
        end
      end

      # Apply encoding conversion when reading a stream.
      #
      # Parameters
      #   input_stream
      #     The input stream that implements #read
      #
      #   encoding: [String|Encoding]
      #     Encode returned data with this encoding.
      #     'US-ASCII':   Original 7 bit ASCII Format
      #     'ASCII-8BIT': 8-bit ASCII Format
      #     'UTF-8':      UTF-8 Format
      #     Etc.
      #     nil:          Do not convert, return the data in the encoding supplied by the input stream.
      #     Default: 'UTF-8'
      #
      #   encode_replace: [String]
      #     The character to replace with when a character cannot be converted to the target encoding.
      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
      #     Default: nil
      #
      #   encode_cleaner: [nil|symbol|Proc]
      #     Cleanse data read from the input stream.
      #     nil:           No cleansing
      #     :printable     Cleanse all non-printable characters except \r and \n
      #     Proc/lambda    Proc to call after every read to cleanse the data
      #     Default: nil
      def initialize(input_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil)
        @input_stream = input_stream
        @cleaner      = self.class.extract_cleaner(encode_cleaner)

        @encoding         = encoding.nil? || encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
        @encoding_options = encode_replace.nil? ? {} : {invalid: :replace, undef: :replace, replace: encode_replace}

        # More efficient read buffering only supported when the input stream `#read` method supports it.
        @read_cache_buffer =
          if encode_replace.nil? && !@input_stream.method(:read).arity.between?(0, 1)
            @encoding ? ''.encode(@encoding) : String.new
          end
      end

      # Returns [String] data returned from the input stream.
      # Returns [nil] if end of file and no further data was read.
      #
      # Note: When the read buffer is in use, the data already has the target encoding,
      # and the cleaner (if any) returns its argument, the returned string is the
      # internal read buffer itself, which is handed to the input stream again on the
      # next call to #read.
      def read(size = nil)
        block = read_block(size)

        # EOF reached?
        return unless block

        # The options must be passed as keywords; a positional Hash is treated as the
        # source encoding by String#encode on Ruby 3 and raises TypeError.
        block = block.encode(@encoding, **@encoding_options) if @encoding && block.encoding != @encoding
        block = @cleaner.call(block) if @cleaner
        block
      end

      private

      # Reads the next block from the input stream, re-using the read buffer when available.
      def read_block(size)
        return @input_stream.read(size) unless @read_cache_buffer

        begin
          @input_stream.read(size, @read_cache_buffer)
        rescue ArgumentError
          # Handle arity of -1 when just 0..1
          @read_cache_buffer = nil
          @input_stream.read(size)
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module IOStreams
  module Encode
    # Output stream wrapper that converts all data to the requested encoding,
    # and optionally cleanses it, before writing it to the underlying stream.
    class Writer
      attr_reader :encoding, :cleaner

      # Yields an encoding writer for the supplied file name or output stream.
      #
      # When a file name (String) is supplied the file is opened with
      # IOStreams::File::Writer for the duration of the block.
      # All keyword arguments are passed through to #initialize.
      def self.open(file_name_or_io, **args)
        if file_name_or_io.is_a?(String)
          IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
        else
          yield new(file_name_or_io, **args)
        end
      end

      # A stream writer that will write to the supplied output stream.
      # Written data is encoded prior to writing.
      #
      # Parameters
      #   output_stream
      #     The output stream that implements #write
      #
      #   encoding: [String|Encoding]
      #     Encode written data with this encoding.
      #     'US-ASCII':   Original 7 bit ASCII Format
      #     'ASCII-8BIT': 8-bit ASCII Format
      #     'UTF-8':      UTF-8 Format
      #     Etc.
      #     nil:          Do not convert, write the data in the encoding it was supplied in.
      #     Default: 'UTF-8'
      #
      #   encode_replace: [String]
      #     The character to replace with when a character cannot be converted to the target encoding.
      #     nil: Don't replace any invalid characters. Encoding::UndefinedConversionError is raised.
      #     Default: nil
      #
      #   encode_cleaner: [nil|symbol|Proc]
      #     Cleanse data before it is written to the output stream.
      #     nil:           No cleansing
      #     :printable     Cleanse all non-printable characters except \r and \n
      #     Proc/lambda    Proc to call before every write to cleanse the data
      #     Default: nil
      def initialize(output_stream, encoding: 'UTF-8', encode_cleaner: nil, encode_replace: nil)
        @output_stream = output_stream
        # The cleansing rules are shared with the reader.
        @cleaner       = ::IOStreams::Encode::Reader.send(:extract_cleaner, encode_cleaner)

        @encoding         = encoding.nil? || encoding.is_a?(Encoding) ? encoding : Encoding.find(encoding)
        @encoding_options = encode_replace.nil? ? {} : {invalid: :replace, undef: :replace, replace: encode_replace}
      end

      # Encode and write the supplied data to the output stream.
      # Returns self so that calls can be chained.
      #
      # Example:
      #   IOStreams.writer('a.txt', encoding: 'UTF-8') do |stream|
      #     stream << 'first line' << 'second line'
      #   end
      def <<(record)
        write(record)
        self
      end

      # Encode and write the supplied data to the output stream.
      # Returns the value returned by the output stream's #write,
      # or 0 when the supplied data is nil.
      #
      # Example:
      #   IOStreams.writer('a.txt', encoding: 'UTF-8') do |stream|
      #     count = stream.write('first line')
      #     puts "Output stream #write returned #{count}"
      #   end
      def write(data)
        return 0 if data.nil?

        text = data.to_s
        # The options must be passed as keywords; a positional Hash is treated as the
        # source encoding by String#encode on Ruby 3 and raises TypeError.
        block =
          if @encoding.nil? || text.encoding == @encoding
            text
          else
            text.encode(@encoding, **@encoding_options)
          end

        if @cleaner
          # Cleaners may modify the string in place (the :printable rule uses gsub!),
          # so never hand them the caller's own, possibly frozen, string.
          block = block.dup if block.equal?(text)
          block = @cleaner.call(block)
        end

        @output_stream.write(block)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module IOStreams
  # Exception hierarchy for IOStreams.
  module Errors
    # Common superclass of every error defined in this module, so that callers
    # can rescue all of them with a single `rescue IOStreams::Errors::Error`.
    class Error < StandardError
    end

    # NOTE(review): presumably raised when a tabular header fails validation; confirm against callers.
    class InvalidHeader < Error
    end

    # NOTE(review): presumably raised when a required tabular header is absent; confirm against callers.
    class MissingHeader < Error
    end

    # NOTE(review): presumably raised when supplied data is not of the expected type; confirm against callers.
    class TypeMismatch < Error
    end

    # When the specified delimiter is not found in the supplied stream / file
    class DelimiterNotFound < Error
    end
  end
end
|
@@ -4,7 +4,7 @@ module IOStreams
|
|
4
4
|
# Read from a named file
|
5
5
|
# TODO: Add support for mode (text / binary)
|
6
6
|
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
-
def self.open(file_name,
|
7
|
+
def self.open(file_name, **args, &block)
|
8
8
|
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
9
9
|
|
10
10
|
::File.open(file_name, 'rb', &block)
|
@@ -2,9 +2,7 @@ module IOStreams
|
|
2
2
|
module File
|
3
3
|
class Writer
|
4
4
|
# Write to a named file
|
5
|
-
|
6
|
-
# TODO: Add encoding support: external_encoding, internal_encoding
|
7
|
-
def self.open(file_name, _=nil, &block)
|
5
|
+
def self.open(file_name, **args, &block)
|
8
6
|
raise(ArgumentError, 'File name must be a string') unless file_name.is_a?(String)
|
9
7
|
|
10
8
|
::File.open(file_name, 'wb', &block)
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Gzip
|
3
3
|
class Reader
|
4
4
|
# Read from a gzip file or stream, decompressing the contents as it is read
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
unless IOStreams.reader_stream?(file_name_or_io)
|
7
7
|
::Zlib::GzipReader.open(file_name_or_io, &block)
|
8
8
|
else
|
@@ -2,7 +2,7 @@ module IOStreams
|
|
2
2
|
module Gzip
|
3
3
|
class Writer
|
4
4
|
# Write to a file / stream, compressing with GZip
|
5
|
-
def self.open(file_name_or_io,
|
5
|
+
def self.open(file_name_or_io, **args, &block)
|
6
6
|
unless IOStreams.writer_stream?(file_name_or_io)
|
7
7
|
Zlib::GzipWriter.open(file_name_or_io, &block)
|
8
8
|
else
|
@@ -1,4 +1,9 @@
|
|
1
1
|
require 'concurrent'
|
2
|
+
# Load Symmetric Encryption if present so that its reader and writer can be registered
|
3
|
+
begin
|
4
|
+
require 'symmetric-encryption'
|
5
|
+
rescue LoadError
|
6
|
+
end
|
2
7
|
|
3
8
|
# Streaming library for Ruby
|
4
9
|
#
|
@@ -60,20 +65,20 @@ module IOStreams
|
|
60
65
|
# Note:
|
61
66
|
# * Passes the file_name_or_io as-is into the block if it is already a reader stream AND
|
62
67
|
# no streams are passed in.
|
63
|
-
def self.reader(file_name_or_io, streams: nil, file_name: nil, &block)
|
64
|
-
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, &block)
|
68
|
+
def self.reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
69
|
+
stream(:reader, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
|
65
70
|
end
|
66
71
|
|
67
72
|
# Iterate over a file / stream returning one line at a time.
|
68
|
-
def self.each_line(file_name_or_io, **args, &block)
|
69
|
-
line_reader(file_name_or_io, **args) do |line_stream|
|
73
|
+
def self.each_line(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
74
|
+
line_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |line_stream|
|
70
75
|
line_stream.each(&block)
|
71
76
|
end
|
72
77
|
end
|
73
78
|
|
74
79
|
# Iterate over a file / stream returning one row (an array of values) at a time.
|
75
|
-
def self.each_row(file_name_or_io, **args, &block)
|
76
|
-
row_reader(file_name_or_io, **args) do |row_stream|
|
80
|
+
def self.each_row(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
81
|
+
row_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |row_stream|
|
77
82
|
row_stream.each(&block)
|
78
83
|
end
|
79
84
|
end
|
@@ -90,8 +95,8 @@ module IOStreams
|
|
90
95
|
# IOStreams.each_record(file_name) do |hash|
|
91
96
|
# p hash
|
92
97
|
# end
|
93
|
-
def self.each_record(file_name_or_io, **args, &block)
|
94
|
-
record_reader(file_name_or_io, **args) do |record_stream|
|
98
|
+
def self.each_record(file_name_or_io, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
99
|
+
record_reader(file_name_or_io, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, **args) do |record_stream|
|
95
100
|
record_stream.each(&block)
|
96
101
|
end
|
97
102
|
end
|
@@ -148,32 +153,32 @@ module IOStreams
|
|
148
153
|
# Note:
|
149
154
|
# * Passes the file_name_or_io as-is into the block if it is already a writer stream AND
|
150
155
|
# no streams are passed in.
|
151
|
-
def self.writer(file_name_or_io, streams: nil, file_name: nil, &block)
|
152
|
-
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, &block)
|
156
|
+
def self.writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
157
|
+
stream(:writer, file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace, &block)
|
153
158
|
end
|
154
159
|
|
155
|
-
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
160
|
+
def self.line_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
156
161
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Writer) || file_name_or_io.is_a?(Array)
|
157
162
|
|
158
|
-
writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
163
|
+
writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
159
164
|
IOStreams::Line::Writer.open(io, **args, &block)
|
160
165
|
end
|
161
166
|
end
|
162
167
|
|
163
|
-
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
168
|
+
def self.row_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
164
169
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Row::Writer)
|
165
170
|
|
166
|
-
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
171
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
167
172
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
168
173
|
|
169
174
|
IOStreams::Row::Writer.open(io, file_name: file_name, **args, &block)
|
170
175
|
end
|
171
176
|
end
|
172
177
|
|
173
|
-
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
178
|
+
def self.record_writer(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
174
179
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Writer)
|
175
180
|
|
176
|
-
line_writer(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
181
|
+
line_writer(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
177
182
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
178
183
|
|
179
184
|
IOStreams::Record::Writer.open(io, file_name: file_name, **args, &block)
|
@@ -185,7 +190,6 @@ module IOStreams
|
|
185
190
|
#
|
186
191
|
# Example: Copy between 2 files
|
187
192
|
# IOStreams.copy('a.csv', 'b.csv')
|
188
|
-
# # TODO: The above will convert the csv file to a Hash and then back to write it to the target file.
|
189
193
|
#
|
190
194
|
# Example: Read content from a Xlsx file and write it out in CSV form.
|
191
195
|
# IOStreams.copy('a.xlsx', 'b.csv')
|
@@ -311,23 +315,23 @@ module IOStreams
|
|
311
315
|
end
|
312
316
|
|
313
317
|
# Iterate over a file / stream returning each record/line one at a time.
|
314
|
-
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, **args, &block)
|
318
|
+
def self.line_reader(file_name_or_io, streams: nil, file_name: nil, encoding: nil, encode_cleaner: nil, encode_replace: nil, **args, &block)
|
315
319
|
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Line::Reader) ||
|
316
|
-
file_name_or_io.is_a?(IOStreams::Xlsx::Reader) ||
|
317
320
|
file_name_or_io.is_a?(Array)
|
318
321
|
|
319
|
-
reader(file_name_or_io, streams: streams, file_name: file_name) do |io|
|
322
|
+
reader(file_name_or_io, streams: streams, file_name: file_name, encoding: encoding, encode_cleaner: encode_cleaner, encode_replace: encode_replace) do |io|
|
320
323
|
IOStreams::Line::Reader.open(io, **args, &block)
|
321
324
|
end
|
322
325
|
end
|
323
326
|
|
324
|
-
# Iterate over a file / stream returning each line as
|
327
|
+
# Iterate over a file / stream returning each line as an array, one at a time.
|
325
328
|
def self.row_reader(file_name_or_io,
|
326
329
|
streams: nil,
|
327
330
|
delimiter: nil,
|
328
|
-
encoding: IOStreams::UTF8_ENCODING,
|
329
|
-
strip_non_printable: false,
|
330
331
|
file_name: nil,
|
332
|
+
encoding: nil,
|
333
|
+
encode_cleaner: nil,
|
334
|
+
encode_replace: nil,
|
331
335
|
**args,
|
332
336
|
&block)
|
333
337
|
|
@@ -335,12 +339,13 @@ module IOStreams
|
|
335
339
|
|
336
340
|
line_reader(
|
337
341
|
file_name_or_io,
|
338
|
-
streams:
|
339
|
-
delimiter:
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
342
|
+
streams: streams,
|
343
|
+
delimiter: delimiter,
|
344
|
+
file_name: file_name,
|
345
|
+
encoding: encoding,
|
346
|
+
encode_cleaner: encode_cleaner,
|
347
|
+
encode_replace: encode_replace
|
348
|
+
) do |io|
|
344
349
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
345
350
|
IOStreams::Row::Reader.open(io, file_name: file_name, **args, &block)
|
346
351
|
end
|
@@ -350,21 +355,24 @@ module IOStreams
|
|
350
355
|
def self.record_reader(file_name_or_io,
|
351
356
|
streams: nil,
|
352
357
|
delimiter: nil,
|
353
|
-
encoding: IOStreams::UTF8_ENCODING,
|
354
|
-
strip_non_printable: false,
|
355
358
|
file_name: nil,
|
359
|
+
encoding: nil,
|
360
|
+
encode_cleaner: nil,
|
361
|
+
encode_replace: nil,
|
356
362
|
**args,
|
357
363
|
&block)
|
358
364
|
|
359
|
-
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
|
365
|
+
return yield(file_name_or_io) if file_name_or_io.is_a?(IOStreams::Record::Reader)
|
360
366
|
|
361
367
|
line_reader(
|
362
368
|
file_name_or_io,
|
363
|
-
streams:
|
364
|
-
delimiter:
|
365
|
-
|
366
|
-
|
367
|
-
|
369
|
+
streams: streams,
|
370
|
+
delimiter: delimiter,
|
371
|
+
file_name: file_name,
|
372
|
+
encoding: encoding,
|
373
|
+
encode_cleaner: encode_cleaner,
|
374
|
+
encode_replace: encode_replace
|
375
|
+
) do |io|
|
368
376
|
|
369
377
|
file_name = file_name_or_io if file_name.nil? && file_name_or_io.is_a?(String)
|
370
378
|
IOStreams::Record::Reader.open(io, file_name: file_name, **args, &block)
|
@@ -414,7 +422,7 @@ module IOStreams
|
|
414
422
|
StreamStruct = Struct.new(:klass, :options)
|
415
423
|
|
416
424
|
# Returns a reader or writer stream
|
417
|
-
def self.stream(type, file_name_or_io, streams:, file_name:, &block)
|
425
|
+
def self.stream(type, file_name_or_io, streams:, file_name:, encoding: nil, encode_cleaner: nil, encode_replace: nil, &block)
|
418
426
|
# TODO: Add support for different schemes, such as file://, s3://, sftp://
|
419
427
|
|
420
428
|
streams = streams_for_file_name(file_name) if streams.nil? && file_name
|
@@ -427,6 +435,17 @@ module IOStreams
|
|
427
435
|
end
|
428
436
|
|
429
437
|
stream_structs = streams_for(type, streams)
|
438
|
+
|
439
|
+
# Add encoding stream if any of its options are present
|
440
|
+
if encoding || encode_cleaner || encode_replace
|
441
|
+
klass = type == :reader ? IOStreams::Encode::Reader : IOStreams::Encode::Writer
|
442
|
+
options = {}
|
443
|
+
options[:encoding] = encoding if encoding
|
444
|
+
options[:encode_cleaner] = encode_cleaner if encode_cleaner
|
445
|
+
options[:encode_replace] = encode_replace if encode_replace
|
446
|
+
stream_structs.unshift(StreamStruct.new(klass, options))
|
447
|
+
end
|
448
|
+
|
430
449
|
if stream_structs.size == 1
|
431
450
|
stream_struct = stream_structs.first
|
432
451
|
stream_struct.klass.open(file_name_or_io, stream_struct.options, &block)
|