iostreams 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +202 -0
- data/README.md +155 -47
- data/lib/io_streams/file/reader.rb +7 -8
- data/lib/io_streams/file/writer.rb +7 -8
- data/lib/io_streams/io_streams.rb +313 -129
- data/lib/io_streams/{delimited → line}/reader.rb +20 -30
- data/lib/io_streams/line/writer.rb +81 -0
- data/lib/io_streams/pgp.rb +4 -14
- data/lib/io_streams/record/reader.rb +55 -0
- data/lib/io_streams/record/writer.rb +63 -0
- data/lib/io_streams/row/reader.rb +60 -0
- data/lib/io_streams/row/writer.rb +62 -0
- data/lib/io_streams/s3.rb +25 -0
- data/lib/io_streams/s3/reader.rb +64 -0
- data/lib/io_streams/s3/writer.rb +13 -0
- data/lib/io_streams/streams.rb +1 -1
- data/lib/io_streams/tabular.rb +163 -0
- data/lib/io_streams/tabular/errors.rb +14 -0
- data/lib/io_streams/tabular/header.rb +146 -0
- data/lib/io_streams/tabular/parser/array.rb +26 -0
- data/lib/io_streams/tabular/parser/base.rb +12 -0
- data/lib/io_streams/tabular/parser/csv.rb +35 -0
- data/lib/io_streams/tabular/parser/fixed.rb +88 -0
- data/lib/io_streams/tabular/parser/hash.rb +21 -0
- data/lib/io_streams/tabular/parser/json.rb +25 -0
- data/lib/io_streams/tabular/parser/psv.rb +34 -0
- data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
- data/lib/io_streams/version.rb +2 -2
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +1 -1
- data/lib/iostreams.rb +21 -10
- data/test/bzip2_reader_test.rb +21 -22
- data/test/bzip2_writer_test.rb +38 -32
- data/test/file_reader_test.rb +19 -18
- data/test/file_writer_test.rb +23 -22
- data/test/files/test.json +3 -0
- data/test/gzip_reader_test.rb +21 -22
- data/test/gzip_writer_test.rb +35 -29
- data/test/io_streams_test.rb +137 -61
- data/test/line_reader_test.rb +105 -0
- data/test/line_writer_test.rb +50 -0
- data/test/pgp_reader_test.rb +29 -29
- data/test/pgp_test.rb +149 -195
- data/test/pgp_writer_test.rb +63 -62
- data/test/record_reader_test.rb +61 -0
- data/test/record_writer_test.rb +73 -0
- data/test/row_reader_test.rb +34 -0
- data/test/row_writer_test.rb +51 -0
- data/test/tabular_test.rb +184 -0
- data/test/xlsx_reader_test.rb +13 -17
- data/test/zip_reader_test.rb +21 -22
- data/test/zip_writer_test.rb +40 -36
- metadata +41 -17
- data/lib/io_streams/csv/reader.rb +0 -21
- data/lib/io_streams/csv/writer.rb +0 -20
- data/lib/io_streams/delimited/writer.rb +0 -67
- data/test/csv_reader_test.rb +0 -34
- data/test/csv_writer_test.rb +0 -35
- data/test/delimited_reader_test.rb +0 -115
- data/test/delimited_writer_test.rb +0 -44
@@ -1,16 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
|
-
module Delimited
|
2
|
+
module Line
|
3
3
|
class Reader
|
4
|
-
|
4
|
+
attr_reader :delimiter, :buffer_size, :encoding, :strip_non_printable
|
5
5
|
|
6
|
-
# Read from a file or stream
|
7
|
-
def self.open(file_name_or_io,
|
8
|
-
if
|
9
|
-
|
6
|
+
# Read a line at a time from a file or stream
|
7
|
+
def self.open(file_name_or_io, **args)
|
8
|
+
if file_name_or_io.is_a?(String)
|
9
|
+
IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
|
10
10
|
else
|
11
|
-
|
12
|
-
yield new(io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
|
13
|
-
end
|
11
|
+
yield new(file_name_or_io, **args)
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
@@ -47,6 +45,12 @@ module IOStreams
|
|
47
45
|
# Force encoding to this encoding for all data being read
|
48
46
|
# Default: UTF8_ENCODING
|
49
47
|
# Set to nil to disable encoding
|
48
|
+
#
|
49
|
+
# TODO:
|
50
|
+
# - Skip Comment lines. RegExp?
|
51
|
+
# - Skip "empty" / "blank" lines. RegExp?
|
52
|
+
# - Extract header line(s) / first non-comment, non-blank line
|
53
|
+
# - Embedded newline support, RegExp? or Proc?
|
50
54
|
def initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
|
51
55
|
@input_stream = input_stream
|
52
56
|
@delimiter = delimiter
|
@@ -54,11 +58,12 @@ module IOStreams
|
|
54
58
|
@encoding = encoding
|
55
59
|
@strip_non_printable = strip_non_printable
|
56
60
|
|
57
|
-
@delimiter.
|
61
|
+
@delimiter.encode(UTF8_ENCODING) if @delimiter && @encoding
|
58
62
|
@buffer = ''
|
59
63
|
end
|
60
64
|
|
61
|
-
#
|
65
|
+
# Iterate over every line in the file/stream passing each line to supplied block in turn.
|
66
|
+
# Returns [Integer] the number of lines read from the file/stream.
|
62
67
|
def each(&block)
|
63
68
|
partial = nil
|
64
69
|
loop do
|
@@ -83,32 +88,17 @@ module IOStreams
|
|
83
88
|
end
|
84
89
|
end
|
85
90
|
|
86
|
-
alias_method :each_line, :each
|
87
|
-
|
88
|
-
# Reads length bytes from the I/O stream.
|
89
|
-
# Not recommended, but available if someone calls #read on this delimited reader
|
90
|
-
def read(length = nil, outbuf = nil)
|
91
|
-
if length
|
92
|
-
while (@buffer.size < length) && (read_chunk > 0)
|
93
|
-
end
|
94
|
-
data = @buffer.slice!(0, length)
|
95
|
-
outbuf << data if outbuf
|
96
|
-
data
|
97
|
-
else
|
98
|
-
while read_chunk > 0
|
99
|
-
end
|
100
|
-
@buffer
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
##########################################################################
|
105
91
|
private
|
106
92
|
|
93
|
+
attr_reader :buffer
|
94
|
+
attr_writer :delimiter
|
95
|
+
|
107
96
|
NOT_PRINTABLE = Regexp.compile(/[^[:print:]|\r|\n]/)
|
108
97
|
|
109
98
|
# Returns [Integer] the number of bytes read into the internal buffer
|
110
99
|
# Returns 0 on EOF
|
111
100
|
def read_chunk
|
101
|
+
# TODO: read into existing buffer
|
112
102
|
chunk = @input_stream.read(@buffer_size)
|
113
103
|
# EOF reached?
|
114
104
|
return 0 unless chunk
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Line
|
3
|
+
class Writer
|
4
|
+
attr_reader :delimiter, :encoding, :strip_non_printable
|
5
|
+
|
6
|
+
# Write a line at a time to a file or stream
|
7
|
+
def self.open(file_name_or_io, **args)
|
8
|
+
if file_name_or_io.is_a?(String)
|
9
|
+
IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
|
10
|
+
else
|
11
|
+
yield new(file_name_or_io, **args)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
NOT_PRINTABLE = Regexp.compile(/[^[:print:]]/)
|
16
|
+
|
17
|
+
# A delimited stream writer that will write to the supplied output stream
|
18
|
+
#
|
19
|
+
# The output stream should be binary with no text conversions performed
|
20
|
+
# since `strip_non_printable` will be applied to the binary stream before
|
21
|
+
# converting to UTF-8
|
22
|
+
#
|
23
|
+
# Parameters
|
24
|
+
# output_stream
|
25
|
+
# The output stream that implements #write
|
26
|
+
#
|
27
|
+
# delimiter: [String]
|
28
|
+
# Add the specified delimiter after every record when writing it
|
29
|
+
# to the output stream
|
30
|
+
# Default: OS Specific. Linux: "\n"
|
31
|
+
#
|
32
|
+
# encoding:
|
33
|
+
# Encode data before writing to the output stream.
|
34
|
+
# Default: UTF8_ENCODING
|
35
|
+
# Set to nil to disable encoding
|
36
|
+
#
|
37
|
+
# strip_non_printable: [true|false]
|
38
|
+
# Strip all non-printable characters before writing to the file / stream.
|
39
|
+
# Default: false
|
40
|
+
#
|
41
|
+
# TODO: Support replacement character for invalid characters
|
42
|
+
def initialize(output_stream, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false)
|
43
|
+
@output_stream = output_stream
|
44
|
+
@delimiter = delimiter.encode(encoding) if delimiter && encoding
|
45
|
+
@encoding = encoding
|
46
|
+
@strip_non_printable = strip_non_printable
|
47
|
+
end
|
48
|
+
|
49
|
+
# Write a line to the output stream
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
# IOStreams.line_writer('a.txt') do |stream|
|
53
|
+
# stream << 'first line' << 'second line'
|
54
|
+
# end
|
55
|
+
def <<(record)
|
56
|
+
write(record)
|
57
|
+
self
|
58
|
+
end
|
59
|
+
|
60
|
+
# Write a line to the output stream followed by the delimiter.
|
61
|
+
# Returns [Integer] the number of bytes written.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
# IOStreams.line_writer('a.txt') do |stream|
|
65
|
+
# count = stream.write('first line')
|
66
|
+
# puts "Wrote #{count} bytes to the output file, including the delimiter"
|
67
|
+
# end
|
68
|
+
def write(record)
|
69
|
+
chunk = record.to_s
|
70
|
+
chunk.gsub!(NOT_PRINTABLE, '') if strip_non_printable
|
71
|
+
count = output_stream.write((encoding ? chunk.encode(encoding) : chunk))
|
72
|
+
count += output_stream.write(delimiter) if delimiter
|
73
|
+
count
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
attr_reader :output_stream
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/io_streams/pgp.rb
CHANGED
@@ -232,31 +232,21 @@ module IOStreams
|
|
232
232
|
end
|
233
233
|
end
|
234
234
|
|
235
|
-
# Returns [String] containing all the keys for the supplied email address.
|
235
|
+
# Returns [String] containing all the public keys for the supplied email address.
|
236
236
|
#
|
237
237
|
# email: [String] Email address for requested key.
|
238
238
|
#
|
239
239
|
# ascii: [true|false]
|
240
240
|
# Whether to export as ASCII text instead of binary format
|
241
241
|
# Default: true
|
242
|
-
|
243
|
-
# private: [true|false]
|
244
|
-
# Whether to export the private key
|
245
|
-
# Default: false
|
246
|
-
#
|
247
|
-
# passphrase: [String]
|
248
|
-
# In order to export a private key the passphrase for the key must be supplied.
|
249
|
-
# Otherwise a `Inappropriate ioctl for device` error will be returned.
|
250
|
-
def self.export(email:, passphrase: nil, ascii: true, private: false)
|
242
|
+
def self.export(email:, ascii: true)
|
251
243
|
version_check
|
252
|
-
raise(ArgumentError, "Missing keyword: passphrase when private: true") if private && passphrase.nil?
|
253
244
|
|
254
245
|
armor = ascii ? '--armor' : nil
|
255
|
-
cmd = private ? '--export-secret-keys' : '--export'
|
256
246
|
loopback = pgp_version.to_f >= 2.1 ? '--pinentry-mode loopback' : ''
|
257
|
-
command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor}
|
247
|
+
command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} --export #{email}"
|
258
248
|
|
259
|
-
out, err, status = Open3.capture3(command, binmode: true
|
249
|
+
out, err, status = Open3.capture3(command, binmode: true)
|
260
250
|
logger.debug { "IOStreams::Pgp.export: #{command}\n#{err}" } if logger
|
261
251
|
if status.success? && out.length > 0
|
262
252
|
out
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Record
|
3
|
+
# Converts each line of an input stream into a Hash, one per row
|
4
|
+
class Reader
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# Read a record as a Hash at a time from a file or stream.
|
8
|
+
def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
9
|
+
if file_name_or_io.is_a?(String)
|
10
|
+
IOStreams.line_reader(file_name_or_io,
|
11
|
+
delimiter: delimiter,
|
12
|
+
buffer_size: buffer_size,
|
13
|
+
encoding: encoding,
|
14
|
+
strip_non_printable: strip_non_printable) do |io|
|
15
|
+
yield new(io, file_name: file_name_or_io, **args)
|
16
|
+
end
|
17
|
+
else
|
18
|
+
yield new(file_name_or_io, **args)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Create a Tabular reader to return the stream as Hash records
|
23
|
+
# Parse a delimited data source.
|
24
|
+
#
|
25
|
+
# Parameters
|
26
|
+
# delimited: [#each]
|
27
|
+
# Anything that returns one line / record at a time when #each is called on it.
|
28
|
+
#
|
29
|
+
# format: [Symbol]
|
30
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
31
|
+
#
|
32
|
+
# For all other parameters, see Tabular::Header.new
|
33
|
+
def initialize(delimited, cleanse_header: true, **args)
|
34
|
+
@tabular = IOStreams::Tabular.new(**args)
|
35
|
+
@delimited = delimited
|
36
|
+
@cleanse_header = cleanse_header
|
37
|
+
end
|
38
|
+
|
39
|
+
def each
|
40
|
+
delimited.each do |line|
|
41
|
+
if tabular.requires_header?
|
42
|
+
tabular.parse_header(line)
|
43
|
+
tabular.cleanse_header! if cleanse_header
|
44
|
+
else
|
45
|
+
yield tabular.record_parse(line)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Record
|
3
|
+
# Example, implied header from first record:
|
4
|
+
# IOStreams.record_writer do |stream|
|
5
|
+
# stream << {name: 'Jack', address: 'Somewhere', zipcode: 12345}
|
6
|
+
# stream << {name: 'Joe', address: 'Lost', zipcode: 32443, age: 23}
|
7
|
+
# end
|
8
|
+
#
|
9
|
+
# Output:
|
10
|
+
# name, add
|
11
|
+
#
|
12
|
+
class Writer
|
13
|
+
# Write a record as a Hash at a time to a file or stream.
|
14
|
+
def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
15
|
+
if file_name_or_io.is_a?(String)
|
16
|
+
IOStreams.line_writer(file_name_or_io,
|
17
|
+
delimiter: delimiter,
|
18
|
+
encoding: encoding,
|
19
|
+
strip_non_printable: strip_non_printable) do |io|
|
20
|
+
yield new(io, file_name: file_name_or_io, **args)
|
21
|
+
end
|
22
|
+
else
|
23
|
+
yield new(file_name_or_io, **args)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Create a Tabular writer that takes individual records as Hashes.
|
28
|
+
# Parse a delimited data source.
|
29
|
+
#
|
30
|
+
# Parameters
|
31
|
+
# delimited: [#<<]
|
32
|
+
# Anything that accepts a line / record at a time when #<< is called on it.
|
33
|
+
#
|
34
|
+
# format: [Symbol]
|
35
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
36
|
+
#
|
37
|
+
# For all other parameters, see Tabular::Header.new
|
38
|
+
#
|
39
|
+
# columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
|
40
|
+
def initialize(delimited, columns: nil, **args)
|
41
|
+
@tabular = IOStreams::Tabular.new(columns: columns, **args)
|
42
|
+
@delimited = delimited
|
43
|
+
|
44
|
+
# Render header line when `columns` is supplied.
|
45
|
+
delimited << @tabular.render(columns) if columns && @tabular.requires_header?
|
46
|
+
end
|
47
|
+
|
48
|
+
def <<(hash)
|
49
|
+
raise(ArgumentError, 'Must supply a Hash') unless hash.is_a?(Hash)
|
50
|
+
if tabular.requires_header?
|
51
|
+
columns = hash.keys
|
52
|
+
tabular.header.columns = columns
|
53
|
+
delimited << tabular.render(columns)
|
54
|
+
end
|
55
|
+
delimited << tabular.render(hash)
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Row
|
3
|
+
# Converts each line of an input stream into an array for every line
|
4
|
+
class Reader
|
5
|
+
# Read a line as an Array at a time from a file or stream.
|
6
|
+
def self.open(file_name_or_io,
|
7
|
+
delimiter: nil,
|
8
|
+
buffer_size: 65_536,
|
9
|
+
encoding: UTF8_ENCODING,
|
10
|
+
strip_non_printable: false,
|
11
|
+
file_name: nil,
|
12
|
+
**args)
|
13
|
+
if file_name_or_io.is_a?(String)
|
14
|
+
IOStreams.line_reader(file_name_or_io,
|
15
|
+
delimiter: delimiter,
|
16
|
+
buffer_size: buffer_size,
|
17
|
+
encoding: encoding,
|
18
|
+
file_name: file_name,
|
19
|
+
strip_non_printable: strip_non_printable) do |io|
|
20
|
+
yield new(io, file_name: file_name, **args)
|
21
|
+
end
|
22
|
+
else
|
23
|
+
yield new(file_name_or_io, **args)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Create a Tabular reader to return the stream rows as arrays.
|
28
|
+
#
|
29
|
+
# Parameters
|
30
|
+
# delimited: [#each]
|
31
|
+
# Anything that returns one line / record at a time when #each is called on it.
|
32
|
+
#
|
33
|
+
# format: [Symbol]
|
34
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
35
|
+
#
|
36
|
+
# For all other parameters, see Tabular::Header.new
|
37
|
+
def initialize(delimited, cleanse_header: true, **args)
|
38
|
+
@tabular = IOStreams::Tabular.new(**args)
|
39
|
+
@delimited = delimited
|
40
|
+
@cleanse_header = cleanse_header
|
41
|
+
end
|
42
|
+
|
43
|
+
def each
|
44
|
+
delimited.each do |line|
|
45
|
+
if tabular.requires_header?
|
46
|
+
columns = tabular.parse_header(line)
|
47
|
+
tabular.cleanse_header! if cleanse_header
|
48
|
+
yield columns
|
49
|
+
else
|
50
|
+
yield tabular.row_parse(line)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'csv'
|
2
|
+
module IOStreams
|
3
|
+
module Row
|
4
|
+
# Example:
|
5
|
+
# IOStreams.row_writer do |stream|
|
6
|
+
# stream << ['name', 'address', 'zipcode']
|
7
|
+
# stream << ['Jack', 'Somewhere', 12345]
|
8
|
+
# stream << ['Joe', 'Lost', 32443]
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# Output:
|
12
|
+
# ...
|
13
|
+
#
|
14
|
+
class Writer
|
15
|
+
# Write a row as an Array at a time to a file or stream.
|
16
|
+
def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
17
|
+
if file_name_or_io.is_a?(String)
|
18
|
+
IOStreams.line_writer(file_name_or_io,
|
19
|
+
delimiter: delimiter,
|
20
|
+
encoding: encoding,
|
21
|
+
strip_non_printable: strip_non_printable) do |io|
|
22
|
+
yield new(io, **args)
|
23
|
+
end
|
24
|
+
else
|
25
|
+
yield new(file_name_or_io, **args)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# Create a Tabular writer that takes individual rows as arrays.
|
30
|
+
#
|
31
|
+
# Parameters
|
32
|
+
# delimited: [#<<]
|
33
|
+
# Anything that accepts a line / record at a time when #<< is called on it.
|
34
|
+
#
|
35
|
+
# format: [Symbol]
|
36
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
37
|
+
#
|
38
|
+
# For all other parameters, see Tabular::Header.new
|
39
|
+
#
|
40
|
+
# columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
|
41
|
+
def initialize(delimited, columns: nil, **args)
|
42
|
+
@tabular = IOStreams::Tabular.new(columns: columns, **args)
|
43
|
+
@delimited = delimited
|
44
|
+
|
45
|
+
# Render header line when `columns` is supplied.
|
46
|
+
delimited << @tabular.render(columns) if columns && @tabular.requires_header?
|
47
|
+
end
|
48
|
+
|
49
|
+
# Supply an Array to render; any other type raises ArgumentError.
|
50
|
+
def <<(array)
|
51
|
+
raise(ArgumentError, 'Must supply an Array') unless array.is_a?(Array)
|
52
|
+
# If header (columns) was not supplied as an argument, assume first line is the header.
|
53
|
+
tabular.header.columns = array if tabular.requires_header?
|
54
|
+
delimited << tabular.render(array)
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
attr_reader :tabular, :delimited
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|