iostreams 0.14.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +202 -0
- data/README.md +155 -47
- data/lib/io_streams/file/reader.rb +7 -8
- data/lib/io_streams/file/writer.rb +7 -8
- data/lib/io_streams/io_streams.rb +313 -129
- data/lib/io_streams/{delimited → line}/reader.rb +20 -30
- data/lib/io_streams/line/writer.rb +81 -0
- data/lib/io_streams/pgp.rb +4 -14
- data/lib/io_streams/record/reader.rb +55 -0
- data/lib/io_streams/record/writer.rb +63 -0
- data/lib/io_streams/row/reader.rb +60 -0
- data/lib/io_streams/row/writer.rb +62 -0
- data/lib/io_streams/s3.rb +25 -0
- data/lib/io_streams/s3/reader.rb +64 -0
- data/lib/io_streams/s3/writer.rb +13 -0
- data/lib/io_streams/streams.rb +1 -1
- data/lib/io_streams/tabular.rb +163 -0
- data/lib/io_streams/tabular/errors.rb +14 -0
- data/lib/io_streams/tabular/header.rb +146 -0
- data/lib/io_streams/tabular/parser/array.rb +26 -0
- data/lib/io_streams/tabular/parser/base.rb +12 -0
- data/lib/io_streams/tabular/parser/csv.rb +35 -0
- data/lib/io_streams/tabular/parser/fixed.rb +88 -0
- data/lib/io_streams/tabular/parser/hash.rb +21 -0
- data/lib/io_streams/tabular/parser/json.rb +25 -0
- data/lib/io_streams/tabular/parser/psv.rb +34 -0
- data/lib/io_streams/tabular/utility/csv_row.rb +115 -0
- data/lib/io_streams/version.rb +2 -2
- data/lib/io_streams/xlsx/reader.rb +1 -1
- data/lib/io_streams/zip/reader.rb +1 -1
- data/lib/io_streams/zip/writer.rb +1 -1
- data/lib/iostreams.rb +21 -10
- data/test/bzip2_reader_test.rb +21 -22
- data/test/bzip2_writer_test.rb +38 -32
- data/test/file_reader_test.rb +19 -18
- data/test/file_writer_test.rb +23 -22
- data/test/files/test.json +3 -0
- data/test/gzip_reader_test.rb +21 -22
- data/test/gzip_writer_test.rb +35 -29
- data/test/io_streams_test.rb +137 -61
- data/test/line_reader_test.rb +105 -0
- data/test/line_writer_test.rb +50 -0
- data/test/pgp_reader_test.rb +29 -29
- data/test/pgp_test.rb +149 -195
- data/test/pgp_writer_test.rb +63 -62
- data/test/record_reader_test.rb +61 -0
- data/test/record_writer_test.rb +73 -0
- data/test/row_reader_test.rb +34 -0
- data/test/row_writer_test.rb +51 -0
- data/test/tabular_test.rb +184 -0
- data/test/xlsx_reader_test.rb +13 -17
- data/test/zip_reader_test.rb +21 -22
- data/test/zip_writer_test.rb +40 -36
- metadata +41 -17
- data/lib/io_streams/csv/reader.rb +0 -21
- data/lib/io_streams/csv/writer.rb +0 -20
- data/lib/io_streams/delimited/writer.rb +0 -67
- data/test/csv_reader_test.rb +0 -34
- data/test/csv_writer_test.rb +0 -35
- data/test/delimited_reader_test.rb +0 -115
- data/test/delimited_writer_test.rb +0 -44
@@ -1,16 +1,14 @@
|
|
1
1
|
module IOStreams
|
2
|
-
module
|
2
|
+
module Line
|
3
3
|
class Reader
|
4
|
-
|
4
|
+
attr_reader :delimiter, :buffer_size, :encoding, :strip_non_printable
|
5
5
|
|
6
|
-
# Read from a file or stream
|
7
|
-
def self.open(file_name_or_io,
|
8
|
-
if
|
9
|
-
|
6
|
+
# Read a line at a time from a file or stream
|
7
|
+
def self.open(file_name_or_io, **args)
|
8
|
+
if file_name_or_io.is_a?(String)
|
9
|
+
IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
|
10
10
|
else
|
11
|
-
|
12
|
-
yield new(io, delimiter: delimiter, buffer_size: buffer_size, encoding: encoding, strip_non_printable: strip_non_printable)
|
13
|
-
end
|
11
|
+
yield new(file_name_or_io, **args)
|
14
12
|
end
|
15
13
|
end
|
16
14
|
|
@@ -47,6 +45,12 @@ module IOStreams
|
|
47
45
|
# Force encoding to this encoding for all data being read
|
48
46
|
# Default: UTF8_ENCODING
|
49
47
|
# Set to nil to disable encoding
|
48
|
+
#
|
49
|
+
# TODO:
|
50
|
+
# - Skip Comment lines. RegExp?
|
51
|
+
# - Skip "empty" / "blank" lines. RegExp?
|
52
|
+
# - Extract header line(s) / first non-comment, non-blank line
|
53
|
+
# - Embedded newline support, RegExp? or Proc?
|
50
54
|
def initialize(input_stream, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false)
|
51
55
|
@input_stream = input_stream
|
52
56
|
@delimiter = delimiter
|
@@ -54,11 +58,12 @@ module IOStreams
|
|
54
58
|
@encoding = encoding
|
55
59
|
@strip_non_printable = strip_non_printable
|
56
60
|
|
57
|
-
@delimiter.
|
61
|
+
@delimiter.encode(UTF8_ENCODING) if @delimiter && @encoding
|
58
62
|
@buffer = ''
|
59
63
|
end
|
60
64
|
|
61
|
-
#
|
65
|
+
# Iterate over every line in the file/stream passing each line to supplied block in turn.
|
66
|
+
# Returns [Integer] the number of lines read from the file/stream.
|
62
67
|
def each(&block)
|
63
68
|
partial = nil
|
64
69
|
loop do
|
@@ -83,32 +88,17 @@ module IOStreams
|
|
83
88
|
end
|
84
89
|
end
|
85
90
|
|
86
|
-
alias_method :each_line, :each
|
87
|
-
|
88
|
-
# Reads length bytes from the I/O stream.
|
89
|
-
# Not recommended, but available if someone calls #read on this delimited reader
|
90
|
-
def read(length = nil, outbuf = nil)
|
91
|
-
if length
|
92
|
-
while (@buffer.size < length) && (read_chunk > 0)
|
93
|
-
end
|
94
|
-
data = @buffer.slice!(0, length)
|
95
|
-
outbuf << data if outbuf
|
96
|
-
data
|
97
|
-
else
|
98
|
-
while read_chunk > 0
|
99
|
-
end
|
100
|
-
@buffer
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
|
-
##########################################################################
|
105
91
|
private
|
106
92
|
|
93
|
+
attr_reader :buffer
|
94
|
+
attr_writer :delimiter
|
95
|
+
|
107
96
|
NOT_PRINTABLE = Regexp.compile(/[^[:print:]|\r|\n]/)
|
108
97
|
|
109
98
|
# Returns [Integer] the number of bytes read into the internal buffer
|
110
99
|
# Returns 0 on EOF
|
111
100
|
def read_chunk
|
101
|
+
# TODO: read into existing buffer
|
112
102
|
chunk = @input_stream.read(@buffer_size)
|
113
103
|
# EOF reached?
|
114
104
|
return 0 unless chunk
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Line
|
3
|
+
class Writer
|
4
|
+
attr_reader :delimiter, :encoding, :strip_non_printable
|
5
|
+
|
6
|
+
# Write a line at a time to a file or stream
|
7
|
+
def self.open(file_name_or_io, **args)
|
8
|
+
if file_name_or_io.is_a?(String)
|
9
|
+
IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
|
10
|
+
else
|
11
|
+
yield new(file_name_or_io, **args)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
NOT_PRINTABLE = Regexp.compile(/[^[:print:]]/)
|
16
|
+
|
17
|
+
# A delimited stream writer that will write to the supplied output stream
|
18
|
+
#
|
19
|
+
# The output stream should be binary with no text conversions performed
|
20
|
+
# since `strip_non_printable` will be applied to the binary stream before
|
21
|
+
# converting to UTF-8
|
22
|
+
#
|
23
|
+
# Parameters
|
24
|
+
# output_stream
|
25
|
+
# The output stream that implements #write
|
26
|
+
#
|
27
|
+
# delimiter: [String]
|
28
|
+
# Add the specified delimiter after every record when writing it
|
29
|
+
# to the output stream
|
30
|
+
# Default: OS Specific. Linux: "\n"
|
31
|
+
#
|
32
|
+
# encoding:
|
33
|
+
# Encode data before writing to the output stream.
|
34
|
+
# Default: UTF8_ENCODING
|
35
|
+
# Set to nil to disable encoding
|
36
|
+
#
|
37
|
+
# strip_non_printable: [true|false]
|
38
|
+
# Strip all non-printable characters before writing to the file / stream.
|
39
|
+
# Default: false
|
40
|
+
#
|
41
|
+
# TODO: Support replacement character for invalid characters
|
42
|
+
def initialize(output_stream, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false)
|
43
|
+
@output_stream = output_stream
|
44
|
+
@delimiter = delimiter.encode(encoding) if delimiter && encoding
|
45
|
+
@encoding = encoding
|
46
|
+
@strip_non_printable = strip_non_printable
|
47
|
+
end
|
48
|
+
|
49
|
+
# Write a line to the output stream
|
50
|
+
#
|
51
|
+
# Example:
|
52
|
+
# IOStreams.line_writer('a.txt') do |stream|
|
53
|
+
# stream << 'first line' << 'second line'
|
54
|
+
# end
|
55
|
+
def <<(record)
|
56
|
+
write(record)
|
57
|
+
self
|
58
|
+
end
|
59
|
+
|
60
|
+
# Write a line to the output stream followed by the delimiter.
|
61
|
+
# Returns [Integer] the number of bytes written.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
# IOStreams.line_writer('a.txt') do |stream|
|
65
|
+
# count = stream.write('first line')
|
66
|
+
# puts "Wrote #{count} bytes to the output file, including the delimiter"
|
67
|
+
# end
|
68
|
+
def write(record)
|
69
|
+
chunk = record.to_s
|
70
|
+
chunk.gsub!(NOT_PRINTABLE, '') if strip_non_printable
|
71
|
+
count = output_stream.write((encoding ? chunk.encode(encoding) : chunk))
|
72
|
+
count += output_stream.write(delimiter) if delimiter
|
73
|
+
count
|
74
|
+
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
attr_reader :output_stream
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/io_streams/pgp.rb
CHANGED
@@ -232,31 +232,21 @@ module IOStreams
|
|
232
232
|
end
|
233
233
|
end
|
234
234
|
|
235
|
-
# Returns [String] containing all the keys for the supplied email address.
|
235
|
+
# Returns [String] containing all the public keys for the supplied email address.
|
236
236
|
#
|
237
237
|
# email: [String] Email address for requested key.
|
238
238
|
#
|
239
239
|
# ascii: [true|false]
|
240
240
|
# Whether to export as ASCII text instead of binary format
|
241
241
|
# Default: true
|
242
|
-
|
243
|
-
# private: [true|false]
|
244
|
-
# Whether to export the private key
|
245
|
-
# Default: false
|
246
|
-
#
|
247
|
-
# passphrase: [String]
|
248
|
-
# In order to export a private key the passphrase for the key must be supplied.
|
249
|
-
# Otherwise a `Inappropriate ioctl for device` error will be returned.
|
250
|
-
def self.export(email:, passphrase: nil, ascii: true, private: false)
|
242
|
+
def self.export(email:, ascii: true)
|
251
243
|
version_check
|
252
|
-
raise(ArgumentError, "Missing keyword: passphrase when private: true") if private && passphrase.nil?
|
253
244
|
|
254
245
|
armor = ascii ? '--armor' : nil
|
255
|
-
cmd = private ? '--export-secret-keys' : '--export'
|
256
246
|
loopback = pgp_version.to_f >= 2.1 ? '--pinentry-mode loopback' : ''
|
257
|
-
command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor}
|
247
|
+
command = "#{executable} #{loopback} --no-tty --passphrase-fd 0 --batch #{armor} --export #{email}"
|
258
248
|
|
259
|
-
out, err, status = Open3.capture3(command, binmode: true
|
249
|
+
out, err, status = Open3.capture3(command, binmode: true)
|
260
250
|
logger.debug { "IOStreams::Pgp.export: #{command}\n#{err}" } if logger
|
261
251
|
if status.success? && out.length > 0
|
262
252
|
out
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Record
|
3
|
+
# Converts each line of an input stream into a hash, one per row
|
4
|
+
class Reader
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# Read a record as a Hash at a time from a file or stream.
|
8
|
+
def self.open(file_name_or_io, delimiter: nil, buffer_size: 65536, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
9
|
+
if file_name_or_io.is_a?(String)
|
10
|
+
IOStreams.line_reader(file_name_or_io,
|
11
|
+
delimiter: delimiter,
|
12
|
+
buffer_size: buffer_size,
|
13
|
+
encoding: encoding,
|
14
|
+
strip_non_printable: strip_non_printable) do |io|
|
15
|
+
yield new(io, file_name: file_name_or_io, **args)
|
16
|
+
end
|
17
|
+
else
|
18
|
+
yield new(file_name_or_io, **args)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Create a Tabular reader to return the stream as Hash records
|
23
|
+
# Parse a delimited data source.
|
24
|
+
#
|
25
|
+
# Parameters
|
26
|
+
# delimited: [#each]
|
27
|
+
# Anything that returns one line / record at a time when #each is called on it.
|
28
|
+
#
|
29
|
+
# format: [Symbol]
|
30
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
31
|
+
#
|
32
|
+
# For all other parameters, see Tabular::Header.new
|
33
|
+
def initialize(delimited, cleanse_header: true, **args)
|
34
|
+
@tabular = IOStreams::Tabular.new(**args)
|
35
|
+
@delimited = delimited
|
36
|
+
@cleanse_header = cleanse_header
|
37
|
+
end
|
38
|
+
|
39
|
+
def each
|
40
|
+
delimited.each do |line|
|
41
|
+
if tabular.requires_header?
|
42
|
+
tabular.parse_header(line)
|
43
|
+
tabular.cleanse_header! if cleanse_header
|
44
|
+
else
|
45
|
+
yield tabular.record_parse(line)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Record
|
3
|
+
# Example, implied header from first record:
|
4
|
+
# IOStreams.record_writer do |stream|
|
5
|
+
# stream << {name: 'Jack', address: 'Somewhere', zipcode: 12345}
|
6
|
+
# stream << {name: 'Joe', address: 'Lost', zipcode: 32443, age: 23}
|
7
|
+
# end
|
8
|
+
#
|
9
|
+
# Output:
|
10
|
+
# name, add
|
11
|
+
#
|
12
|
+
class Writer
|
13
|
+
# Write a record as a Hash at a time to a file or stream.
|
14
|
+
def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
15
|
+
if file_name_or_io.is_a?(String)
|
16
|
+
IOStreams.line_writer(file_name_or_io,
|
17
|
+
delimiter: delimiter,
|
18
|
+
encoding: encoding,
|
19
|
+
strip_non_printable: strip_non_printable) do |io|
|
20
|
+
yield new(io, file_name: file_name_or_io, **args)
|
21
|
+
end
|
22
|
+
else
|
23
|
+
yield new(file_name_or_io, **args)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Create a Tabular writer that takes individual records as hashes.
|
28
|
+
# Parse a delimited data source.
|
29
|
+
#
|
30
|
+
# Parameters
|
31
|
+
# delimited: [#<<]
|
32
|
+
# Anything that accepts a line / record at a time when #<< is called on it.
|
33
|
+
#
|
34
|
+
# format: [Symbol]
|
35
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
36
|
+
#
|
37
|
+
# For all other parameters, see Tabular::Header.new
|
38
|
+
#
|
39
|
+
# columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
|
40
|
+
def initialize(delimited, columns: nil, **args)
|
41
|
+
@tabular = IOStreams::Tabular.new(columns: columns, **args)
|
42
|
+
@delimited = delimited
|
43
|
+
|
44
|
+
# Render header line when `columns` is supplied.
|
45
|
+
delimited << @tabular.render(columns) if columns && @tabular.requires_header?
|
46
|
+
end
|
47
|
+
|
48
|
+
def <<(hash)
|
49
|
+
raise(ArgumentError, 'Must supply a Hash') unless hash.is_a?(Hash)
|
50
|
+
if tabular.requires_header?
|
51
|
+
columns = hash.keys
|
52
|
+
tabular.header.columns = columns
|
53
|
+
delimited << tabular.render(columns)
|
54
|
+
end
|
55
|
+
delimited << tabular.render(hash)
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module IOStreams
|
2
|
+
module Row
|
3
|
+
# Converts each line of an input stream into an array for every line
|
4
|
+
class Reader
|
5
|
+
# Read a line as an Array at a time from a file or stream.
|
6
|
+
def self.open(file_name_or_io,
|
7
|
+
delimiter: nil,
|
8
|
+
buffer_size: 65_536,
|
9
|
+
encoding: UTF8_ENCODING,
|
10
|
+
strip_non_printable: false,
|
11
|
+
file_name: nil,
|
12
|
+
**args)
|
13
|
+
if file_name_or_io.is_a?(String)
|
14
|
+
IOStreams.line_reader(file_name_or_io,
|
15
|
+
delimiter: delimiter,
|
16
|
+
buffer_size: buffer_size,
|
17
|
+
encoding: encoding,
|
18
|
+
file_name: file_name,
|
19
|
+
strip_non_printable: strip_non_printable) do |io|
|
20
|
+
yield new(io, file_name: file_name, **args)
|
21
|
+
end
|
22
|
+
else
|
23
|
+
yield new(file_name_or_io, **args)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Create a Tabular reader to return the stream rows as arrays.
|
28
|
+
#
|
29
|
+
# Parameters
|
30
|
+
# delimited: [#each]
|
31
|
+
# Anything that returns one line / record at a time when #each is called on it.
|
32
|
+
#
|
33
|
+
# format: [Symbol]
|
34
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
35
|
+
#
|
36
|
+
# For all other parameters, see Tabular::Header.new
|
37
|
+
def initialize(delimited, cleanse_header: true, **args)
|
38
|
+
@tabular = IOStreams::Tabular.new(**args)
|
39
|
+
@delimited = delimited
|
40
|
+
@cleanse_header = cleanse_header
|
41
|
+
end
|
42
|
+
|
43
|
+
def each
|
44
|
+
delimited.each do |line|
|
45
|
+
if tabular.requires_header?
|
46
|
+
columns = tabular.parse_header(line)
|
47
|
+
tabular.cleanse_header! if cleanse_header
|
48
|
+
yield columns
|
49
|
+
else
|
50
|
+
yield tabular.row_parse(line)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
attr_reader :tabular, :delimited, :cleanse_header
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'csv'
|
2
|
+
module IOStreams
|
3
|
+
module Row
|
4
|
+
# Example:
|
5
|
+
# IOStreams.row_writer do |stream|
|
6
|
+
# stream << ['name', 'address', 'zipcode']
|
7
|
+
# stream << ['Jack', 'Somewhere', 12345]
|
8
|
+
# stream << ['Joe', 'Lost', 32443]
|
9
|
+
# end
|
10
|
+
#
|
11
|
+
# Output:
|
12
|
+
# ...
|
13
|
+
#
|
14
|
+
class Writer
|
15
|
+
# Write a row as an Array at a time to a file or stream.
|
16
|
+
def self.open(file_name_or_io, delimiter: $/, encoding: UTF8_ENCODING, strip_non_printable: false, **args)
|
17
|
+
if file_name_or_io.is_a?(String)
|
18
|
+
IOStreams.line_writer(file_name_or_io,
|
19
|
+
delimiter: delimiter,
|
20
|
+
encoding: encoding,
|
21
|
+
strip_non_printable: strip_non_printable) do |io|
|
22
|
+
yield new(io, **args)
|
23
|
+
end
|
24
|
+
else
|
25
|
+
yield new(file_name_or_io, **args)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
# Create a Tabular writer that takes individual rows as arrays.
|
30
|
+
#
|
31
|
+
# Parameters
|
32
|
+
# delimited: [#<<]
|
33
|
+
# Anything that accepts a line / record at a time when #<< is called on it.
|
34
|
+
#
|
35
|
+
# format: [Symbol]
|
36
|
+
# :csv, :hash, :array, :json, :psv, :fixed
|
37
|
+
#
|
38
|
+
# For all other parameters, see Tabular::Header.new
|
39
|
+
#
|
40
|
+
# columns: nil, allowed_columns: nil, required_columns: nil, skip_unknown: true)
|
41
|
+
def initialize(delimited, columns: nil, **args)
|
42
|
+
@tabular = IOStreams::Tabular.new(columns: columns, **args)
|
43
|
+
@delimited = delimited
|
44
|
+
|
45
|
+
# Render header line when `columns` is supplied.
|
46
|
+
delimited << @tabular.render(columns) if columns && @tabular.requires_header?
|
47
|
+
end
|
48
|
+
|
49
|
+
# Supply an array to render; anything else raises ArgumentError
|
50
|
+
def <<(array)
|
51
|
+
raise(ArgumentError, 'Must supply an Array') unless array.is_a?(Array)
|
52
|
+
# If header (columns) was not supplied as an argument, assume first line is the header.
|
53
|
+
tabular.header.columns = array if tabular.requires_header?
|
54
|
+
delimited << tabular.render(array)
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
attr_reader :tabular, :delimited
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|