iostreams 1.2.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -3
- data/lib/io_streams/builder.rb +9 -6
- data/lib/io_streams/bzip2/reader.rb +5 -3
- data/lib/io_streams/bzip2/writer.rb +5 -3
- data/lib/io_streams/deprecated.rb +1 -1
- data/lib/io_streams/encode/reader.rb +1 -1
- data/lib/io_streams/encode/writer.rb +1 -1
- data/lib/io_streams/errors.rb +10 -0
- data/lib/io_streams/io_streams.rb +1 -1
- data/lib/io_streams/paths/file.rb +4 -4
- data/lib/io_streams/paths/http.rb +6 -3
- data/lib/io_streams/paths/s3.rb +27 -8
- data/lib/io_streams/paths/sftp.rb +16 -4
- data/lib/io_streams/pgp.rb +80 -61
- data/lib/io_streams/stream.rb +20 -9
- data/lib/io_streams/tabular.rb +5 -2
- data/lib/io_streams/tabular/header.rb +14 -12
- data/lib/io_streams/tabular/parser/fixed.rb +135 -25
- data/lib/io_streams/utils.rb +4 -4
- data/lib/io_streams/version.rb +1 -1
- data/lib/io_streams/zip/reader.rb +1 -1
- data/test/bzip2_writer_test.rb +6 -4
- data/test/io_streams_test.rb +2 -2
- data/test/paths/file_test.rb +1 -1
- data/test/paths/s3_test.rb +3 -3
- data/test/paths/sftp_test.rb +4 -4
- data/test/pgp_test.rb +54 -4
- data/test/pgp_writer_test.rb +3 -3
- data/test/tabular_test.rb +55 -26
- data/test/test_helper.rb +1 -1
- metadata +3 -3
data/lib/io_streams/stream.rb
CHANGED
@@ -17,7 +17,7 @@ module IOStreams
|
|
17
17
|
#
|
18
18
|
# Example:
|
19
19
|
#
|
20
|
-
# IOStreams.path(
|
20
|
+
# IOStreams.path("tempfile2527").stream(:zip).stream(:pgp, passphrase: "receiver_passphrase").read
|
21
21
|
def stream(stream, **options)
|
22
22
|
builder.stream(stream, **options)
|
23
23
|
self
|
@@ -27,12 +27,12 @@ module IOStreams
|
|
27
27
|
# If the relevant stream is not found for this file it is ignored.
|
28
28
|
# For example, if the file does not have a pgp extension then the pgp option is not relevant.
|
29
29
|
#
|
30
|
-
# IOStreams.path(
|
30
|
+
# IOStreams.path("keep_safe.pgp").option(:pgp, passphrase: "receiver_passphrase").read
|
31
31
|
#
|
32
32
|
# # In this case the file is not pgp so the `passphrase` option is ignored.
|
33
|
-
# IOStreams.path(
|
33
|
+
# IOStreams.path("keep_safe.enc").option(:pgp, passphrase: "receiver_passphrase").read
|
34
34
|
#
|
35
|
-
# IOStreams.path(output_file_name).option(:pgp, passphrase:
|
35
|
+
# IOStreams.path(output_file_name).option(:pgp, passphrase: "receiver_passphrase").read
|
36
36
|
def option(stream, **options)
|
37
37
|
builder.option(stream, **options)
|
38
38
|
self
|
@@ -177,7 +177,7 @@ module IOStreams
|
|
177
177
|
end
|
178
178
|
|
179
179
|
def copy_to(target, convert: true)
|
180
|
-
target = IOStreams.
|
180
|
+
target = IOStreams.new(target)
|
181
181
|
target.copy_from(self, convert: convert)
|
182
182
|
end
|
183
183
|
|
@@ -282,7 +282,12 @@ module IOStreams
|
|
282
282
|
def line_reader(embedded_within: nil, **args)
|
283
283
|
embedded_within = '"' if embedded_within.nil? && builder.file_name&.include?(".csv")
|
284
284
|
|
285
|
-
stream_reader
|
285
|
+
stream_reader do |io|
|
286
|
+
yield IOStreams::Line::Reader.new(io,
|
287
|
+
original_file_name: builder.file_name,
|
288
|
+
embedded_within: embedded_within,
|
289
|
+
**args)
|
290
|
+
end
|
286
291
|
end
|
287
292
|
|
288
293
|
# Iterate over a file / stream returning each line as an array, one at a time.
|
@@ -306,19 +311,25 @@ module IOStreams
|
|
306
311
|
def line_writer(**args, &block)
|
307
312
|
return block.call(io_stream) if io_stream&.is_a?(IOStreams::Line::Writer)
|
308
313
|
|
309
|
-
writer
|
314
|
+
writer do |io|
|
315
|
+
IOStreams::Line::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
|
316
|
+
end
|
310
317
|
end
|
311
318
|
|
312
319
|
def row_writer(delimiter: $/, **args, &block)
|
313
320
|
return block.call(io_stream) if io_stream&.is_a?(IOStreams::Row::Writer)
|
314
321
|
|
315
|
-
line_writer(delimiter: delimiter)
|
322
|
+
line_writer(delimiter: delimiter) do |io|
|
323
|
+
IOStreams::Row::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
|
324
|
+
end
|
316
325
|
end
|
317
326
|
|
318
327
|
def record_writer(delimiter: $/, **args, &block)
|
319
328
|
return block.call(io_stream) if io_stream&.is_a?(IOStreams::Record::Writer)
|
320
329
|
|
321
|
-
line_writer(delimiter: delimiter)
|
330
|
+
line_writer(delimiter: delimiter) do |io|
|
331
|
+
IOStreams::Record::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
|
332
|
+
end
|
322
333
|
end
|
323
334
|
end
|
324
335
|
end
|
data/lib/io_streams/tabular.rb
CHANGED
@@ -89,7 +89,7 @@ module IOStreams
|
|
89
89
|
else
|
90
90
|
self.class.parser_class(format)
|
91
91
|
end
|
92
|
-
@parser = format_options ? klass.new(format_options) : klass.new
|
92
|
+
@parser = format_options ? klass.new(**format_options) : klass.new
|
93
93
|
end
|
94
94
|
|
95
95
|
# Returns [true|false] whether a header is still required in order to parse or render the current format.
|
@@ -142,7 +142,10 @@ module IOStreams
|
|
142
142
|
return unless requires_header?
|
143
143
|
|
144
144
|
if IOStreams::Utils.blank?(header.columns)
|
145
|
-
raise(
|
145
|
+
raise(
|
146
|
+
Errors::MissingHeader,
|
147
|
+
"Header columns must be set before attempting to render a header for format: #{format.inspect}"
|
148
|
+
)
|
146
149
|
end
|
147
150
|
|
148
151
|
parser.render(header.columns, header)
|
@@ -109,7 +109,10 @@ module IOStreams
|
|
109
109
|
end
|
110
110
|
|
111
111
|
unless row.is_a?(Array)
|
112
|
-
raise(
|
112
|
+
raise(
|
113
|
+
IOStreams::Errors::TypeMismatch,
|
114
|
+
"Don't know how to convert #{row.class.name} to an Array without the header columns being set."
|
115
|
+
)
|
113
116
|
end
|
114
117
|
|
115
118
|
row
|
@@ -126,18 +129,17 @@ module IOStreams
|
|
126
129
|
# Perform cleansing on returned Hash keys during the narrowing process.
|
127
130
|
# For example, avoids issues with case etc.
|
128
131
|
def cleanse_hash(hash)
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
132
|
+
unmatched = columns - hash.keys
|
133
|
+
unless unmatched.empty?
|
134
|
+
hash = hash.dup
|
135
|
+
unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
|
136
|
+
end
|
137
|
+
# Hash#slice as of Ruby 2.5
|
138
|
+
if hash.respond_to?(:slice)
|
139
|
+
hash.slice(*columns)
|
140
|
+
else
|
141
|
+
columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
|
139
142
|
end
|
140
|
-
h
|
141
143
|
end
|
142
144
|
|
143
145
|
def cleanse_column(name)
|
@@ -3,31 +3,66 @@ module IOStreams
|
|
3
3
|
module Parser
|
4
4
|
# Parsing and rendering fixed length data
|
5
5
|
class Fixed < Base
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :layout, :truncate
|
7
7
|
|
8
8
|
# Returns [IOStreams::Tabular::Parser]
|
9
9
|
#
|
10
10
|
# Parameters:
|
11
11
|
# layout: [Array<Hash>]
|
12
12
|
# [
|
13
|
-
# {
|
14
|
-
# {
|
15
|
-
# {
|
13
|
+
# {size: 23, key: "name"},
|
14
|
+
# {size: 40, key: "address"},
|
15
|
+
# {size: 2},
|
16
|
+
# {size: 5, key: "zip"},
|
17
|
+
# {size: 8, key: "age", type: :integer},
|
18
|
+
# {size: 10, key: "weight", type: :float, decimals: 2}
|
16
19
|
# ]
|
17
|
-
|
18
|
-
|
20
|
+
#
|
21
|
+
# Notes:
|
22
|
+
# * Leave out the name of the key to ignore that column during parsing,
|
23
|
+
# and to space fill when rendering. For example as a filler.
|
24
|
+
#
|
25
|
+
# Types:
|
26
|
+
# :string
|
27
|
+
# This is the default type.
|
28
|
+
# Applies space padding and the value is left justified.
|
29
|
+
# Returns value as a String
|
30
|
+
# :integer
|
31
|
+
# Applies zero padding to the left.
|
32
|
+
# Returns value as an Integer
|
33
|
+
# Raises Errors::ValueTooLong when the supplied value cannot be rendered in `size` characters.
|
34
|
+
# :float
|
35
|
+
# Applies zero padding to the left.
|
36
|
+
# Returns value as a float.
|
37
|
+
# The :size is the total size of this field including the `.` and the decimals.
|
38
|
+
# Number of :decimals
|
39
|
+
# Raises Errors::ValueTooLong when the supplied value cannot be rendered in `size` characters.
|
40
|
+
def initialize(layout:, truncate: true)
|
41
|
+
@layout = Layout.new(layout)
|
42
|
+
@truncate = truncate
|
43
|
+
end
|
44
|
+
|
45
|
+
# The required line length for every fixed length line
|
46
|
+
def line_length
|
47
|
+
layout.length
|
19
48
|
end
|
20
49
|
|
21
50
|
# Returns [String] fixed layout values extracted from the supplied hash.
|
22
|
-
#
|
51
|
+
#
|
52
|
+
# Notes:
|
53
|
+
# * A nil value is considered an empty string
|
54
|
+
# * When a supplied value exceeds the column size it is truncated.
|
23
55
|
def render(row, header)
|
24
56
|
hash = header.to_hash(row)
|
25
57
|
|
26
58
|
result = ""
|
27
|
-
|
28
|
-
|
29
|
-
value
|
30
|
-
|
59
|
+
layout.columns.each do |column|
|
60
|
+
value = hash[column.key].to_s
|
61
|
+
if !truncate && (value.length > column.size)
|
62
|
+
raise(Errors::ValueTooLong, "Value: #{value.inspect} is too long to fit into column #{column.key} of size #{column.size}")
|
63
|
+
end
|
64
|
+
|
65
|
+
result << column.render(value)
|
31
66
|
end
|
32
67
|
result
|
33
68
|
end
|
@@ -36,32 +71,107 @@ module IOStreams
|
|
36
71
|
# String will be encoded to `encoding`
|
37
72
|
def parse(line)
|
38
73
|
unless line.is_a?(String)
|
39
|
-
raise(
|
74
|
+
raise(Errors::TypeMismatch, "Line must be a String when format is :fixed. Actual: #{line.class.name}")
|
75
|
+
end
|
76
|
+
|
77
|
+
if line.length != layout.length
|
78
|
+
raise(Errors::InvalidLineLength, "Expected line length: #{layout.length}, actual line length: #{line.length}")
|
40
79
|
end
|
41
80
|
|
42
81
|
hash = {}
|
43
82
|
index = 0
|
44
|
-
|
45
|
-
|
46
|
-
index
|
47
|
-
|
83
|
+
layout.columns.each do |column|
|
84
|
+
# Ignore "columns" that have no keys. E.g. Fillers
|
85
|
+
hash[column.key] = column.parse(line[index, column.size]) if column.key
|
86
|
+
index += column.size
|
48
87
|
end
|
49
88
|
hash
|
50
89
|
end
|
51
90
|
|
91
|
+
# The header is required as an argument and cannot be supplied in the file itself.
|
92
|
+
def requires_header?
|
93
|
+
false
|
94
|
+
end
|
95
|
+
|
52
96
|
private
|
53
97
|
|
54
|
-
|
98
|
+
class Layout
|
99
|
+
attr_reader :columns, :length
|
100
|
+
|
101
|
+
# Returns [Array<FixedLayout>] the layout for this fixed width file.
|
102
|
+
# Also validates values
|
103
|
+
def initialize(layout)
|
104
|
+
@length = 0
|
105
|
+
@columns = parse_layout(layout)
|
106
|
+
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def parse_layout(layout)
|
111
|
+
@length = 0
|
112
|
+
layout.collect do |hash|
|
113
|
+
raise(Errors::InvalidLayout, "Missing required :size in: #{hash.inspect}") unless hash.key?(:size)
|
114
|
+
|
115
|
+
column = Column.new(**hash)
|
116
|
+
@length += column.size
|
117
|
+
column
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
class Column
|
123
|
+
TYPES = %i[string integer float].freeze
|
124
|
+
|
125
|
+
attr_reader :key, :size, :type, :decimals
|
126
|
+
|
127
|
+
def initialize(key: nil, size:, type: :string, decimals: 2)
|
128
|
+
@key = key
|
129
|
+
@size = size.to_i
|
130
|
+
@type = type.to_sym
|
131
|
+
@decimals = decimals
|
132
|
+
|
133
|
+
raise(Errors::InvalidLayout, "Size #{size.inspect} must be positive") unless @size.positive?
|
134
|
+
raise(Errors::InvalidLayout, "Unknown type: #{type.inspect}") unless TYPES.include?(type)
|
135
|
+
end
|
136
|
+
|
137
|
+
def parse(value)
|
138
|
+
return if value.nil?
|
139
|
+
|
140
|
+
stripped_value = value.to_s.strip
|
141
|
+
|
142
|
+
case type
|
143
|
+
when :string
|
144
|
+
stripped_value
|
145
|
+
when :integer
|
146
|
+
stripped_value.length.zero? ? nil : value.to_i
|
147
|
+
when :float
|
148
|
+
stripped_value.length.zero? ? nil : value.to_f
|
149
|
+
else
|
150
|
+
raise(Errors::InvalidLayout, "Unsupported type: #{type.inspect}")
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def render(value)
|
155
|
+
case type
|
156
|
+
when :string
|
157
|
+
format("%-#{size}.#{size}s", value.to_s)
|
158
|
+
when :integer
|
159
|
+
formatted = format("%0#{size}d", value.to_i)
|
160
|
+
if formatted.length > size
|
161
|
+
raise(Errors::ValueTooLong, "Value: #{value} is too large to fit into column:#{key} of size:#{size}")
|
162
|
+
end
|
55
163
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
raise(ArgumentError, "Missing required :key and :size in: #{map.inspect}") unless size && key
|
164
|
+
formatted
|
165
|
+
when :float
|
166
|
+
formatted = format("%0#{size}.#{decimals}f", value.to_f)
|
167
|
+
if formatted.length > size
|
168
|
+
raise(Errors::ValueTooLong, "Value: #{value} is too large to fit into column:#{key} of size:#{size}")
|
169
|
+
end
|
63
170
|
|
64
|
-
|
171
|
+
formatted
|
172
|
+
else
|
173
|
+
raise(Errors::InvalidLayout, "Unsupported type: #{type.inspect}")
|
174
|
+
end
|
65
175
|
end
|
66
176
|
end
|
67
177
|
end
|
data/lib/io_streams/utils.rb
CHANGED
@@ -49,10 +49,10 @@ module IOStreams
|
|
49
49
|
@user = uri.user
|
50
50
|
@password = uri.password
|
51
51
|
@port = uri.port
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
52
|
+
return unless uri.query
|
53
|
+
|
54
|
+
@query = {}
|
55
|
+
::URI.decode_www_form(uri.query).each { |key, value| @query[key] = value }
|
56
56
|
end
|
57
57
|
end
|
58
58
|
end
|
data/lib/io_streams/version.rb
CHANGED
data/test/bzip2_writer_test.rb
CHANGED
@@ -22,13 +22,14 @@ class Bzip2WriterTest < Minitest::Test
|
|
22
22
|
it "file" do
|
23
23
|
IOStreams::Bzip2::Writer.file(file_name) do |io|
|
24
24
|
io.write(decompressed)
|
25
|
+
io.write(decompressed)
|
25
26
|
end
|
26
27
|
|
27
28
|
File.open(file_name, "rb") do |file|
|
28
|
-
io =
|
29
|
+
io = ::Bzip2::FFI::Reader.new(file)
|
29
30
|
result = io.read
|
30
31
|
temp_file.delete
|
31
|
-
assert_equal decompressed, result
|
32
|
+
assert_equal decompressed + decompressed, result
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
@@ -36,12 +37,13 @@ class Bzip2WriterTest < Minitest::Test
|
|
36
37
|
io_string = StringIO.new("".b)
|
37
38
|
IOStreams::Bzip2::Writer.stream(io_string) do |io|
|
38
39
|
io.write(decompressed)
|
40
|
+
io.write(decompressed)
|
39
41
|
end
|
40
42
|
|
41
43
|
io = StringIO.new(io_string.string)
|
42
|
-
rbzip2 =
|
44
|
+
rbzip2 = ::Bzip2::FFI::Reader.new(io)
|
43
45
|
data = rbzip2.read
|
44
|
-
assert_equal decompressed, data
|
46
|
+
assert_equal decompressed + decompressed, data
|
45
47
|
end
|
46
48
|
end
|
47
49
|
end
|
data/test/io_streams_test.rb
CHANGED
@@ -16,7 +16,7 @@ module IOStreams
|
|
16
16
|
end
|
17
17
|
|
18
18
|
let :json_file_name do
|
19
|
-
"/tmp/
|
19
|
+
"/tmp/iostreams_abc.json"
|
20
20
|
end
|
21
21
|
|
22
22
|
describe ".root" do
|
@@ -90,7 +90,7 @@ module IOStreams
|
|
90
90
|
it "hash reader detects json format from file name" do
|
91
91
|
::File.open(json_file_name, "wb") { |file| file.write(expected_json) }
|
92
92
|
rows = []
|
93
|
-
path = IOStreams.path(
|
93
|
+
path = IOStreams.path(json_file_name)
|
94
94
|
path.each(:hash) do |row|
|
95
95
|
rows << row
|
96
96
|
end
|
data/test/paths/file_test.rb
CHANGED
data/test/paths/s3_test.rb
CHANGED
@@ -73,7 +73,7 @@ module Paths
|
|
73
73
|
|
74
74
|
describe "#reader" do
|
75
75
|
it "reads" do
|
76
|
-
assert_equal raw, existing_path.
|
76
|
+
assert_equal raw, existing_path.read
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
@@ -89,7 +89,7 @@ module Paths
|
|
89
89
|
|
90
90
|
describe "#writer" do
|
91
91
|
it "writes" do
|
92
|
-
assert_equal
|
92
|
+
assert_equal(raw.size, write_path.writer { |io| io.write(raw) })
|
93
93
|
assert write_path.exist?
|
94
94
|
assert_equal raw, write_path.read
|
95
95
|
end
|
@@ -138,7 +138,7 @@ module Paths
|
|
138
138
|
|
139
139
|
it "returns all the children under a sub-dir" do
|
140
140
|
write_raw_data
|
141
|
-
expected =
|
141
|
+
expected = %w[abd/test1.txt abd/test5.file].collect { |file_name| each_root.join(file_name) }
|
142
142
|
assert_equal expected.collect(&:to_s).sort, each_root.children("abd/*").collect(&:to_s).sort
|
143
143
|
end
|
144
144
|
|