iostreams 1.7.0 → 1.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/io_streams/builder.rb +20 -7
- data/lib/io_streams/line/reader.rb +6 -3
- data/lib/io_streams/paths/file.rb +5 -0
- data/lib/io_streams/paths/s3.rb +5 -0
- data/lib/io_streams/paths/sftp.rb +5 -0
- data/lib/io_streams/record/reader.rb +2 -0
- data/lib/io_streams/row/reader.rb +2 -0
- data/lib/io_streams/stream.rb +8 -0
- data/lib/io_streams/tabular/header.rb +12 -12
- data/lib/io_streams/version.rb +1 -1
- data/test/builder_test.rb +15 -0
- data/test/line_reader_test.rb +7 -0
- data/test/paths/file_test.rb +21 -1
- data/test/record_reader_test.rb +7 -0
- data/test/row_reader_test.rb +8 -1
- data/test/tabular_test.rb +7 -7
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c93dac4c226c66c4e36554311858ff328299fc4202c257cdb0e7f2c8e82e323b
|
4
|
+
data.tar.gz: fa96f9d6007769b812ab5506e1ca5ca20866ed9d6fc6bc98d75b50a0d50a23b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2538af2be40ad81287b4c3501f0ff9672ea82289adcb2de1e976d868fd9aa655f01d496b6299ba851193e1ed832750e6bcb21f6ed88179aab303f348e4bef60f
|
7
|
+
data.tar.gz: e7d6c8f09d0377f0bdfb83fdfc61559afd1581d1e8425cc9a3a76754be2e92003f202246c0a43b83b12107774965388aa71f2c2950835797363ce057ca5fe24b
|
data/lib/io_streams/builder.rb
CHANGED
@@ -79,15 +79,16 @@ module IOStreams
|
|
79
79
|
# with their options that will be applied when the reader or writer is invoked.
|
80
80
|
def pipeline
|
81
81
|
return streams.dup.freeze if streams
|
82
|
-
return {}.freeze unless file_name
|
83
82
|
|
84
|
-
|
85
|
-
|
86
|
-
built_streams[:encode] = options[:encode] if options&.key?(:encode)
|
83
|
+
build_pipeline.freeze
|
84
|
+
end
|
87
85
|
|
88
|
-
|
89
|
-
|
90
|
-
|
86
|
+
# Removes the named stream from the current pipeline.
|
87
|
+
# If the stream pipeline has not yet been built it will be built from the file_name if present.
|
88
|
+
# Note: Any options must be set _before_ calling this method.
|
89
|
+
def remove_from_pipeline(stream_name)
|
90
|
+
@streams ||= build_pipeline
|
91
|
+
@streams.delete(stream_name.to_sym)
|
91
92
|
end
|
92
93
|
|
93
94
|
# Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
|
@@ -106,6 +107,18 @@ module IOStreams
|
|
106
107
|
|
107
108
|
private
|
108
109
|
|
110
|
+
def build_pipeline
|
111
|
+
return {} unless file_name
|
112
|
+
|
113
|
+
built_streams = {}
|
114
|
+
# Encode stream is always first
|
115
|
+
built_streams[:encode] = options[:encode] if options&.key?(:encode)
|
116
|
+
|
117
|
+
opts = options || {}
|
118
|
+
parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
|
119
|
+
built_streams
|
120
|
+
end
|
121
|
+
|
109
122
|
def class_for_stream(type, stream)
|
110
123
|
ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
|
111
124
|
raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
@@ -75,6 +75,8 @@ module IOStreams
|
|
75
75
|
# Note:
|
76
76
|
# * The line delimiter is _not_ returned.
|
77
77
|
def each
|
78
|
+
return to_enum(__method__) unless block_given?
|
79
|
+
|
78
80
|
line_count = 0
|
79
81
|
until eof?
|
80
82
|
line = readline
|
@@ -146,8 +148,8 @@ module IOStreams
|
|
146
148
|
data
|
147
149
|
end
|
148
150
|
|
149
|
-
# Returns
|
150
|
-
# Returns
|
151
|
+
# Returns whether more data is available to read
|
152
|
+
# Returns false on EOF
|
151
153
|
def read_block
|
152
154
|
return false if @eof
|
153
155
|
|
@@ -157,7 +159,8 @@ module IOStreams
|
|
157
159
|
@input_stream.read(@buffer_size, @read_cache_buffer)
|
158
160
|
rescue ArgumentError
|
159
161
|
# Handle streams whose #read reports an arity of -1 but accepts only 0..1 arguments (no output buffer)
|
160
|
-
@read_cache_buffer
|
162
|
+
@read_cache_buffer = nil
|
163
|
+
@use_read_cache_buffer = false
|
161
164
|
@input_stream.read(@buffer_size)
|
162
165
|
end
|
163
166
|
else
|
@@ -89,6 +89,11 @@ module IOStreams
|
|
89
89
|
# "**.rb" "lib/song.rb" true
|
90
90
|
# "*" "dave/.profile" true
|
91
91
|
def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
|
92
|
+
unless block_given?
|
93
|
+
return to_enum(__method__, pattern,
|
94
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
95
|
+
end
|
96
|
+
|
92
97
|
flags = 0
|
93
98
|
flags |= ::File::FNM_CASEFOLD unless case_sensitive
|
94
99
|
flags |= ::File::FNM_DOTMATCH if hidden
|
data/lib/io_streams/paths/s3.rb
CHANGED
@@ -284,6 +284,11 @@ module IOStreams
|
|
284
284
|
# Notes:
|
285
285
|
# - Currently all S3 lookups are recursive as of the pattern regardless of whether the pattern includes `**`.
|
286
286
|
def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
|
287
|
+
unless block_given?
|
288
|
+
return to_enum(__method__, pattern,
|
289
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
290
|
+
end
|
291
|
+
|
287
292
|
matcher = Matcher.new(self, pattern, case_sensitive: case_sensitive, hidden: hidden)
|
288
293
|
|
289
294
|
# When the pattern includes an exact file name without any pattern characters
|
@@ -142,6 +142,11 @@ module IOStreams
|
|
142
142
|
# sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
|
143
143
|
# :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
|
144
144
|
def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
|
145
|
+
unless block_given?
|
146
|
+
return to_enum(__method__, pattern,
|
147
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
148
|
+
end
|
149
|
+
|
145
150
|
Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
|
146
151
|
|
147
152
|
flags = ::File::FNM_EXTGLOB
|
data/lib/io_streams/stream.rb
CHANGED
@@ -56,6 +56,14 @@ module IOStreams
|
|
56
56
|
builder.pipeline
|
57
57
|
end
|
58
58
|
|
59
|
+
# Removes the named stream from the current pipeline.
|
60
|
+
# If the stream pipeline has not yet been built it will be built from the file_name if present.
|
61
|
+
# Note: Any options must be set _before_ calling this method.
|
62
|
+
def remove_from_pipeline(stream_name)
|
63
|
+
builder.remove_from_pipeline(stream_name)
|
64
|
+
self
|
65
|
+
end
|
66
|
+
|
59
67
|
# Iterate over a file / stream returning one line at a time.
|
60
68
|
#
|
61
69
|
# Example: Read a line at a time
|
@@ -2,6 +2,9 @@ module IOStreams
|
|
2
2
|
class Tabular
|
3
3
|
# Process files / streams that start with a header.
|
4
4
|
class Header
|
5
|
+
# Column names that begin with this prefix have been rejected and should be ignored.
|
6
|
+
IGNORE_PREFIX = "__rejected__".freeze
|
7
|
+
|
5
8
|
attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown
|
6
9
|
|
7
10
|
# Header
|
@@ -17,8 +20,8 @@ module IOStreams
|
|
17
20
|
# List of columns to allow.
|
18
21
|
# Default: nil ( Allow all columns )
|
19
22
|
# Note:
|
20
|
-
#
|
21
|
-
#
|
23
|
+
# * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
|
24
|
+
# For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
|
22
25
|
#
|
23
26
|
# required_columns [Array<String>]
|
24
27
|
# List of columns that must be present, otherwise an Exception is raised.
|
@@ -44,8 +47,10 @@ module IOStreams
|
|
44
47
|
# - Spaces and '-' are converted to '_'.
|
45
48
|
# - All characters except for letters, digits, and '_' are stripped.
|
46
49
|
#
|
47
|
-
# Notes
|
48
|
-
# *
|
50
|
+
# Notes:
|
51
|
+
# * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
|
52
|
+
# For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
|
53
|
+
# * Raises Tabular::InvalidHeader when there are no rejected columns left after cleansing.
|
49
54
|
def cleanse!
|
50
55
|
return [] if columns.nil? || columns.empty?
|
51
56
|
|
@@ -56,7 +61,7 @@ module IOStreams
|
|
56
61
|
cleansed
|
57
62
|
else
|
58
63
|
ignored_columns << column
|
59
|
-
|
64
|
+
"#{IGNORE_PREFIX}#{column}"
|
60
65
|
end
|
61
66
|
end
|
62
67
|
|
@@ -122,7 +127,7 @@ module IOStreams
|
|
122
127
|
|
123
128
|
def array_to_hash(row)
|
124
129
|
h = {}
|
125
|
-
columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) }
|
130
|
+
columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) }
|
126
131
|
h
|
127
132
|
end
|
128
133
|
|
@@ -134,12 +139,7 @@ module IOStreams
|
|
134
139
|
hash = hash.dup
|
135
140
|
unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
|
136
141
|
end
|
137
|
-
|
138
|
-
if hash.respond_to?(:slice)
|
139
|
-
hash.slice(*columns)
|
140
|
-
else
|
141
|
-
columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
|
142
|
-
end
|
142
|
+
hash.slice(*columns)
|
143
143
|
end
|
144
144
|
|
145
145
|
def cleanse_column(name)
|
data/lib/io_streams/version.rb
CHANGED
data/test/builder_test.rb
CHANGED
@@ -237,6 +237,21 @@ class BuilderTest < Minitest::Test
|
|
237
237
|
end
|
238
238
|
end
|
239
239
|
|
240
|
+
describe "#remove_from_pipeline" do
|
241
|
+
let(:file_name) { "my/path/abc.bz2.pgp" }
|
242
|
+
it "removes a named stream from the pipeline" do
|
243
|
+
assert_equal({bz2: {}, pgp: {}}, streams.pipeline)
|
244
|
+
streams.remove_from_pipeline(:bz2)
|
245
|
+
assert_equal({pgp: {}}, streams.pipeline)
|
246
|
+
end
|
247
|
+
it "removes a named stream from the pipeline with options" do
|
248
|
+
streams.option(:pgp, passphrase: "unlock-me")
|
249
|
+
assert_equal({bz2: {}, pgp: {passphrase: "unlock-me"}}, streams.pipeline)
|
250
|
+
streams.remove_from_pipeline(:bz2)
|
251
|
+
assert_equal({pgp: {passphrase: "unlock-me"}}, streams.pipeline)
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
240
255
|
describe "#execute" do
|
241
256
|
it "directly calls block for an empty stream" do
|
242
257
|
string_io = StringIO.new
|
data/test/line_reader_test.rb
CHANGED
@@ -98,6 +98,13 @@ class LineReaderTest < Minitest::Test
|
|
98
98
|
assert_equal data.size, count
|
99
99
|
end
|
100
100
|
|
101
|
+
it "with no block returns enumerator" do
|
102
|
+
lines = IOStreams::Line::Reader.file(file_name) do |io|
|
103
|
+
io.each.first(100)
|
104
|
+
end
|
105
|
+
assert_equal data, lines
|
106
|
+
end
|
107
|
+
|
101
108
|
it "each_line stream" do
|
102
109
|
lines = []
|
103
110
|
count = File.open(file_name) do |file|
|
data/test/paths/file_test.rb
CHANGED
@@ -5,7 +5,7 @@ module Paths
|
|
5
5
|
describe IOStreams::Paths::File do
|
6
6
|
let(:root) { IOStreams::Paths::File.new("/tmp/iostreams").delete_all }
|
7
7
|
let(:directory) { root.join("/some_test_dir") }
|
8
|
-
let(:data) { "Hello World" }
|
8
|
+
let(:data) { "Hello World\nHow are you doing?\nOn this fine day" }
|
9
9
|
let(:file_path) do
|
10
10
|
path = root.join("some_test_dir/test_file.txt")
|
11
11
|
path.writer { |io| io << data }
|
@@ -17,6 +17,20 @@ module Paths
|
|
17
17
|
path
|
18
18
|
end
|
19
19
|
|
20
|
+
describe "#each" do
|
21
|
+
it "reads lines" do
|
22
|
+
records = []
|
23
|
+
count = file_path.each { |line| records << line }
|
24
|
+
assert_equal count, data.lines.size
|
25
|
+
assert_equal data.lines.collect(&:strip), records
|
26
|
+
end
|
27
|
+
|
28
|
+
it "reads lines without block" do
|
29
|
+
records = file_path.each.first(100)
|
30
|
+
assert_equal data.lines.collect(&:strip), records
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
20
34
|
describe "#each_child" do
|
21
35
|
it "iterates an empty path" do
|
22
36
|
none = nil
|
@@ -48,6 +62,12 @@ module Paths
|
|
48
62
|
actual = root.children("**/Test*.TXT", case_sensitive: true).collect(&:to_s)
|
49
63
|
refute_equal expected, actual.sort
|
50
64
|
end
|
65
|
+
|
66
|
+
it "with no block returns enumerator" do
|
67
|
+
expected = [file_path.to_s, file_path2.to_s]
|
68
|
+
actual = root.each_child("**/*").first(100).collect(&:to_s)
|
69
|
+
assert_equal expected.sort, actual.sort
|
70
|
+
end
|
51
71
|
end
|
52
72
|
|
53
73
|
describe "#mkpath" do
|
data/test/record_reader_test.rb
CHANGED
@@ -46,6 +46,13 @@ class RecordReaderTest < Minitest::Test
|
|
46
46
|
end
|
47
47
|
assert_equal expected, rows
|
48
48
|
end
|
49
|
+
|
50
|
+
it "with no block returns enumerator" do
|
51
|
+
records = IOStreams::Record::Reader.file(file_name, cleanse_header: false) do |io|
|
52
|
+
io.each.first(100)
|
53
|
+
end
|
54
|
+
assert_equal expected, records
|
55
|
+
end
|
49
56
|
end
|
50
57
|
|
51
58
|
describe "#collect" do
|
data/test/row_reader_test.rb
CHANGED
@@ -10,7 +10,7 @@ class RowReaderTest < Minitest::Test
|
|
10
10
|
CSV.read(file_name)
|
11
11
|
end
|
12
12
|
|
13
|
-
describe "
|
13
|
+
describe "#each" do
|
14
14
|
it "file" do
|
15
15
|
rows = []
|
16
16
|
count = IOStreams::Row::Reader.file(file_name) do |io|
|
@@ -20,6 +20,13 @@ class RowReaderTest < Minitest::Test
|
|
20
20
|
assert_equal expected.size, count
|
21
21
|
end
|
22
22
|
|
23
|
+
it "with no block returns enumerator" do
|
24
|
+
rows = IOStreams::Row::Reader.file(file_name) do |io|
|
25
|
+
io.each.first(100)
|
26
|
+
end
|
27
|
+
assert_equal expected, rows
|
28
|
+
end
|
29
|
+
|
23
30
|
it "stream" do
|
24
31
|
rows = []
|
25
32
|
count = IOStreams::Line::Reader.file(file_name) do |file|
|
data/test/tabular_test.rb
CHANGED
@@ -58,12 +58,12 @@ class TabularTest < Minitest::Test
|
|
58
58
|
assert_equal header, tabular.header.columns
|
59
59
|
end
|
60
60
|
|
61
|
-
it "
|
61
|
+
it "allowed list snake cased alphanumeric columns" do
|
62
62
|
tabular = IOStreams::Tabular.new(
|
63
|
-
columns: ["Ard Vark", "
|
63
|
+
columns: ["Ard Vark", "Password", "robot version", "$$$"],
|
64
64
|
allowed_columns: %w[ard_vark robot_version]
|
65
65
|
)
|
66
|
-
expected_header = ["ard_vark",
|
66
|
+
expected_header = ["ard_vark", "__rejected__Password", "robot_version", "__rejected__$$$"]
|
67
67
|
cleansed_header = tabular.cleanse_header!
|
68
68
|
assert_equal(expected_header, cleansed_header)
|
69
69
|
end
|
@@ -82,13 +82,13 @@ class TabularTest < Minitest::Test
|
|
82
82
|
assert_equal @allowed_columns, tabular.header.allowed_columns
|
83
83
|
end
|
84
84
|
|
85
|
-
it "nils columns not in the
|
85
|
+
it "nils columns not in the allowed list" do
|
86
86
|
tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns)
|
87
87
|
header = tabular.cleanse_header!
|
88
|
-
assert_equal ["first",
|
88
|
+
assert_equal ["first", "__rejected__Unknown Column", "third"], header
|
89
89
|
end
|
90
90
|
|
91
|
-
it "raises exception for columns not in the
|
91
|
+
it "raises exception for columns not in the allowed list" do
|
92
92
|
tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns, skip_unknown: false)
|
93
93
|
exc = assert_raises IOStreams::Errors::InvalidHeader do
|
94
94
|
tabular.cleanse_header!
|
@@ -218,7 +218,7 @@ class TabularTest < Minitest::Test
|
|
218
218
|
end
|
219
219
|
end
|
220
220
|
|
221
|
-
it "skips columns not in the
|
221
|
+
it "skips columns not in the allowed list" do
|
222
222
|
tabular.header.allowed_columns = %w[first second third fourth fifth]
|
223
223
|
tabular.cleanse_header!
|
224
224
|
assert hash = tabular.record_parse("1,2,3")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iostreams
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.10.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
132
|
- !ruby/object:Gem::Version
|
133
133
|
version: '0'
|
134
134
|
requirements: []
|
135
|
-
rubygems_version: 3.2.
|
135
|
+
rubygems_version: 3.2.22
|
136
136
|
signing_key:
|
137
137
|
specification_version: 4
|
138
138
|
summary: Input and Output streaming for Ruby.
|