iostreams 1.7.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/io_streams/builder.rb +20 -7
- data/lib/io_streams/line/reader.rb +6 -3
- data/lib/io_streams/paths/file.rb +5 -0
- data/lib/io_streams/paths/s3.rb +5 -0
- data/lib/io_streams/paths/sftp.rb +5 -0
- data/lib/io_streams/record/reader.rb +2 -0
- data/lib/io_streams/row/reader.rb +2 -0
- data/lib/io_streams/stream.rb +8 -0
- data/lib/io_streams/tabular/header.rb +12 -12
- data/lib/io_streams/version.rb +1 -1
- data/test/builder_test.rb +15 -0
- data/test/line_reader_test.rb +7 -0
- data/test/paths/file_test.rb +21 -1
- data/test/record_reader_test.rb +7 -0
- data/test/row_reader_test.rb +8 -1
- data/test/tabular_test.rb +7 -7
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c93dac4c226c66c4e36554311858ff328299fc4202c257cdb0e7f2c8e82e323b
|
4
|
+
data.tar.gz: fa96f9d6007769b812ab5506e1ca5ca20866ed9d6fc6bc98d75b50a0d50a23b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2538af2be40ad81287b4c3501f0ff9672ea82289adcb2de1e976d868fd9aa655f01d496b6299ba851193e1ed832750e6bcb21f6ed88179aab303f348e4bef60f
|
7
|
+
data.tar.gz: e7d6c8f09d0377f0bdfb83fdfc61559afd1581d1e8425cc9a3a76754be2e92003f202246c0a43b83b12107774965388aa71f2c2950835797363ce057ca5fe24b
|
data/lib/io_streams/builder.rb
CHANGED
@@ -79,15 +79,16 @@ module IOStreams
|
|
79
79
|
# with their options that will be applied when the reader or writer is invoked.
|
80
80
|
def pipeline
|
81
81
|
return streams.dup.freeze if streams
|
82
|
-
return {}.freeze unless file_name
|
83
82
|
|
84
|
-
|
85
|
-
|
86
|
-
built_streams[:encode] = options[:encode] if options&.key?(:encode)
|
83
|
+
build_pipeline.freeze
|
84
|
+
end
|
87
85
|
|
88
|
-
|
89
|
-
|
90
|
-
|
86
|
+
# Removes the named stream from the current pipeline.
|
87
|
+
# If the stream pipeline has not yet been built it will be built from the file_name if present.
|
88
|
+
# Note: Any options must be set _before_ calling this method.
|
89
|
+
def remove_from_pipeline(stream_name)
|
90
|
+
@streams ||= build_pipeline
|
91
|
+
@streams.delete(stream_name.to_sym)
|
91
92
|
end
|
92
93
|
|
93
94
|
# Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
|
@@ -106,6 +107,18 @@ module IOStreams
|
|
106
107
|
|
107
108
|
private
|
108
109
|
|
110
|
+
def build_pipeline
|
111
|
+
return {} unless file_name
|
112
|
+
|
113
|
+
built_streams = {}
|
114
|
+
# Encode stream is always first
|
115
|
+
built_streams[:encode] = options[:encode] if options&.key?(:encode)
|
116
|
+
|
117
|
+
opts = options || {}
|
118
|
+
parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
|
119
|
+
built_streams
|
120
|
+
end
|
121
|
+
|
109
122
|
def class_for_stream(type, stream)
|
110
123
|
ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
|
111
124
|
raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
|
data/lib/io_streams/line/reader.rb
CHANGED
@@ -75,6 +75,8 @@ module IOStreams
|
|
75
75
|
# Note:
|
76
76
|
# * The line delimiter is _not_ returned.
|
77
77
|
def each
|
78
|
+
return to_enum(__method__) unless block_given?
|
79
|
+
|
78
80
|
line_count = 0
|
79
81
|
until eof?
|
80
82
|
line = readline
|
@@ -146,8 +148,8 @@ module IOStreams
|
|
146
148
|
data
|
147
149
|
end
|
148
150
|
|
149
|
-
# Returns
|
150
|
-
# Returns
|
151
|
+
# Returns whether more data is available to read
|
152
|
+
# Returns false on EOF
|
151
153
|
def read_block
|
152
154
|
return false if @eof
|
153
155
|
|
@@ -157,7 +159,8 @@ module IOStreams
|
|
157
159
|
@input_stream.read(@buffer_size, @read_cache_buffer)
|
158
160
|
rescue ArgumentError
|
159
161
|
# Handle arity of -1 when just 0..1
|
160
|
-
@read_cache_buffer
|
162
|
+
@read_cache_buffer = nil
|
163
|
+
@use_read_cache_buffer = false
|
161
164
|
@input_stream.read(@buffer_size)
|
162
165
|
end
|
163
166
|
else
|
data/lib/io_streams/paths/file.rb
CHANGED
@@ -89,6 +89,11 @@ module IOStreams
|
|
89
89
|
# "**.rb" "lib/song.rb" true
|
90
90
|
# "*" "dave/.profile" true
|
91
91
|
def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
|
92
|
+
unless block_given?
|
93
|
+
return to_enum(__method__, pattern,
|
94
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
95
|
+
end
|
96
|
+
|
92
97
|
flags = 0
|
93
98
|
flags |= ::File::FNM_CASEFOLD unless case_sensitive
|
94
99
|
flags |= ::File::FNM_DOTMATCH if hidden
|
data/lib/io_streams/paths/s3.rb
CHANGED
@@ -284,6 +284,11 @@ module IOStreams
|
|
284
284
|
# Notes:
|
285
285
|
# - Currently all S3 lookups are recursive as of the pattern regardless of whether the pattern includes `**`.
|
286
286
|
def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
|
287
|
+
unless block_given?
|
288
|
+
return to_enum(__method__, pattern,
|
289
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
290
|
+
end
|
291
|
+
|
287
292
|
matcher = Matcher.new(self, pattern, case_sensitive: case_sensitive, hidden: hidden)
|
288
293
|
|
289
294
|
# When the pattern includes an exact file name without any pattern characters
|
data/lib/io_streams/paths/sftp.rb
CHANGED
@@ -142,6 +142,11 @@ module IOStreams
|
|
142
142
|
# sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
|
143
143
|
# :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
|
144
144
|
def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
|
145
|
+
unless block_given?
|
146
|
+
return to_enum(__method__, pattern,
|
147
|
+
case_sensitive: case_sensitive, directories: directories, hidden: hidden)
|
148
|
+
end
|
149
|
+
|
145
150
|
Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
|
146
151
|
|
147
152
|
flags = ::File::FNM_EXTGLOB
|
data/lib/io_streams/stream.rb
CHANGED
@@ -56,6 +56,14 @@ module IOStreams
|
|
56
56
|
builder.pipeline
|
57
57
|
end
|
58
58
|
|
59
|
+
# Removes the named stream from the current pipeline.
|
60
|
+
# If the stream pipeline has not yet been built it will be built from the file_name if present.
|
61
|
+
# Note: Any options must be set _before_ calling this method.
|
62
|
+
def remove_from_pipeline(stream_name)
|
63
|
+
builder.remove_from_pipeline(stream_name)
|
64
|
+
self
|
65
|
+
end
|
66
|
+
|
59
67
|
# Iterate over a file / stream returning one line at a time.
|
60
68
|
#
|
61
69
|
# Example: Read a line at a time
|
data/lib/io_streams/tabular/header.rb
CHANGED
@@ -2,6 +2,9 @@ module IOStreams
|
|
2
2
|
class Tabular
|
3
3
|
# Process files / streams that start with a header.
|
4
4
|
class Header
|
5
|
+
# Column names that begin with this prefix have been rejected and should be ignored.
|
6
|
+
IGNORE_PREFIX = "__rejected__".freeze
|
7
|
+
|
5
8
|
attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown
|
6
9
|
|
7
10
|
# Header
|
@@ -17,8 +20,8 @@ module IOStreams
|
|
17
20
|
# List of columns to allow.
|
18
21
|
# Default: nil ( Allow all columns )
|
19
22
|
# Note:
|
20
|
-
#
|
21
|
-
#
|
23
|
+
# * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
|
24
|
+
# For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
|
22
25
|
#
|
23
26
|
# required_columns [Array<String>]
|
24
27
|
# List of columns that must be present, otherwise an Exception is raised.
|
@@ -44,8 +47,10 @@ module IOStreams
|
|
44
47
|
# - Spaces and '-' are converted to '_'.
|
45
48
|
# - All characters except for letters, digits, and '_' are stripped.
|
46
49
|
#
|
47
|
-
# Notes
|
48
|
-
# *
|
50
|
+
# Notes:
|
51
|
+
# * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
|
52
|
+
# For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
|
53
|
+
# * Raises Tabular::InvalidHeader when there are no rejected columns left after cleansing.
|
49
54
|
def cleanse!
|
50
55
|
return [] if columns.nil? || columns.empty?
|
51
56
|
|
@@ -56,7 +61,7 @@ module IOStreams
|
|
56
61
|
cleansed
|
57
62
|
else
|
58
63
|
ignored_columns << column
|
59
|
-
|
64
|
+
"#{IGNORE_PREFIX}#{column}"
|
60
65
|
end
|
61
66
|
end
|
62
67
|
|
@@ -122,7 +127,7 @@ module IOStreams
|
|
122
127
|
|
123
128
|
def array_to_hash(row)
|
124
129
|
h = {}
|
125
|
-
columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) }
|
130
|
+
columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) }
|
126
131
|
h
|
127
132
|
end
|
128
133
|
|
@@ -134,12 +139,7 @@ module IOStreams
|
|
134
139
|
hash = hash.dup
|
135
140
|
unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
|
136
141
|
end
|
137
|
-
|
138
|
-
if hash.respond_to?(:slice)
|
139
|
-
hash.slice(*columns)
|
140
|
-
else
|
141
|
-
columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
|
142
|
-
end
|
142
|
+
hash.slice(*columns)
|
143
143
|
end
|
144
144
|
|
145
145
|
def cleanse_column(name)
|
data/lib/io_streams/version.rb
CHANGED
data/test/builder_test.rb
CHANGED
@@ -237,6 +237,21 @@ class BuilderTest < Minitest::Test
|
|
237
237
|
end
|
238
238
|
end
|
239
239
|
|
240
|
+
describe "#remove_from_pipeline" do
|
241
|
+
let(:file_name) { "my/path/abc.bz2.pgp" }
|
242
|
+
it "removes a named stream from the pipeline" do
|
243
|
+
assert_equal({bz2: {}, pgp: {}}, streams.pipeline)
|
244
|
+
streams.remove_from_pipeline(:bz2)
|
245
|
+
assert_equal({pgp: {}}, streams.pipeline)
|
246
|
+
end
|
247
|
+
it "removes a named stream from the pipeline with options" do
|
248
|
+
streams.option(:pgp, passphrase: "unlock-me")
|
249
|
+
assert_equal({bz2: {}, pgp: {passphrase: "unlock-me"}}, streams.pipeline)
|
250
|
+
streams.remove_from_pipeline(:bz2)
|
251
|
+
assert_equal({pgp: {passphrase: "unlock-me"}}, streams.pipeline)
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
240
255
|
describe "#execute" do
|
241
256
|
it "directly calls block for an empty stream" do
|
242
257
|
string_io = StringIO.new
|
data/test/line_reader_test.rb
CHANGED
@@ -98,6 +98,13 @@ class LineReaderTest < Minitest::Test
|
|
98
98
|
assert_equal data.size, count
|
99
99
|
end
|
100
100
|
|
101
|
+
it "with no block returns enumerator" do
|
102
|
+
lines = IOStreams::Line::Reader.file(file_name) do |io|
|
103
|
+
io.each.first(100)
|
104
|
+
end
|
105
|
+
assert_equal data, lines
|
106
|
+
end
|
107
|
+
|
101
108
|
it "each_line stream" do
|
102
109
|
lines = []
|
103
110
|
count = File.open(file_name) do |file|
|
data/test/paths/file_test.rb
CHANGED
@@ -5,7 +5,7 @@ module Paths
|
|
5
5
|
describe IOStreams::Paths::File do
|
6
6
|
let(:root) { IOStreams::Paths::File.new("/tmp/iostreams").delete_all }
|
7
7
|
let(:directory) { root.join("/some_test_dir") }
|
8
|
-
let(:data) { "Hello World" }
|
8
|
+
let(:data) { "Hello World\nHow are you doing?\nOn this fine day" }
|
9
9
|
let(:file_path) do
|
10
10
|
path = root.join("some_test_dir/test_file.txt")
|
11
11
|
path.writer { |io| io << data }
|
@@ -17,6 +17,20 @@ module Paths
|
|
17
17
|
path
|
18
18
|
end
|
19
19
|
|
20
|
+
describe "#each" do
|
21
|
+
it "reads lines" do
|
22
|
+
records = []
|
23
|
+
count = file_path.each { |line| records << line }
|
24
|
+
assert_equal count, data.lines.size
|
25
|
+
assert_equal data.lines.collect(&:strip), records
|
26
|
+
end
|
27
|
+
|
28
|
+
it "reads lines without block" do
|
29
|
+
records = file_path.each.first(100)
|
30
|
+
assert_equal data.lines.collect(&:strip), records
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
20
34
|
describe "#each_child" do
|
21
35
|
it "iterates an empty path" do
|
22
36
|
none = nil
|
@@ -48,6 +62,12 @@ module Paths
|
|
48
62
|
actual = root.children("**/Test*.TXT", case_sensitive: true).collect(&:to_s)
|
49
63
|
refute_equal expected, actual.sort
|
50
64
|
end
|
65
|
+
|
66
|
+
it "with no block returns enumerator" do
|
67
|
+
expected = [file_path.to_s, file_path2.to_s]
|
68
|
+
actual = root.each_child("**/*").first(100).collect(&:to_s)
|
69
|
+
assert_equal expected.sort, actual.sort
|
70
|
+
end
|
51
71
|
end
|
52
72
|
|
53
73
|
describe "#mkpath" do
|
data/test/record_reader_test.rb
CHANGED
@@ -46,6 +46,13 @@ class RecordReaderTest < Minitest::Test
|
|
46
46
|
end
|
47
47
|
assert_equal expected, rows
|
48
48
|
end
|
49
|
+
|
50
|
+
it "with no block returns enumerator" do
|
51
|
+
records = IOStreams::Record::Reader.file(file_name, cleanse_header: false) do |io|
|
52
|
+
io.each.first(100)
|
53
|
+
end
|
54
|
+
assert_equal expected, records
|
55
|
+
end
|
49
56
|
end
|
50
57
|
|
51
58
|
describe "#collect" do
|
data/test/row_reader_test.rb
CHANGED
@@ -10,7 +10,7 @@ class RowReaderTest < Minitest::Test
|
|
10
10
|
CSV.read(file_name)
|
11
11
|
end
|
12
12
|
|
13
|
-
describe "
|
13
|
+
describe "#each" do
|
14
14
|
it "file" do
|
15
15
|
rows = []
|
16
16
|
count = IOStreams::Row::Reader.file(file_name) do |io|
|
@@ -20,6 +20,13 @@ class RowReaderTest < Minitest::Test
|
|
20
20
|
assert_equal expected.size, count
|
21
21
|
end
|
22
22
|
|
23
|
+
it "with no block returns enumerator" do
|
24
|
+
rows = IOStreams::Row::Reader.file(file_name) do |io|
|
25
|
+
io.each.first(100)
|
26
|
+
end
|
27
|
+
assert_equal expected, rows
|
28
|
+
end
|
29
|
+
|
23
30
|
it "stream" do
|
24
31
|
rows = []
|
25
32
|
count = IOStreams::Line::Reader.file(file_name) do |file|
|
data/test/tabular_test.rb
CHANGED
@@ -58,12 +58,12 @@ class TabularTest < Minitest::Test
|
|
58
58
|
assert_equal header, tabular.header.columns
|
59
59
|
end
|
60
60
|
|
61
|
-
it "
|
61
|
+
it "allowed list snake cased alphanumeric columns" do
|
62
62
|
tabular = IOStreams::Tabular.new(
|
63
|
-
columns: ["Ard Vark", "
|
63
|
+
columns: ["Ard Vark", "Password", "robot version", "$$$"],
|
64
64
|
allowed_columns: %w[ard_vark robot_version]
|
65
65
|
)
|
66
|
-
expected_header = ["ard_vark",
|
66
|
+
expected_header = ["ard_vark", "__rejected__Password", "robot_version", "__rejected__$$$"]
|
67
67
|
cleansed_header = tabular.cleanse_header!
|
68
68
|
assert_equal(expected_header, cleansed_header)
|
69
69
|
end
|
@@ -82,13 +82,13 @@ class TabularTest < Minitest::Test
|
|
82
82
|
assert_equal @allowed_columns, tabular.header.allowed_columns
|
83
83
|
end
|
84
84
|
|
85
|
-
it "nils columns not in the
|
85
|
+
it "nils columns not in the allowed list" do
|
86
86
|
tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns)
|
87
87
|
header = tabular.cleanse_header!
|
88
|
-
assert_equal ["first",
|
88
|
+
assert_equal ["first", "__rejected__Unknown Column", "third"], header
|
89
89
|
end
|
90
90
|
|
91
|
-
it "raises exception for columns not in the
|
91
|
+
it "raises exception for columns not in the allowed list" do
|
92
92
|
tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns, skip_unknown: false)
|
93
93
|
exc = assert_raises IOStreams::Errors::InvalidHeader do
|
94
94
|
tabular.cleanse_header!
|
@@ -218,7 +218,7 @@ class TabularTest < Minitest::Test
|
|
218
218
|
end
|
219
219
|
end
|
220
220
|
|
221
|
-
it "skips columns not in the
|
221
|
+
it "skips columns not in the allowed list" do
|
222
222
|
tabular.header.allowed_columns = %w[first second third fourth fifth]
|
223
223
|
tabular.cleanse_header!
|
224
224
|
assert hash = tabular.record_parse("1,2,3")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iostreams
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.0
|
4
|
+
version: 1.10.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
132
|
- !ruby/object:Gem::Version
|
133
133
|
version: '0'
|
134
134
|
requirements: []
|
135
|
-
rubygems_version: 3.2.
|
135
|
+
rubygems_version: 3.2.22
|
136
136
|
signing_key:
|
137
137
|
specification_version: 4
|
138
138
|
summary: Input and Output streaming for Ruby.
|