iostreams 1.7.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 849ceda63eb30f95762a7c985cd215d424e62afd68ab20776e8d16c188dd6aed
4
- data.tar.gz: 8e26af86c40bb673ce36855a7fb30d1c4b401edc3eac0b27a71b9760cfe865dd
3
+ metadata.gz: c93dac4c226c66c4e36554311858ff328299fc4202c257cdb0e7f2c8e82e323b
4
+ data.tar.gz: fa96f9d6007769b812ab5506e1ca5ca20866ed9d6fc6bc98d75b50a0d50a23b2
5
5
  SHA512:
6
- metadata.gz: 99318c4c64e0133df57b84429b1c2f9caa064abb1405ace5d55208e41b6bf8bb8fa83a75db8ae46d53753f10d566bab53971d95871ed4011bab4571d31bebe8a
7
- data.tar.gz: bfba3a033c753e3fe05f798177b8f3c7ee8f566eabaf9223984fa902b288cb3515154f621a4e97f75b6c5bc31da88f284390c2d82c5015d7682ecf08c2a671d3
6
+ metadata.gz: 2538af2be40ad81287b4c3501f0ff9672ea82289adcb2de1e976d868fd9aa655f01d496b6299ba851193e1ed832750e6bcb21f6ed88179aab303f348e4bef60f
7
+ data.tar.gz: e7d6c8f09d0377f0bdfb83fdfc61559afd1581d1e8425cc9a3a76754be2e92003f202246c0a43b83b12107774965388aa71f2c2950835797363ce057ca5fe24b
@@ -79,15 +79,16 @@ module IOStreams
79
79
  # with their options that will be applied when the reader or writer is invoked.
80
80
  def pipeline
81
81
  return streams.dup.freeze if streams
82
- return {}.freeze unless file_name
83
82
 
84
- built_streams = {}
85
- # Encode stream is always first
86
- built_streams[:encode] = options[:encode] if options&.key?(:encode)
83
+ build_pipeline.freeze
84
+ end
87
85
 
88
- opts = options || {}
89
- parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
90
- built_streams.freeze
86
+ # Removes the named stream from the current pipeline.
87
+ # If the stream pipeline has not yet been built it will be built from the file_name if present.
88
+ # Note: Any options must be set _before_ calling this method.
89
+ def remove_from_pipeline(stream_name)
90
+ @streams ||= build_pipeline
91
+ @streams.delete(stream_name.to_sym)
91
92
  end
92
93
 
93
94
  # Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
@@ -106,6 +107,18 @@ module IOStreams
106
107
 
107
108
  private
108
109
 
110
+ def build_pipeline
111
+ return {} unless file_name
112
+
113
+ built_streams = {}
114
+ # Encode stream is always first
115
+ built_streams[:encode] = options[:encode] if options&.key?(:encode)
116
+
117
+ opts = options || {}
118
+ parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
119
+ built_streams
120
+ end
121
+
109
122
  def class_for_stream(type, stream)
110
123
  ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
111
124
  raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
@@ -75,6 +75,8 @@ module IOStreams
75
75
  # Note:
76
76
  # * The line delimiter is _not_ returned.
77
77
  def each
78
+ return to_enum(__method__) unless block_given?
79
+
78
80
  line_count = 0
79
81
  until eof?
80
82
  line = readline
@@ -146,8 +148,8 @@ module IOStreams
146
148
  data
147
149
  end
148
150
 
149
- # Returns [Integer] the number of characters read into the internal buffer
150
- # Returns 0 on EOF
151
+ # Returns whether more data is available to read
152
+ # Returns false on EOF
151
153
  def read_block
152
154
  return false if @eof
153
155
 
@@ -157,7 +159,8 @@ module IOStreams
157
159
  @input_stream.read(@buffer_size, @read_cache_buffer)
158
160
  rescue ArgumentError
159
161
  # Handle arity of -1 when just 0..1
160
- @read_cache_buffer = nil
162
+ @read_cache_buffer = nil
163
+ @use_read_cache_buffer = false
161
164
  @input_stream.read(@buffer_size)
162
165
  end
163
166
  else
@@ -89,6 +89,11 @@ module IOStreams
89
89
  # "**.rb" "lib/song.rb" true
90
90
  # "*" "dave/.profile" true
91
91
  def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
92
+ unless block_given?
93
+ return to_enum(__method__, pattern,
94
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
95
+ end
96
+
92
97
  flags = 0
93
98
  flags |= ::File::FNM_CASEFOLD unless case_sensitive
94
99
  flags |= ::File::FNM_DOTMATCH if hidden
@@ -284,6 +284,11 @@ module IOStreams
284
284
  # Notes:
285
285
  # - Currently all S3 lookups are recursive as of the pattern regardless of whether the pattern includes `**`.
286
286
  def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
287
+ unless block_given?
288
+ return to_enum(__method__, pattern,
289
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
290
+ end
291
+
287
292
  matcher = Matcher.new(self, pattern, case_sensitive: case_sensitive, hidden: hidden)
288
293
 
289
294
  # When the pattern includes an exact file name without any pattern characters
@@ -142,6 +142,11 @@ module IOStreams
142
142
  # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
143
143
  # :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
144
144
  def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
145
+ unless block_given?
146
+ return to_enum(__method__, pattern,
147
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
148
+ end
149
+
145
150
  Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
146
151
 
147
152
  flags = ::File::FNM_EXTGLOB
@@ -68,6 +68,8 @@ module IOStreams
68
68
  end
69
69
 
70
70
  def each
71
+ return to_enum(__method__) unless block_given?
72
+
71
73
  @line_reader.each do |line|
72
74
  if @tabular.header?
73
75
  @tabular.parse_header(line)
@@ -40,6 +40,8 @@ module IOStreams
40
40
  end
41
41
 
42
42
  def each
43
+ return to_enum(__method__) unless block_given?
44
+
43
45
  @line_reader.each do |line|
44
46
  if @tabular.header?
45
47
  columns = @tabular.parse_header(line)
@@ -56,6 +56,14 @@ module IOStreams
56
56
  builder.pipeline
57
57
  end
58
58
 
59
+ # Removes the named stream from the current pipeline.
60
+ # If the stream pipeline has not yet been built it will be built from the file_name if present.
61
+ # Note: Any options must be set _before_ calling this method.
62
+ def remove_from_pipeline(stream_name)
63
+ builder.remove_from_pipeline(stream_name)
64
+ self
65
+ end
66
+
59
67
  # Iterate over a file / stream returning one line at a time.
60
68
  #
61
69
  # Example: Read a line at a time
@@ -2,6 +2,9 @@ module IOStreams
2
2
  class Tabular
3
3
  # Process files / streams that start with a header.
4
4
  class Header
5
+ # Column names that begin with this prefix have been rejected and should be ignored.
6
+ IGNORE_PREFIX = "__rejected__".freeze
7
+
5
8
  attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown
6
9
 
7
10
  # Header
@@ -17,8 +20,8 @@ module IOStreams
17
20
  # List of columns to allow.
18
21
  # Default: nil ( Allow all columns )
19
22
  # Note:
20
- # When supplied any columns that are rejected will be returned in the cleansed columns
21
- # as nil so that they can be ignored during processing.
23
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
24
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
22
25
  #
23
26
  # required_columns [Array<String>]
24
27
  # List of columns that must be present, otherwise an Exception is raised.
@@ -44,8 +47,10 @@ module IOStreams
44
47
  # - Spaces and '-' are converted to '_'.
45
48
  # - All characters except for letters, digits, and '_' are stripped.
46
49
  #
47
- # Notes
48
- # * Raises Tabular::InvalidHeader when there are no non-nil columns left after cleansing.
50
+ # Notes:
51
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
52
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
53
+ # * Raises Tabular::InvalidHeader when there are no non-rejected columns left after cleansing.
49
54
  def cleanse!
50
55
  return [] if columns.nil? || columns.empty?
51
56
 
@@ -56,7 +61,7 @@ module IOStreams
56
61
  cleansed
57
62
  else
58
63
  ignored_columns << column
59
- nil
64
+ "#{IGNORE_PREFIX}#{column}"
60
65
  end
61
66
  end
62
67
 
@@ -122,7 +127,7 @@ module IOStreams
122
127
 
123
128
  def array_to_hash(row)
124
129
  h = {}
125
- columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) }
130
+ columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) }
126
131
  h
127
132
  end
128
133
 
@@ -134,12 +139,7 @@ module IOStreams
134
139
  hash = hash.dup
135
140
  unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
136
141
  end
137
- # Hash#slice as of Ruby 2.5
138
- if hash.respond_to?(:slice)
139
- hash.slice(*columns)
140
- else
141
- columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
142
- end
142
+ hash.slice(*columns)
143
143
  end
144
144
 
145
145
  def cleanse_column(name)
@@ -1,3 +1,3 @@
1
1
  module IOStreams
2
- VERSION = "1.7.0".freeze
2
+ VERSION = "1.10.1".freeze
3
3
  end
data/test/builder_test.rb CHANGED
@@ -237,6 +237,21 @@ class BuilderTest < Minitest::Test
237
237
  end
238
238
  end
239
239
 
240
+ describe "#remove_from_pipeline" do
241
+ let(:file_name) { "my/path/abc.bz2.pgp" }
242
+ it "removes a named stream from the pipeline" do
243
+ assert_equal({bz2: {}, pgp: {}}, streams.pipeline)
244
+ streams.remove_from_pipeline(:bz2)
245
+ assert_equal({pgp: {}}, streams.pipeline)
246
+ end
247
+ it "removes a named stream from the pipeline with options" do
248
+ streams.option(:pgp, passphrase: "unlock-me")
249
+ assert_equal({bz2: {}, pgp: {passphrase: "unlock-me"}}, streams.pipeline)
250
+ streams.remove_from_pipeline(:bz2)
251
+ assert_equal({pgp: {passphrase: "unlock-me"}}, streams.pipeline)
252
+ end
253
+ end
254
+
240
255
  describe "#execute" do
241
256
  it "directly calls block for an empty stream" do
242
257
  string_io = StringIO.new
@@ -98,6 +98,13 @@ class LineReaderTest < Minitest::Test
98
98
  assert_equal data.size, count
99
99
  end
100
100
 
101
+ it "with no block returns enumerator" do
102
+ lines = IOStreams::Line::Reader.file(file_name) do |io|
103
+ io.each.first(100)
104
+ end
105
+ assert_equal data, lines
106
+ end
107
+
101
108
  it "each_line stream" do
102
109
  lines = []
103
110
  count = File.open(file_name) do |file|
@@ -5,7 +5,7 @@ module Paths
5
5
  describe IOStreams::Paths::File do
6
6
  let(:root) { IOStreams::Paths::File.new("/tmp/iostreams").delete_all }
7
7
  let(:directory) { root.join("/some_test_dir") }
8
- let(:data) { "Hello World" }
8
+ let(:data) { "Hello World\nHow are you doing?\nOn this fine day" }
9
9
  let(:file_path) do
10
10
  path = root.join("some_test_dir/test_file.txt")
11
11
  path.writer { |io| io << data }
@@ -17,6 +17,20 @@ module Paths
17
17
  path
18
18
  end
19
19
 
20
+ describe "#each" do
21
+ it "reads lines" do
22
+ records = []
23
+ count = file_path.each { |line| records << line }
24
+ assert_equal count, data.lines.size
25
+ assert_equal data.lines.collect(&:strip), records
26
+ end
27
+
28
+ it "reads lines without block" do
29
+ records = file_path.each.first(100)
30
+ assert_equal data.lines.collect(&:strip), records
31
+ end
32
+ end
33
+
20
34
  describe "#each_child" do
21
35
  it "iterates an empty path" do
22
36
  none = nil
@@ -48,6 +62,12 @@ module Paths
48
62
  actual = root.children("**/Test*.TXT", case_sensitive: true).collect(&:to_s)
49
63
  refute_equal expected, actual.sort
50
64
  end
65
+
66
+ it "with no block returns enumerator" do
67
+ expected = [file_path.to_s, file_path2.to_s]
68
+ actual = root.each_child("**/*").first(100).collect(&:to_s)
69
+ assert_equal expected.sort, actual.sort
70
+ end
51
71
  end
52
72
 
53
73
  describe "#mkpath" do
@@ -46,6 +46,13 @@ class RecordReaderTest < Minitest::Test
46
46
  end
47
47
  assert_equal expected, rows
48
48
  end
49
+
50
+ it "with no block returns enumerator" do
51
+ records = IOStreams::Record::Reader.file(file_name, cleanse_header: false) do |io|
52
+ io.each.first(100)
53
+ end
54
+ assert_equal expected, records
55
+ end
49
56
  end
50
57
 
51
58
  describe "#collect" do
@@ -10,7 +10,7 @@ class RowReaderTest < Minitest::Test
10
10
  CSV.read(file_name)
11
11
  end
12
12
 
13
- describe ".open" do
13
+ describe "#each" do
14
14
  it "file" do
15
15
  rows = []
16
16
  count = IOStreams::Row::Reader.file(file_name) do |io|
@@ -20,6 +20,13 @@ class RowReaderTest < Minitest::Test
20
20
  assert_equal expected.size, count
21
21
  end
22
22
 
23
+ it "with no block returns enumerator" do
24
+ rows = IOStreams::Row::Reader.file(file_name) do |io|
25
+ io.each.first(100)
26
+ end
27
+ assert_equal expected, rows
28
+ end
29
+
23
30
  it "stream" do
24
31
  rows = []
25
32
  count = IOStreams::Line::Reader.file(file_name) do |file|
data/test/tabular_test.rb CHANGED
@@ -58,12 +58,12 @@ class TabularTest < Minitest::Test
58
58
  assert_equal header, tabular.header.columns
59
59
  end
60
60
 
61
- it "white listed snake cased alphanumeric columns" do
61
+ it "allowed list snake cased alphanumeric columns" do
62
62
  tabular = IOStreams::Tabular.new(
63
- columns: ["Ard Vark", "password", "robot version", "$$$"],
63
+ columns: ["Ard Vark", "Password", "robot version", "$$$"],
64
64
  allowed_columns: %w[ard_vark robot_version]
65
65
  )
66
- expected_header = ["ard_vark", nil, "robot_version", nil]
66
+ expected_header = ["ard_vark", "__rejected__Password", "robot_version", "__rejected__$$$"]
67
67
  cleansed_header = tabular.cleanse_header!
68
68
  assert_equal(expected_header, cleansed_header)
69
69
  end
@@ -82,13 +82,13 @@ class TabularTest < Minitest::Test
82
82
  assert_equal @allowed_columns, tabular.header.allowed_columns
83
83
  end
84
84
 
85
- it "nils columns not in the whitelist" do
85
+ it "nils columns not in the allowed list" do
86
86
  tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns)
87
87
  header = tabular.cleanse_header!
88
- assert_equal ["first", nil, "third"], header
88
+ assert_equal ["first", "__rejected__Unknown Column", "third"], header
89
89
  end
90
90
 
91
- it "raises exception for columns not in the whitelist" do
91
+ it "raises exception for columns not in the allowed list" do
92
92
  tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns, skip_unknown: false)
93
93
  exc = assert_raises IOStreams::Errors::InvalidHeader do
94
94
  tabular.cleanse_header!
@@ -218,7 +218,7 @@ class TabularTest < Minitest::Test
218
218
  end
219
219
  end
220
220
 
221
- it "skips columns not in the whitelist" do
221
+ it "skips columns not in the allowed list" do
222
222
  tabular.header.allowed_columns = %w[first second third fourth fifth]
223
223
  tabular.cleanse_header!
224
224
  assert hash = tabular.record_parse("1,2,3")
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iostreams
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.10.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-23 00:00:00.000000000 Z
11
+ date: 2021-08-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  requirements: []
135
- rubygems_version: 3.2.15
135
+ rubygems_version: 3.2.22
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: Input and Output streaming for Ruby.