iostreams 1.6.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6dd68508835099ef4c9de7f81a9527927d3225138f0bf5ecd00c44194e11858
4
- data.tar.gz: 28535e95ca83a4cf0c522de4cd48889a489b4fcefdc8feb9a7bfe0b70124fd7a
3
+ metadata.gz: 06e71b70e09ed1f57a4d776795d48fc973d32888a078c9e42319fc3eaee56db0
4
+ data.tar.gz: 7d956af5453840aee0a8c8cca5e7e4e44c465e1dcb13030daa37f2abc8f5cc78
5
5
  SHA512:
6
- metadata.gz: 8e4d38ef41234f62fdcfde1ae3a96fe59203dcfc38562843106b081c7292efffdda36b88707674059a24b0fa2996d4c1dbe0627694e80a9b12d23d47195a53d3
7
- data.tar.gz: 23e16854c305542ee0976f570444d7b99cadcb6ec460d83bebc73a65cba9c28e3beba485c9943797b174b80bdd783c216d89d539e8aecdbda62e1c31f017efce
6
+ metadata.gz: 9ed848fcc886ed53d36504097036605d13735887a880ed65620460f55d78fc26771a18b1ab32e1685f765cf25097fa42f7e59ba233099aeb15a93b0005936bb5
7
+ data.tar.gz: 6301601fa85f7f9198e0c61fabdd0a9206c4dfb2b239c0e8155a01a47973f6cb545272a870a593d67437ef25476901aed9c1101c24cacef5ab22e4f794936398
@@ -79,15 +79,16 @@ module IOStreams
79
79
  # with their options that will be applied when the reader or writer is invoked.
80
80
  def pipeline
81
81
  return streams.dup.freeze if streams
82
- return {}.freeze unless file_name
83
82
 
84
- built_streams = {}
85
- # Encode stream is always first
86
- built_streams[:encode] = options[:encode] if options&.key?(:encode)
83
+ build_pipeline.freeze
84
+ end
87
85
 
88
- opts = options || {}
89
- parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
90
- built_streams.freeze
86
+ # Removes the named stream from the current pipeline.
87
+ # If the stream pipeline has not yet been built it will be built from the file_name if present.
88
+ # Note: Any options must be set _before_ calling this method.
89
+ def remove_from_pipeline(stream_name)
90
+ @streams ||= build_pipeline
91
+ @streams.delete(stream_name.to_sym)
91
92
  end
92
93
 
93
94
  # Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
@@ -97,16 +98,30 @@ module IOStreams
97
98
  end
98
99
 
99
100
  def format=(format)
100
- raise(ArgumentError, "Invalid format: #{format.inspect}") unless format.nil? || IOStreams::Tabular.registered_formats.include?(format)
101
+ unless format.nil? || IOStreams::Tabular.registered_formats.include?(format)
102
+ raise(ArgumentError, "Invalid format: #{format.inspect}")
103
+ end
101
104
 
102
105
  @format = format
103
106
  end
104
107
 
105
108
  private
106
109
 
110
+ def build_pipeline
111
+ return {} unless file_name
112
+
113
+ built_streams = {}
114
+ # Encode stream is always first
115
+ built_streams[:encode] = options[:encode] if options&.key?(:encode)
116
+
117
+ opts = options || {}
118
+ parse_extensions.each { |stream| built_streams[stream] = opts[stream] || {} }
119
+ built_streams
120
+ end
121
+
107
122
  def class_for_stream(type, stream)
108
123
  ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
109
- raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
124
+ raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
110
125
  ext.send("#{type}_class") || raise(ArgumentError, "No #{type} registered for Stream type: #{stream.inspect}")
111
126
  end
112
127
 
@@ -75,6 +75,8 @@ module IOStreams
75
75
  # Note:
76
76
  # * The line delimiter is _not_ returned.
77
77
  def each
78
+ return to_enum(__method__) unless block_given?
79
+
78
80
  line_count = 0
79
81
  until eof?
80
82
  line = readline
@@ -96,17 +98,17 @@ module IOStreams
96
98
  while line.count(@embedded_within).odd?
97
99
  if eof? || line.length > @buffer_size * 10
98
100
  raise(Errors::MalformedDataError.new(
99
- "Unbalanced delimited field, delimiter: #{@embedded_within}",
100
- initial_line_number
101
- ))
101
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
102
+ initial_line_number
103
+ ))
102
104
  end
103
105
  line << @delimiter
104
106
  next_line = _readline
105
107
  if next_line.nil?
106
108
  raise(Errors::MalformedDataError.new(
107
- "Unbalanced delimited field, delimiter: #{@embedded_within}",
108
- initial_line_number
109
- ))
109
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
110
+ initial_line_number
111
+ ))
110
112
  end
111
113
  line << next_line
112
114
  end
@@ -146,8 +148,8 @@ module IOStreams
146
148
  data
147
149
  end
148
150
 
149
- # Returns [Integer] the number of characters read into the internal buffer
150
- # Returns 0 on EOF
151
+ # Returns whether more data is available to read
152
+ # Returns false on EOF
151
153
  def read_block
152
154
  return false if @eof
153
155
 
@@ -157,7 +159,8 @@ module IOStreams
157
159
  @input_stream.read(@buffer_size, @read_cache_buffer)
158
160
  rescue ArgumentError
159
161
  # Handle arity of -1 when just 0..1
160
- @read_cache_buffer = nil
162
+ @read_cache_buffer = nil
163
+ @use_read_cache_buffer = false
161
164
  @input_stream.read(@buffer_size)
162
165
  end
163
166
  else
@@ -170,6 +173,9 @@ module IOStreams
170
173
  return false
171
174
  end
172
175
 
176
+ # When less data is returned than was requested, the end of the file has been reached with partial data.
177
+ @eof = true if block.size < @buffer_size
178
+
173
179
  if @buffer
174
180
  @buffer << block
175
181
  else
@@ -89,6 +89,11 @@ module IOStreams
89
89
  # "**.rb" "lib/song.rb" true
90
90
  # "*" "dave/.profile" true
91
91
  def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
92
+ unless block_given?
93
+ return to_enum(__method__, pattern,
94
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
95
+ end
96
+
92
97
  flags = 0
93
98
  flags |= ::File::FNM_CASEFOLD unless case_sensitive
94
99
  flags |= ::File::FNM_DOTMATCH if hidden
@@ -3,7 +3,7 @@ require "uri"
3
3
  module IOStreams
4
4
  module Paths
5
5
  class S3 < IOStreams::Path
6
- attr_reader :bucket_name, :client, :options
6
+ attr_reader :bucket_name, :options
7
7
 
8
8
  # Largest file size supported by the S3 copy object api.
9
9
  S3_COPY_OBJECT_SIZE_LIMIT = 5 * 1024 * 1024 * 1024
@@ -141,16 +141,17 @@ module IOStreams
141
141
 
142
142
  @bucket_name = uri.hostname
143
143
  key = uri.path.sub(%r{\A/}, "")
144
- if client.is_a?(Hash)
145
- client[:access_key_id] = access_key_id if access_key_id
146
- client[:secret_access_key] = secret_access_key if secret_access_key
147
- @client = ::Aws::S3::Client.new(client)
144
+
145
+ if client && !client.is_a?(Hash)
146
+ @client = client
148
147
  else
149
- @client = client || ::Aws::S3::Client.new(access_key_id: access_key_id, secret_access_key: secret_access_key)
148
+ @client_options = client.is_a?(Hash) ? client.dup : {}
149
+ @client_options[:access_key_id] = access_key_id if access_key_id
150
+ @client_options[:secret_access_key] = secret_access_key if secret_access_key
150
151
  end
151
- @options = args
152
152
 
153
- @options.merge(uri.query) if uri.query
153
+ @options = args
154
+ @options.merge!(uri.query.transform_keys(&:to_sym)) if uri.query
154
155
 
155
156
  super(key)
156
157
  end
@@ -190,11 +191,11 @@ module IOStreams
190
191
  end
191
192
 
192
193
  # Make S3 perform direct copies within S3 itself.
193
- def copy_to(target_path, convert: true)
194
- return super(target_path) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
194
+ def copy_to(target_path, convert: true, **args)
195
+ return super(target_path, convert: convert, **args) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
195
196
 
196
197
  target = IOStreams.new(target_path)
197
- return super(target) unless target.is_a?(self.class)
198
+ return super(target, convert: convert, **args) unless target.is_a?(self.class)
198
199
 
199
200
  source_name = ::File.join(bucket_name, path)
200
201
  client.copy_object(options.merge(bucket: target.bucket_name, key: target.path, copy_source: source_name))
@@ -202,11 +203,13 @@ module IOStreams
202
203
  end
203
204
 
204
205
  # Make S3 perform direct copies within S3 itself.
205
- def copy_from(source_path, convert: true)
206
- return super(source_path) if convert
206
+ def copy_from(source_path, convert: true, **args)
207
+ return super(source_path, convert: true, **args) if convert
207
208
 
208
209
  source = IOStreams.new(source_path)
209
- return super(source) if !source.is_a?(self.class) || (source.size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
210
+ if !source.is_a?(self.class) || (source.size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
211
+ return super(source, convert: convert, **args)
212
+ end
210
213
 
211
214
  source_name = ::File.join(source.bucket_name, source.path)
212
215
  client.copy_object(options.merge(bucket: bucket_name, key: path, copy_source: source_name))
@@ -281,6 +284,11 @@ module IOStreams
281
284
  # Notes:
282
285
  # - Currently all S3 lookups are recursive as of the pattern regardless of whether the pattern includes `**`.
283
286
  def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
287
+ unless block_given?
288
+ return to_enum(__method__, pattern,
289
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
290
+ end
291
+
284
292
  matcher = Matcher.new(self, pattern, case_sensitive: case_sensitive, hidden: hidden)
285
293
 
286
294
  # When the pattern includes an exact file name without any pattern characters
@@ -312,6 +320,11 @@ module IOStreams
312
320
  def partial_files_visible?
313
321
  false
314
322
  end
323
+
324
+ # Lazy load S3 client since it takes two seconds to create itself!
325
+ def client
326
+ @client ||= ::Aws::S3::Client.new(@client_options)
327
+ end
315
328
  end
316
329
  end
317
330
  end
@@ -47,9 +47,23 @@ module IOStreams
47
47
  # password: [String]
48
48
  # Password for the user.
49
49
  #
50
- # **ssh_options
51
- # Any other options supported by ssh_config.
52
- # `man ssh_config` to see all available options.
50
+ # ssh_options: [Hash]
51
+ # - IdentityKey [String]
52
+ # The identity key that this client should use to talk to this host.
53
+ # Under the covers this value is written to a file and then the file name is passed as `IdentityFile`
54
+ # - HostKey [String]
55
+ # The expected SSH Host key that is presented by the remote host.
56
+ Instead of storing the host key in the `known_hosts` file, it can be supplied explicitly
57
+ # using this option.
58
+ # Under the covers this value is written to a file and then the file name is passed as `UserKnownHostsFile`
59
+ # Notes:
60
+ # - It must contain the entire line that would be stored in `known_hosts`,
61
+ # including the hostname, ip address, key type and key value. This value is written as-is into a
62
+ # "known_hosts" like file and then passed into sftp using the `UserKnownHostsFile` option.
63
+ - The easiest way to generate the required value is to use `ssh-keyscan` and then supply that value in this field.
64
+ # For example: `ssh-keyscan hostname`
65
+ # - Any other options supported by ssh_config.
66
+ # `man ssh_config` to see all available options.
53
67
  #
54
68
  # Examples:
55
69
  #
@@ -128,6 +142,11 @@ module IOStreams
128
142
  # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
129
143
  # :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
130
144
  def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
145
+ unless block_given?
146
+ return to_enum(__method__, pattern,
147
+ case_sensitive: case_sensitive, directories: directories, hidden: hidden)
148
+ end
149
+
131
150
  Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
132
151
 
133
152
  flags = ::File::FNM_EXTGLOB
@@ -168,38 +187,36 @@ module IOStreams
168
187
  def sftp_download(remote_file_name, local_file_name)
169
188
  with_sftp_args do |args|
170
189
  Open3.popen2e(*args) do |writer, reader, waith_thr|
171
- begin
172
- # Give time for remote sftp server to get ready to accept the password.
173
- sleep self.class.before_password_wait_seconds
174
-
175
- writer.puts password
176
-
177
- # Give time for password to be processed and stdin to be passed to sftp process.
178
- sleep self.class.sshpass_wait_seconds
179
-
180
- writer.puts "get #{remote_file_name} #{local_file_name}"
181
- writer.puts "bye"
182
- writer.close
183
- out = reader.read.chomp
184
- unless waith_thr.value.success?
185
- raise(
186
- Errors::CommunicationsFailure,
187
- "Download failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
188
- )
189
- end
190
-
191
- out
192
- rescue Errno::EPIPE
193
- out = begin
194
- reader.read.chomp
195
- rescue StandardError
196
- nil
197
- end
190
+ # Give time for remote sftp server to get ready to accept the password.
191
+ sleep self.class.before_password_wait_seconds
192
+
193
+ writer.puts password
194
+
195
+ # Give time for password to be processed and stdin to be passed to sftp process.
196
+ sleep self.class.sshpass_wait_seconds
197
+
198
+ writer.puts "get #{remote_file_name} #{local_file_name}"
199
+ writer.puts "bye"
200
+ writer.close
201
+ out = reader.read.chomp
202
+ unless waith_thr.value.success?
198
203
  raise(
199
204
  Errors::CommunicationsFailure,
200
205
  "Download failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
201
206
  )
202
207
  end
208
+
209
+ out
210
+ rescue Errno::EPIPE
211
+ out = begin
212
+ reader.read.chomp
213
+ rescue StandardError
214
+ nil
215
+ end
216
+ raise(
217
+ Errors::CommunicationsFailure,
218
+ "Download failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
219
+ )
203
220
  end
204
221
  end
205
222
  end
@@ -207,48 +224,64 @@ module IOStreams
207
224
  def sftp_upload(local_file_name, remote_file_name)
208
225
  with_sftp_args do |args|
209
226
  Open3.popen2e(*args) do |writer, reader, waith_thr|
210
- begin
211
- writer.puts(password) if password
212
- # Give time for password to be processed and stdin to be passed to sftp process.
213
- sleep self.class.sshpass_wait_seconds
214
- writer.puts "put #{local_file_name.inspect} #{remote_file_name.inspect}"
215
- writer.puts "bye"
216
- writer.close
217
- out = reader.read.chomp
218
- unless waith_thr.value.success?
219
- raise(
220
- Errors::CommunicationsFailure,
221
- "Upload failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
222
- )
223
- end
224
-
225
- out
226
- rescue Errno::EPIPE
227
- out = begin
228
- reader.read.chomp
229
- rescue StandardError
230
- nil
231
- end
227
+ writer.puts(password) if password
228
+ # Give time for password to be processed and stdin to be passed to sftp process.
229
+ sleep self.class.sshpass_wait_seconds
230
+ writer.puts "put #{local_file_name.inspect} #{remote_file_name.inspect}"
231
+ writer.puts "bye"
232
+ writer.close
233
+ out = reader.read.chomp
234
+ unless waith_thr.value.success?
232
235
  raise(
233
236
  Errors::CommunicationsFailure,
234
237
  "Upload failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
235
238
  )
236
239
  end
240
+
241
+ out
242
+ rescue Errno::EPIPE
243
+ out = begin
244
+ reader.read.chomp
245
+ rescue StandardError
246
+ nil
247
+ end
248
+ raise(
249
+ Errors::CommunicationsFailure,
250
+ "Upload failed calling #{self.class.sftp_bin} via #{self.class.sshpass_bin}: #{out}"
251
+ )
237
252
  end
238
253
  end
239
254
  end
240
255
 
241
256
  def with_sftp_args
242
- return yield sftp_args(ssh_options) unless ssh_options.key?("IdentityKey")
257
+ return yield sftp_args(ssh_options) if !ssh_options.key?("IdentityKey") && !ssh_options.key?("HostKey")
258
+
259
+ with_identity_key(ssh_options.dup) do |options|
260
+ with_host_key(options) do |options2|
261
+ yield sftp_args(options2)
262
+ end
263
+ end
264
+ end
265
+
266
+ def with_identity_key(options)
267
+ return yield options unless ssh_options.key?("IdentityKey")
268
+
269
+ with_temp_file(options, "IdentityFile", options.delete("IdentityKey")) { yield options }
270
+ end
271
+
272
+ def with_host_key(options)
273
+ return yield options unless ssh_options.key?("HostKey")
274
+
275
+ with_temp_file(options, "UserKnownHostsFile", options.delete("HostKey")) { yield options }
276
+ end
243
277
 
278
+ def with_temp_file(options, option, value)
244
279
  Utils.temp_file_name("iostreams-sftp-args", "key") do |file_name|
245
- options = ssh_options.dup
246
- key = options.delete("IdentityKey")
247
280
  # sftp requires that private key is only readable by the current user
248
- ::File.open(file_name, "wb", 0o600) { |io| io.write(key) }
281
+ ::File.open(file_name, "wb", 0o600) { |io| io.write(value) }
249
282
 
250
- options["IdentityFile"] = file_name
251
- yield sftp_args(options)
283
+ options[option] = file_name
284
+ yield options
252
285
  end
253
286
  end
254
287
 
@@ -277,7 +310,7 @@ module IOStreams
277
310
  end
278
311
 
279
312
  def build_ssh_options
280
- options = ssh_options.dup
313
+ options = ssh_options.dup
281
314
  options[:logger] ||= logger if defined?(SemanticLogger)
282
315
  options[:port] ||= port
283
316
  options[:max_pkt_size] ||= 65_536
@@ -19,8 +19,7 @@ module IOStreams
19
19
 
20
20
  private
21
21
 
22
- attr_reader :default_signer_passphrase
23
- attr_reader :default_signer
22
+ attr_reader :default_signer_passphrase, :default_signer
24
23
 
25
24
  @default_signer_passphrase = nil
26
25
  @default_signer = nil
@@ -48,8 +48,8 @@ module IOStreams
48
48
  # See `man gpg` for the remaining options
49
49
  def self.generate_key(name:,
50
50
  email:,
51
- comment: nil,
52
51
  passphrase:,
52
+ comment: nil,
53
53
  key_type: "RSA",
54
54
  key_length: 4096,
55
55
  subkey_type: "RSA",
@@ -291,10 +291,8 @@ module IOStreams
291
291
  version_check
292
292
  Open3.popen2e("#{executable} --list-keys --fingerprint --with-colons #{email}") do |_stdin, out, waith_thr|
293
293
  output = out.read.chomp
294
- unless waith_thr.value.success?
295
- unless output =~ /(public key not found|No public key)/i
296
- raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email}: #{output}")
297
- end
294
+ if !waith_thr.value.success? && !(output !~ /(public key not found|No public key)/i)
295
+ raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email}: #{output}")
298
296
  end
299
297
 
300
298
  output.each_line do |line|
@@ -336,9 +334,11 @@ module IOStreams
336
334
  match[1]
337
335
  end
338
336
  else
339
- return [] if err =~ /(key not found|No (public|secret) key)/i
337
+ if err !~ /(key not found|No (public|secret) key)/i
338
+ raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email || key_id}: #{err}#{out}")
339
+ end
340
340
 
341
- raise(Pgp::Failure, "GPG Failed calling #{executable} to list keys for #{email || key_id}: #{err}#{out}")
341
+ []
342
342
  end
343
343
  end
344
344
  end
@@ -382,10 +382,10 @@ module IOStreams
382
382
  key_length: match[3].to_s.to_i,
383
383
  key_type: match[2],
384
384
  date: (begin
385
- Date.parse(match[4].to_s)
386
- rescue StandardError
387
- match[4]
388
- end)
385
+ Date.parse(match[4].to_s)
386
+ rescue StandardError
387
+ match[4]
388
+ end)
389
389
  }
390
390
  elsif (match = line.match(%r{(pub|sec)\s+(\d+)(.*)/(\w+)\s+(\d+-\d+-\d+)(\s+(.+)<(.+)>)?}))
391
391
  # Matches: pub 2048R/C7F9D9CB 2016-10-26
@@ -396,10 +396,10 @@ module IOStreams
396
396
  key_type: match[3],
397
397
  key_id: match[4],
398
398
  date: (begin
399
- Date.parse(match[5].to_s)
400
- rescue StandardError
401
- match[5]
402
- end)
399
+ Date.parse(match[5].to_s)
400
+ rescue StandardError
401
+ match[5]
402
+ end)
403
403
  }
404
404
  # Prior to gpg v2.0.30
405
405
  if match[7]
@@ -68,6 +68,8 @@ module IOStreams
68
68
  end
69
69
 
70
70
  def each
71
+ return to_enum(__method__) unless block_given?
72
+
71
73
  @line_reader.each do |line|
72
74
  if @tabular.header?
73
75
  @tabular.parse_header(line)
@@ -40,6 +40,8 @@ module IOStreams
40
40
  end
41
41
 
42
42
  def each
43
+ return to_enum(__method__) unless block_given?
44
+
43
45
  @line_reader.each do |line|
44
46
  if @tabular.header?
45
47
  columns = @tabular.parse_header(line)
@@ -56,6 +56,14 @@ module IOStreams
56
56
  builder.pipeline
57
57
  end
58
58
 
59
+ # Removes the named stream from the current pipeline.
60
+ # If the stream pipeline has not yet been built it will be built from the file_name if present.
61
+ # Note: Any options must be set _before_ calling this method.
62
+ def remove_from_pipeline(stream_name)
63
+ builder.remove_from_pipeline(stream_name)
64
+ self
65
+ end
66
+
59
67
  # Iterate over a file / stream returning one line at a time.
60
68
  #
61
69
  # Example: Read a line at a time
@@ -151,6 +159,9 @@ module IOStreams
151
159
  # Whether to apply the stream conversions during the copy.
152
160
  # Default: true
153
161
  #
162
+ # :mode [:line, :array, :hash]
163
+ # When convert is `true` then use this mode to convert the contents of the file.
164
+ #
154
165
  # Examples:
155
166
  #
156
167
  # # Copy and convert streams based on file extensions
@@ -162,11 +173,17 @@ module IOStreams
162
173
  # # Advanced copy with custom stream conversions on source and target.
163
174
  # source = IOStreams.path("source_file").stream(encoding: "BINARY")
164
175
  # IOStreams.path("target_file.pgp").option(:pgp, passphrase: "hello").copy_from(source)
165
- def copy_from(source, convert: true)
176
+ def copy_from(source, convert: true, mode: nil, **args)
166
177
  if convert
167
178
  stream = IOStreams.new(source)
168
- writer do |target|
169
- stream.reader { |src| IO.copy_stream(src, target) }
179
+ if mode
180
+ writer(mode, **args) do |target|
181
+ stream.each(mode) { |row| target << row }
182
+ end
183
+ else
184
+ writer(**args) do |target|
185
+ stream.reader { |src| IO.copy_stream(src, target) }
186
+ end
170
187
  end
171
188
  else
172
189
  stream = source.is_a?(Stream) ? source.dup : IOStreams.new(source)
@@ -176,9 +193,9 @@ module IOStreams
176
193
  end
177
194
  end
178
195
 
179
- def copy_to(target, convert: true)
196
+ def copy_to(target, **args)
180
197
  target = IOStreams.new(target)
181
- target.copy_from(self, convert: convert)
198
+ target.copy_from(self, **args)
182
199
  end
183
200
 
184
201
  # Set/get the original file_name
@@ -365,8 +382,8 @@ module IOStreams
365
382
  IOStreams::Row::Writer.stream(
366
383
  io,
367
384
  original_file_name: builder.file_name,
368
- format: builder.format,
369
- format_options: builder.format_options,
385
+ format: builder.format,
386
+ format_options: builder.format_options,
370
387
  **args,
371
388
  &block
372
389
  )
@@ -380,10 +397,11 @@ module IOStreams
380
397
  IOStreams::Record::Writer.stream(
381
398
  io,
382
399
  original_file_name: builder.file_name,
383
- format: builder.format,
384
- format_options: builder.format_options,
400
+ format: builder.format,
401
+ format_options: builder.format_options,
385
402
  **args,
386
- &block)
403
+ &block
404
+ )
387
405
  end
388
406
  end
389
407
  end
@@ -2,6 +2,9 @@ module IOStreams
2
2
  class Tabular
3
3
  # Process files / streams that start with a header.
4
4
  class Header
5
+ # Column names that begin with this prefix have been rejected and should be ignored.
6
+ IGNORE_PREFIX = "__rejected__".freeze
7
+
5
8
  attr_accessor :columns, :allowed_columns, :required_columns, :skip_unknown
6
9
 
7
10
  # Header
@@ -17,8 +20,8 @@ module IOStreams
17
20
  # List of columns to allow.
18
21
  # Default: nil ( Allow all columns )
19
22
  # Note:
20
- # When supplied any columns that are rejected will be returned in the cleansed columns
21
- # as nil so that they can be ignored during processing.
23
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
24
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
22
25
  #
23
26
  # required_columns [Array<String>]
24
27
  # List of columns that must be present, otherwise an Exception is raised.
@@ -44,8 +47,10 @@ module IOStreams
44
47
  # - Spaces and '-' are converted to '_'.
45
48
  # - All characters except for letters, digits, and '_' are stripped.
46
49
  #
47
- # Notes
48
- # * Raises Tabular::InvalidHeader when there are no non-nil columns left after cleansing.
50
+ # Notes:
51
+ # * So that rejected columns can be identified in subsequent steps, they will be prefixed with `__rejected__`.
52
+ # For example, `Unknown Column` would be cleansed as `__rejected__Unknown Column`.
53
+ * Raises Tabular::InvalidHeader when there are no non-rejected columns left after cleansing.
49
54
  def cleanse!
50
55
  return [] if columns.nil? || columns.empty?
51
56
 
@@ -56,7 +61,7 @@ module IOStreams
56
61
  cleansed
57
62
  else
58
63
  ignored_columns << column
59
- nil
64
+ "#{IGNORE_PREFIX}#{column}"
60
65
  end
61
66
  end
62
67
 
@@ -122,7 +127,7 @@ module IOStreams
122
127
 
123
128
  def array_to_hash(row)
124
129
  h = {}
125
- columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) }
130
+ columns.each_with_index { |col, i| h[col] = row[i] unless IOStreams::Utils.blank?(col) || col.start_with?(IGNORE_PREFIX) }
126
131
  h
127
132
  end
128
133
 
@@ -134,12 +139,7 @@ module IOStreams
134
139
  hash = hash.dup
135
140
  unmatched.each { |name| hash[cleanse_column(name)] = hash.delete(name) }
136
141
  end
137
- # Hash#slice as of Ruby 2.5
138
- if hash.respond_to?(:slice)
139
- hash.slice(*columns)
140
- else
141
- columns.each_with_object({}) { |column, new_hash| new_hash[column] = hash[column] }
142
- end
142
+ hash.slice(*columns)
143
143
  end
144
144
 
145
145
  def cleanse_column(name)
@@ -146,7 +146,7 @@ module IOStreams
146
146
 
147
147
  attr_reader :key, :size, :type, :decimals
148
148
 
149
- def initialize(key: nil, size:, type: :string, decimals: 2)
149
+ def initialize(size:, key: nil, type: :string, decimals: 2)
150
150
  @key = key
151
151
  @size = size == :remainder ? -1 : size.to_i
152
152
  @type = type.to_sym
@@ -28,11 +28,9 @@ module IOStreams
28
28
  def self.temp_file_name(basename, extension = "")
29
29
  result = nil
30
30
  ::Dir::Tmpname.create([basename, extension], IOStreams.temp_dir, max_try: MAX_TEMP_FILE_NAME_ATTEMPTS) do |tmpname|
31
- begin
32
- result = yield(tmpname)
33
- ensure
34
- ::File.unlink(tmpname) if ::File.exist?(tmpname)
35
- end
31
+ result = yield(tmpname)
32
+ ensure
33
+ ::File.unlink(tmpname) if ::File.exist?(tmpname)
36
34
  end
37
35
  result
38
36
  end
@@ -1,3 +1,3 @@
1
1
  module IOStreams
2
- VERSION = "1.6.2".freeze
2
+ VERSION = "1.10.0".freeze
3
3
  end
data/lib/iostreams.rb CHANGED
@@ -23,33 +23,41 @@ module IOStreams
23
23
  autoload :Reader, "io_streams/bzip2/reader"
24
24
  autoload :Writer, "io_streams/bzip2/writer"
25
25
  end
26
+
26
27
  module Encode
27
28
  autoload :Reader, "io_streams/encode/reader"
28
29
  autoload :Writer, "io_streams/encode/writer"
29
30
  end
31
+
30
32
  module Gzip
31
33
  autoload :Reader, "io_streams/gzip/reader"
32
34
  autoload :Writer, "io_streams/gzip/writer"
33
35
  end
36
+
34
37
  module Line
35
38
  autoload :Reader, "io_streams/line/reader"
36
39
  autoload :Writer, "io_streams/line/writer"
37
40
  end
41
+
38
42
  module Record
39
43
  autoload :Reader, "io_streams/record/reader"
40
44
  autoload :Writer, "io_streams/record/writer"
41
45
  end
46
+
42
47
  module Row
43
48
  autoload :Reader, "io_streams/row/reader"
44
49
  autoload :Writer, "io_streams/row/writer"
45
50
  end
51
+
46
52
  module SymmetricEncryption
47
53
  autoload :Reader, "io_streams/symmetric_encryption/reader"
48
54
  autoload :Writer, "io_streams/symmetric_encryption/writer"
49
55
  end
56
+
50
57
  module Xlsx
51
58
  autoload :Reader, "io_streams/xlsx/reader"
52
59
  end
60
+
53
61
  module Zip
54
62
  autoload :Reader, "io_streams/zip/reader"
55
63
  autoload :Writer, "io_streams/zip/writer"
data/test/builder_test.rb CHANGED
@@ -237,6 +237,21 @@ class BuilderTest < Minitest::Test
237
237
  end
238
238
  end
239
239
 
240
+ describe "#remove_from_pipeline" do
241
+ let(:file_name) { "my/path/abc.bz2.pgp" }
242
+ it "removes a named stream from the pipeline" do
243
+ assert_equal({bz2: {}, pgp: {}}, streams.pipeline)
244
+ streams.remove_from_pipeline(:bz2)
245
+ assert_equal({pgp: {}}, streams.pipeline)
246
+ end
247
+ it "removes a named stream from the pipeline with options" do
248
+ streams.option(:pgp, passphrase: "unlock-me")
249
+ assert_equal({bz2: {}, pgp: {passphrase: "unlock-me"}}, streams.pipeline)
250
+ streams.remove_from_pipeline(:bz2)
251
+ assert_equal({pgp: {passphrase: "unlock-me"}}, streams.pipeline)
252
+ end
253
+ end
254
+
240
255
  describe "#execute" do
241
256
  it "directly calls block for an empty stream" do
242
257
  string_io = StringIO.new
@@ -98,6 +98,13 @@ class LineReaderTest < Minitest::Test
98
98
  assert_equal data.size, count
99
99
  end
100
100
 
101
+ it "with no block returns enumerator" do
102
+ lines = IOStreams::Line::Reader.file(file_name) do |io|
103
+ io.each.first(100)
104
+ end
105
+ assert_equal data, lines
106
+ end
107
+
101
108
  it "each_line stream" do
102
109
  lines = []
103
110
  count = File.open(file_name) do |file|
@@ -5,7 +5,7 @@ module Paths
5
5
  describe IOStreams::Paths::File do
6
6
  let(:root) { IOStreams::Paths::File.new("/tmp/iostreams").delete_all }
7
7
  let(:directory) { root.join("/some_test_dir") }
8
- let(:data) { "Hello World" }
8
+ let(:data) { "Hello World\nHow are you doing?\nOn this fine day" }
9
9
  let(:file_path) do
10
10
  path = root.join("some_test_dir/test_file.txt")
11
11
  path.writer { |io| io << data }
@@ -17,6 +17,20 @@ module Paths
17
17
  path
18
18
  end
19
19
 
20
+ describe "#each" do
21
+ it "reads lines" do
22
+ records = []
23
+ count = file_path.each { |line| records << line }
24
+ assert_equal count, data.lines.size
25
+ assert_equal data.lines.collect(&:strip), records
26
+ end
27
+
28
+ it "reads lines without block" do
29
+ records = file_path.each.first(100)
30
+ assert_equal data.lines.collect(&:strip), records
31
+ end
32
+ end
33
+
20
34
  describe "#each_child" do
21
35
  it "iterates an empty path" do
22
36
  none = nil
@@ -48,6 +62,12 @@ module Paths
48
62
  actual = root.children("**/Test*.TXT", case_sensitive: true).collect(&:to_s)
49
63
  refute_equal expected, actual.sort
50
64
  end
65
+
66
+ it "with no block returns enumerator" do
67
+ expected = [file_path.to_s, file_path2.to_s]
68
+ actual = root.each_child("**/*").first(100).collect(&:to_s)
69
+ assert_equal expected.sort, actual.sort
70
+ end
51
71
  end
52
72
 
53
73
  describe "#mkpath" do
@@ -126,15 +146,13 @@ module Paths
126
146
 
127
147
  it "missing source file" do
128
148
  IOStreams.temp_file("iostreams_move_test", ".txt") do |temp_file|
129
- begin
130
- refute temp_file.exist?
131
- target = temp_file.directory.join("move_test.txt")
132
- assert_raises Errno::ENOENT do
133
- temp_file.move_to(target)
134
- end
135
- refute target.exist?
136
- refute temp_file.exist?
149
+ refute temp_file.exist?
150
+ target = temp_file.directory.join("move_test.txt")
151
+ assert_raises Errno::ENOENT do
152
+ temp_file.move_to(target)
137
153
  end
154
+ refute target.exist?
155
+ refute temp_file.exist?
138
156
  end
139
157
  end
140
158
 
@@ -20,7 +20,13 @@ module Paths
20
20
  let(:file_name) { File.join(File.dirname(__FILE__), "..", "files", "text file.txt") }
21
21
  let(:raw) { File.read(file_name) }
22
22
 
23
- let(:root_path) { IOStreams::Paths::SFTP.new(url, username: username, password: password) }
23
+ let(:root_path) do
24
+ if ENV["SFTP_HOST_KEY"]
25
+ IOStreams::Paths::SFTP.new(url, username: username, password: password, ssh_options: {"HostKey" => ENV["SFTP_HOST_KEY"]})
26
+ else
27
+ IOStreams::Paths::SFTP.new(url, username: username, password: password)
28
+ end
29
+ end
24
30
 
25
31
  let :existing_path do
26
32
  path = root_path.join("test.txt")
@@ -46,6 +46,13 @@ class RecordReaderTest < Minitest::Test
46
46
  end
47
47
  assert_equal expected, rows
48
48
  end
49
+
50
+ it "with no block returns enumerator" do
51
+ records = IOStreams::Record::Reader.file(file_name, cleanse_header: false) do |io|
52
+ io.each.first(100)
53
+ end
54
+ assert_equal expected, records
55
+ end
49
56
  end
50
57
 
51
58
  describe "#collect" do
@@ -10,7 +10,7 @@ class RowReaderTest < Minitest::Test
10
10
  CSV.read(file_name)
11
11
  end
12
12
 
13
- describe ".open" do
13
+ describe "#each" do
14
14
  it "file" do
15
15
  rows = []
16
16
  count = IOStreams::Row::Reader.file(file_name) do |io|
@@ -20,6 +20,13 @@ class RowReaderTest < Minitest::Test
20
20
  assert_equal expected.size, count
21
21
  end
22
22
 
23
+ it "with no block returns enumerator" do
24
+ rows = IOStreams::Row::Reader.file(file_name) do |io|
25
+ io.each.first(100)
26
+ end
27
+ assert_equal expected, rows
28
+ end
29
+
23
30
  it "stream" do
24
31
  rows = []
25
32
  count = IOStreams::Line::Reader.file(file_name) do |file|
data/test/stream_test.rb CHANGED
@@ -45,9 +45,9 @@ class StreamTest < Minitest::Test
45
45
  it "reads a zip file" do
46
46
  File.open(multiple_zip_file_name, "rb") do |io|
47
47
  result = IOStreams::Stream.new(io).
48
- file_name(multiple_zip_file_name).
49
- option(:zip, entry_file_name: "test.json").
50
- read
48
+ file_name(multiple_zip_file_name).
49
+ option(:zip, entry_file_name: "test.json").
50
+ read
51
51
  assert_equal contents_test_json, result
52
52
  end
53
53
  end
@@ -55,8 +55,8 @@ class StreamTest < Minitest::Test
55
55
  it "reads a zip file from within a gz file" do
56
56
  File.open(zip_gz_file_name, "rb") do |io|
57
57
  result = IOStreams::Stream.new(io).
58
- file_name(zip_gz_file_name).
59
- read
58
+ file_name(zip_gz_file_name).
59
+ read
60
60
  assert_equal contents_test_txt, result
61
61
  end
62
62
  end
data/test/tabular_test.rb CHANGED
@@ -58,12 +58,12 @@ class TabularTest < Minitest::Test
58
58
  assert_equal header, tabular.header.columns
59
59
  end
60
60
 
61
- it "white listed snake cased alphanumeric columns" do
61
+ it "allowed list snake cased alphanumeric columns" do
62
62
  tabular = IOStreams::Tabular.new(
63
- columns: ["Ard Vark", "password", "robot version", "$$$"],
63
+ columns: ["Ard Vark", "Password", "robot version", "$$$"],
64
64
  allowed_columns: %w[ard_vark robot_version]
65
65
  )
66
- expected_header = ["ard_vark", nil, "robot_version", nil]
66
+ expected_header = ["ard_vark", "__rejected__Password", "robot_version", "__rejected__$$$"]
67
67
  cleansed_header = tabular.cleanse_header!
68
68
  assert_equal(expected_header, cleansed_header)
69
69
  end
@@ -82,13 +82,13 @@ class TabularTest < Minitest::Test
82
82
  assert_equal @allowed_columns, tabular.header.allowed_columns
83
83
  end
84
84
 
85
- it "nils columns not in the whitelist" do
85
+ it "nils columns not in the allowed list" do
86
86
  tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns)
87
87
  header = tabular.cleanse_header!
88
- assert_equal ["first", nil, "third"], header
88
+ assert_equal ["first", "__rejected__Unknown Column", "third"], header
89
89
  end
90
90
 
91
- it "raises exception for columns not in the whitelist" do
91
+ it "raises exception for columns not in the allowed list" do
92
92
  tabular = IOStreams::Tabular.new(columns: [" first ", "Unknown Column", "thirD "], allowed_columns: @allowed_columns, skip_unknown: false)
93
93
  exc = assert_raises IOStreams::Errors::InvalidHeader do
94
94
  tabular.cleanse_header!
@@ -218,7 +218,7 @@ class TabularTest < Minitest::Test
218
218
  end
219
219
  end
220
220
 
221
- it "skips columns not in the whitelist" do
221
+ it "skips columns not in the allowed list" do
222
222
  tabular.header.allowed_columns = %w[first second third fourth fifth]
223
223
  tabular.cleanse_header!
224
224
  assert hash = tabular.record_parse("1,2,3")
data/test/test_helper.rb CHANGED
@@ -2,7 +2,6 @@ $LOAD_PATH.unshift File.dirname(__FILE__) + "/../lib"
2
2
 
3
3
  require "yaml"
4
4
  require "minitest/autorun"
5
- require "minitest/reporters"
6
5
  require "iostreams"
7
6
  require "amazing_print"
8
7
  require "symmetric-encryption"
@@ -10,8 +9,6 @@ require "symmetric-encryption"
10
9
  # Since PGP libraries use UTC for Dates
11
10
  ENV["TZ"] = "UTC"
12
11
 
13
- Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new
14
-
15
12
  # Test cipher
16
13
  SymmetricEncryption.cipher = SymmetricEncryption::Cipher.new(
17
14
  cipher_name: "aes-128-cbc",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: iostreams
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.2
4
+ version: 1.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-04 00:00:00.000000000 Z
11
+ date: 2021-08-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -125,14 +125,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
125
125
  requirements:
126
126
  - - ">="
127
127
  - !ruby/object:Gem::Version
128
- version: '2.3'
128
+ version: '2.5'
129
129
  required_rubygems_version: !ruby/object:Gem::Requirement
130
130
  requirements:
131
131
  - - ">="
132
132
  - !ruby/object:Gem::Version
133
133
  version: '0'
134
134
  requirements: []
135
- rubygems_version: 3.2.15
135
+ rubygems_version: 3.2.22
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: Input and Output streaming for Ruby.