iostreams 1.4.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36a51f41f33a1c58f1d5d03ed857784e86de4588ca38aa5a596e479f8661faf3
4
- data.tar.gz: 23dd967c21581520675799692296bb901e2dc539f008fa30623f4bd72a2c90f3
3
+ metadata.gz: a6dd68508835099ef4c9de7f81a9527927d3225138f0bf5ecd00c44194e11858
4
+ data.tar.gz: 28535e95ca83a4cf0c522de4cd48889a489b4fcefdc8feb9a7bfe0b70124fd7a
5
5
  SHA512:
6
- metadata.gz: 81d5f7ea50b7a5b06a5d26b758eb0639b0c820735cd0cf1545c6298f244b96b0070d3f8a11deed0ef8398b5b6c461561b3c43661612e3eeda19bdf61abb549ab
7
- data.tar.gz: 19f6f2051351533029cd7fca3cd6e7be3a4e5d3ea4ff2ae512b0b98a12b84639ae63e6a4decc0ff3edf1d50d5c14ddc1b869767f2f6c8793fc392f6ad02d939f
6
+ metadata.gz: 8e4d38ef41234f62fdcfde1ae3a96fe59203dcfc38562843106b081c7292efffdda36b88707674059a24b0fa2996d4c1dbe0627694e80a9b12d23d47195a53d3
7
+ data.tar.gz: 23e16854c305542ee0976f570444d7b99cadcb6ec460d83bebc73a65cba9c28e3beba485c9943797b174b80bdd783c216d89d539e8aecdbda62e1c31f017efce
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # IOStreams
2
- [![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Build Status](https://travis-ci.org/rocketjob/iostreams.svg?branch=master)](https://travis-ci.org/rocketjob/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Gitter chat](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
2
+ [![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg)
3
3
 
4
4
  IOStreams is an incredibly powerful streaming library that makes changes to file formats, compression, encryption,
5
5
  or storage mechanism transparent to the application.
@@ -14,6 +14,18 @@ Start with the [IOStreams tutorial](https://iostreams.rocketjob.io/tutorial) to
14
14
 
15
15
  Next, check out the remaining [IOStreams documentation](https://iostreams.rocketjob.io/)
16
16
 
17
+ ## Upgrading to v1.6
18
+
19
+ The old, deprecated APIs are no longer loaded by default with v1.6. To add back the deprecated API support, add
20
+ the following line to your code:
21
+
22
+ ~~~ruby
23
+ IOStreams.include(IOStreams::Deprecated)
24
+ ~~~
25
+
26
+ It is important to migrate any use of the old deprecated APIs to the new API, since they will be removed in a future
27
+ release.
28
+
17
29
  ## Versioning
18
30
 
19
31
  This project adheres to [Semantic Versioning](http://semver.org/).
@@ -1,13 +1,15 @@
1
1
  module IOStreams
2
2
  # Build the streams that need to be applied to a path during reading or writing.
3
3
  class Builder
4
- attr_accessor :file_name
4
+ attr_accessor :file_name, :format_options
5
5
  attr_reader :streams, :options
6
6
 
7
7
  def initialize(file_name = nil)
8
- @file_name = file_name
9
- @streams = nil
10
- @options = nil
8
+ @file_name = file_name
9
+ @streams = nil
10
+ @options = nil
11
+ @format = nil
12
+ @format_option = nil
11
13
  end
12
14
 
13
15
  # Supply an option that is only applied once the file name extensions have been parsed.
@@ -88,11 +90,23 @@ module IOStreams
88
90
  built_streams.freeze
89
91
  end
90
92
 
93
+ # Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
94
+ # Returns [nil] if no format is set, or if it cannot be determined from the file_name
95
+ def format
96
+ @format ||= file_name ? Tabular.format_from_file_name(file_name) : nil
97
+ end
98
+
99
+ def format=(format)
100
+ raise(ArgumentError, "Invalid format: #{format.inspect}") unless format.nil? || IOStreams::Tabular.registered_formats.include?(format)
101
+
102
+ @format = format
103
+ end
104
+
91
105
  private
92
106
 
93
107
  def class_for_stream(type, stream)
94
108
  ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
95
- raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
109
+ raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
96
110
  ext.send("#{type}_class") || raise(ArgumentError, "No #{type} registered for Stream type: #{stream.inspect}")
97
111
  end
98
112
 
@@ -3,9 +3,7 @@ module IOStreams
3
3
  class Reader < IOStreams::Reader
4
4
  # Read from a Bzip2 stream, decompressing the contents as it is read
5
5
  def self.stream(input_stream, **args)
6
- unless defined?(::Bzip2::FFI)
7
- Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi")
8
- end
6
+ Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
9
7
 
10
8
  begin
11
9
  io = ::Bzip2::FFI::Reader.new(input_stream, args)
@@ -3,9 +3,7 @@ module IOStreams
3
3
  class Writer < IOStreams::Writer
4
4
  # Write to a stream, compressing with Bzip2
5
5
  def self.stream(input_stream, original_file_name: nil, **args)
6
- unless defined?(::Bzip2::FFI)
7
- Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi")
8
- end
6
+ Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
9
7
 
10
8
  begin
11
9
  io = ::Bzip2::FFI::Writer.new(input_stream, args)
@@ -78,8 +78,6 @@ module IOStreams
78
78
  block
79
79
  end
80
80
 
81
- private
82
-
83
81
  def self.extract_cleaner(cleaner)
84
82
  return if cleaner.nil?
85
83
 
@@ -9,6 +9,9 @@ module IOStreams
9
9
  class MissingHeader < Error
10
10
  end
11
11
 
12
+ class UnknownFormat < Error
13
+ end
14
+
12
15
  class TypeMismatch < Error
13
16
  end
14
17
 
@@ -26,6 +29,15 @@ module IOStreams
26
29
  class ValueTooLong < Error
27
30
  end
28
31
 
32
+ class MalformedDataError < RuntimeError
33
+ attr_reader :line_number
34
+
35
+ def initialize(message, line_number)
36
+ @line_number = line_number
37
+ super("#{message} on line #{line_number}.")
38
+ end
39
+ end
40
+
29
41
  class InvalidLayout < Error
30
42
  end
31
43
  end
@@ -13,8 +13,6 @@ require "uri"
13
13
  # .zip.enc [ :zip, :enc ]
14
14
  # .gz.enc [ :gz, :enc ]
15
15
  module IOStreams
16
- include Deprecated
17
-
18
16
  # Returns [Path] instance for the supplied complete path with optional scheme.
19
17
  #
20
18
  # Example:
@@ -298,8 +296,6 @@ module IOStreams
298
296
  @schemes[scheme_name.nil? ? nil : scheme_name.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme_name.inspect}")
299
297
  end
300
298
 
301
- private
302
-
303
299
  Extension = Struct.new(:reader_class, :writer_class)
304
300
 
305
301
  # Hold root paths
@@ -38,12 +38,12 @@ module IOStreams
38
38
  # Size of blocks to read from the input stream at a time.
39
39
  # Default: 65536 ( 64K )
40
40
  #
41
- # TODO:
42
- # - Handle embedded line feeds when reading csv files.
43
- # - Skip Comment lines. RegExp?
44
- # - Skip "empty" / "blank" lines. RegExp?
45
- # - Extract header line(s) / first non-comment, non-blank line
46
- # - Embedded newline support, RegExp? or Proc?
41
+ # embedded_within: [String]
42
+ # Supports CSV files where a line may contain an embedded newline.
43
+ # For CSV files set `embedded_within: '"'`
44
+ #
45
+ # Note:
46
+ # * When using a line reader and the file_name ends with ".csv" then embedded_within is automatically set to `"`
47
47
  def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil, original_file_name: nil)
48
48
  super(input_stream)
49
49
 
@@ -63,11 +63,11 @@ module IOStreams
63
63
  # Auto-detect windows/linux line endings if not supplied. \n or \r\n
64
64
  @delimiter ||= auto_detect_line_endings
65
65
 
66
- if @buffer
67
- # Change the delimiters encoding to match that of the input stream
68
- @delimiter = @delimiter.encode(@buffer.encoding)
69
- @delimiter_size = @delimiter.size
70
- end
66
+ return unless @buffer
67
+
68
+ # Change the delimiters encoding to match that of the input stream
69
+ @delimiter = @delimiter.encode(@buffer.encoding)
70
+ @delimiter_size = @delimiter.size
71
71
  end
72
72
 
73
73
  # Iterate over every line in the file/stream passing each line to supplied block in turn.
@@ -86,17 +86,29 @@ module IOStreams
86
86
  line_count
87
87
  end
88
88
 
89
- # Reads each line per the @delimeter. It will account for embedded lines provided they are within double quotes.
90
- # The embedded_within argument is set in IOStreams::LineReader
89
+ # Reads each line per the `delimiter`.
90
+ # Accounts for lines that contain the `delimiter` when the `delimiter` is within the `embedded_within` delimiter.
91
+ # For Example, CSV files can contain newlines embedded within double quotes.
91
92
  def readline
92
93
  line = _readline
93
94
  if line && @embedded_within
94
95
  initial_line_number = @line_number
95
96
  while line.count(@embedded_within).odd?
96
- raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10
97
-
97
+ if eof? || line.length > @buffer_size * 10
98
+ raise(Errors::MalformedDataError.new(
99
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
100
+ initial_line_number
101
+ ))
102
+ end
98
103
  line << @delimiter
99
- line << _readline
104
+ next_line = _readline
105
+ if next_line.nil?
106
+ raise(Errors::MalformedDataError.new(
107
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
108
+ initial_line_number
109
+ ))
110
+ end
111
+ line << next_line
100
112
  end
101
113
  end
102
114
  line
@@ -82,6 +82,7 @@ module IOStreams
82
82
  end
83
83
 
84
84
  # Cleanup an incomplete write to the target "file" if the copy fails.
85
+ # rubocop:disable Lint/SuppressedException
85
86
  def copy_from(source, **args)
86
87
  super(source, **args)
87
88
  rescue StandardError => e
@@ -91,6 +92,7 @@ module IOStreams
91
92
  end
92
93
  raise(e)
93
94
  end
95
+ # rubocop:enable Lint/SuppressedException
94
96
 
95
97
  # Moves the file by copying it to the new path and then deleting the current path.
96
98
  # Returns [IOStreams::Path] the target path.
@@ -151,7 +153,7 @@ module IOStreams
151
153
  # Returns [true|false] whether the file is compressed based on its file extensions.
152
154
  def compressed?
153
155
  # TODO: Look at streams?
154
- !(path =~ /\.(zip|gz|gzip|xls.|)\z/i).nil?
156
+ !(path =~ /\.(zip|gz|gzip|xlsx|xlsm|bz2)\z/i).nil?
155
157
  end
156
158
 
157
159
  # Returns [true|false] whether the file is encrypted based on its file extensions.
@@ -5,6 +5,9 @@ module IOStreams
5
5
  class S3 < IOStreams::Path
6
6
  attr_reader :bucket_name, :client, :options
7
7
 
8
+ # Largest file size supported by the S3 copy object api.
9
+ S3_COPY_OBJECT_SIZE_LIMIT = 5 * 1024 * 1024 * 1024
10
+
8
11
  # Arguments:
9
12
  #
10
13
  # url: [String]
@@ -188,7 +191,7 @@ module IOStreams
188
191
 
189
192
  # Make S3 perform direct copies within S3 itself.
190
193
  def copy_to(target_path, convert: true)
191
- return super(target_path) if convert
194
+ return super(target_path) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
192
195
 
193
196
  target = IOStreams.new(target_path)
194
197
  return super(target) unless target.is_a?(self.class)
@@ -203,7 +206,7 @@ module IOStreams
203
206
  return super(source_path) if convert
204
207
 
205
208
  source = IOStreams.new(source_path)
206
- return super(source, **args) unless source.is_a?(self.class)
209
+ return super(source) if !source.is_a?(self.class) || (source.size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
207
210
 
208
211
  source_name = ::File.join(source.bucket_name, source.path)
209
212
  client.copy_object(options.merge(bucket: bucket_name, key: path, copy_source: source_name))
@@ -26,12 +26,13 @@ module IOStreams
26
26
  include SemanticLogger::Loggable if defined?(SemanticLogger)
27
27
 
28
28
  class << self
29
- attr_accessor :sshpass_bin, :sftp_bin, :sshpass_wait_seconds
29
+ attr_accessor :sshpass_bin, :sftp_bin, :sshpass_wait_seconds, :before_password_wait_seconds
30
30
  end
31
31
 
32
- @sftp_bin = "sftp"
33
- @sshpass_bin = "sshpass"
34
- @sshpass_wait_seconds = 5
32
+ @sftp_bin = "sftp"
33
+ @sshpass_bin = "sshpass"
34
+ @before_password_wait_seconds = 2
35
+ @sshpass_wait_seconds = 5
35
36
 
36
37
  attr_reader :hostname, :username, :ssh_options, :url, :port
37
38
 
@@ -71,7 +72,9 @@ module IOStreams
71
72
  # end
72
73
  #
73
74
  # # When using the sftp executable use an identity file instead of a password to authenticate:
74
- # IOStreams.path("sftp://test.com/path/file_name.csv", username: "jack", ssh_options: {IdentityFile: "~/.ssh/private_key"}).reader do |io|
75
+ # IOStreams.path("sftp://test.com/path/file_name.csv",
76
+ # username: "jack",
77
+ # ssh_options: {IdentityFile: "~/.ssh/private_key"}).reader do |io|
75
78
  # puts io.read
76
79
  # end
77
80
  def initialize(url, username: nil, password: nil, ssh_options: {})
@@ -122,7 +125,8 @@ module IOStreams
122
125
  # end
123
126
  #
124
127
  # Example Output:
125
- # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group", :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
128
+ # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
129
+ # :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
126
130
  def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
127
131
  Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
128
132
 
@@ -165,9 +169,14 @@ module IOStreams
165
169
  with_sftp_args do |args|
166
170
  Open3.popen2e(*args) do |writer, reader, waith_thr|
167
171
  begin
172
+ # Give time for remote sftp server to get ready to accept the password.
173
+ sleep self.class.before_password_wait_seconds
174
+
168
175
  writer.puts password
176
+
169
177
  # Give time for password to be processed and stdin to be passed to sftp process.
170
178
  sleep self.class.sshpass_wait_seconds
179
+
171
180
  writer.puts "get #{remote_file_name} #{local_file_name}"
172
181
  writer.puts "bye"
173
182
  writer.close
@@ -268,9 +277,9 @@ module IOStreams
268
277
  end
269
278
 
270
279
  def build_ssh_options
271
- options = ssh_options.dup
272
- options[:logger] ||= logger if defined?(SemanticLogger)
273
- options[:port] ||= port
280
+ options = ssh_options.dup
281
+ options[:logger] ||= logger if defined?(SemanticLogger)
282
+ options[:port] ||= port
274
283
  options[:max_pkt_size] ||= 65_536
275
284
  options[:password] ||= @password
276
285
  options
@@ -74,9 +74,10 @@ module IOStreams
74
74
 
75
75
  raise(Pgp::Failure, "GPG Failed to generate key: #{err}#{out}") unless status.success?
76
76
 
77
- if (match = err.match(/gpg: key ([0-9A-F]+)\s+/))
78
- match[1]
79
- end
77
+ match = err.match(/gpg: key ([0-9A-F]+)\s+/)
78
+ return unless match
79
+
80
+ match[1]
80
81
  end
81
82
 
82
83
  # Delete all private and public keys for a particular email.
@@ -110,11 +111,6 @@ module IOStreams
110
111
  !list_keys(email: email, key_id: key_id, private: private).empty?
111
112
  end
112
113
 
113
- # Deprecated
114
- def self.has_key?(**args)
115
- key?(**args)
116
- end
117
-
118
114
  # Returns [Array<Hash>] the list of keys.
119
115
  # Each Hash consists of:
120
116
  # key_length: [Integer]
@@ -232,7 +228,7 @@ module IOStreams
232
228
  err.each_line do |line|
233
229
  if line =~ /secret key imported/
234
230
  secret = true
235
- elsif match = line.match(/key\s+(\w+):\s+(\w+).+\"(.*)<(.*)>\"/)
231
+ elsif (match = line.match(/key\s+(\w+):\s+(\w+).+\"(.*)<(.*)>\"/))
236
232
  results << {
237
233
  key_id: match[1].to_s.strip,
238
234
  private: secret,
@@ -265,7 +261,7 @@ module IOStreams
265
261
 
266
262
  import(key: key)
267
263
  set_trust(email: email, key_id: key_id)
268
- email
264
+ email || key_id
269
265
  end
270
266
 
271
267
  # Set the trust level for an existing key.
@@ -347,8 +343,6 @@ module IOStreams
347
343
  end
348
344
  end
349
345
 
350
- private
351
-
352
346
  @logger = nil
353
347
 
354
348
  def self.logger
@@ -356,7 +350,7 @@ module IOStreams
356
350
  end
357
351
 
358
352
  def self.version_check
359
- return unless pgp_version.to_f >= 2.3
353
+ return unless pgp_version.to_f >= 2.4
360
354
 
361
355
  raise(
362
356
  Pgp::UnsupportedVersion,
@@ -191,11 +191,41 @@ module IOStreams
191
191
  end
192
192
  end
193
193
 
194
- # Set/get the original file_name
194
+ # Set the original file_name
195
195
  def file_name=(file_name)
196
196
  builder.file_name = file_name
197
197
  end
198
198
 
199
+ # Set/get the tabular format
200
+ def format(format = :none)
201
+ if format == :none
202
+ builder.format
203
+ else
204
+ builder.format = format
205
+ self
206
+ end
207
+ end
208
+
209
+ # Set the tabular format
210
+ def format=(format)
211
+ builder.format = format
212
+ end
213
+
214
+ # Set/get the tabular format options
215
+ def format_options(format_options = :none)
216
+ if format_options == :none
217
+ builder.format_options
218
+ else
219
+ builder.format_options = format_options
220
+ self
221
+ end
222
+ end
223
+
224
+ # Set the tabular format_options
225
+ def format_options=(format_options)
226
+ builder.format_options = format_options
227
+ end
228
+
199
229
  # Returns [String] the last component of this path.
200
230
  # Returns `nil` if no `file_name` was set.
201
231
  #
@@ -293,14 +323,26 @@ module IOStreams
293
323
  # Iterate over a file / stream returning each line as an array, one at a time.
294
324
  def row_reader(delimiter: nil, embedded_within: nil, **args)
295
325
  line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
296
- yield IOStreams::Row::Reader.new(io, original_file_name: builder.file_name, **args)
326
+ yield IOStreams::Row::Reader.new(
327
+ io,
328
+ original_file_name: builder.file_name,
329
+ format: builder.format,
330
+ format_options: builder.format_options,
331
+ **args
332
+ )
297
333
  end
298
334
  end
299
335
 
300
336
  # Iterate over a file / stream returning each line as a hash, one at a time.
301
337
  def record_reader(delimiter: nil, embedded_within: nil, **args)
302
338
  line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
303
- yield IOStreams::Record::Reader.new(io, original_file_name: builder.file_name, **args)
339
+ yield IOStreams::Record::Reader.new(
340
+ io,
341
+ original_file_name: builder.file_name,
342
+ format: builder.format,
343
+ format_options: builder.format_options,
344
+ **args
345
+ )
304
346
  end
305
347
  end
306
348
 
@@ -320,7 +362,14 @@ module IOStreams
320
362
  return block.call(io_stream) if io_stream&.is_a?(IOStreams::Row::Writer)
321
363
 
322
364
  line_writer(delimiter: delimiter) do |io|
323
- IOStreams::Row::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
365
+ IOStreams::Row::Writer.stream(
366
+ io,
367
+ original_file_name: builder.file_name,
368
+ format: builder.format,
369
+ format_options: builder.format_options,
370
+ **args,
371
+ &block
372
+ )
324
373
  end
325
374
  end
326
375
 
@@ -328,7 +377,13 @@ module IOStreams
328
377
  return block.call(io_stream) if io_stream&.is_a?(IOStreams::Record::Writer)
329
378
 
330
379
  line_writer(delimiter: delimiter) do |io|
331
- IOStreams::Record::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
380
+ IOStreams::Record::Writer.stream(
381
+ io,
382
+ original_file_name: builder.file_name,
383
+ format: builder.format,
384
+ format_options: builder.format_options,
385
+ **args,
386
+ &block)
332
387
  end
333
388
  end
334
389
  end