iostreams 1.3.3 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1540cfa116ee75ceb9a5cedf8b3c4e89833fc69033e442ec6c5af990d626c427
4
- data.tar.gz: 03a4bde845869ec7f3d7f54a7a5aed7f72dfaf999ab50f3435faf481927fb069
3
+ metadata.gz: 844554b8eeaaffafc4ec283775cfd3145b0c459f549367dbdb63edcd35893cca
4
+ data.tar.gz: 6384647714a78ff9999c3d049a5222caa7e746059c3ecffbee41098c07465ee1
5
5
  SHA512:
6
- metadata.gz: bb60c37dcaa87cb5a76894c141e675039d369e3eb5e97079f63f60d15e00bce0f84f82ebeff8812829f85bd743118f09850ea5672f557550e5a26e0f0e9582b9
7
- data.tar.gz: fab7d76db975e8e8b7f0051e83137b9debe4aeea6da865aa5f7d2e218650d4d789c20629fa9881fbdbdc3763fe89947fd660f0b7c494f955e67c4c9dedcbe175
6
+ metadata.gz: 9aed6c1832d502c162b5d9ba42600caf6102e7ac71b3df6b3473a44472dd93fa0513c03ff258675c9ed19838ee1cd0e4e91198389bafa605bfaf051d87431895
7
+ data.tar.gz: dde06ece42e29869d73c17e89a8f5aca8b93b37859663256b218140ccd21032c45a0b7ece40bf4be66ccdc51cb5d580840d1a18d7ec99be42ae716b79381c165
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # IOStreams
2
- [![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Build Status](https://travis-ci.org/rocketjob/iostreams.svg?branch=master)](https://travis-ci.org/rocketjob/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Gitter chat](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
2
+ [![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Gitter chat](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
3
3
 
4
4
  IOStreams is an incredibly powerful streaming library that makes changes to file formats, compression, encryption,
5
5
  or storage mechanism transparent to the application.
@@ -14,6 +14,18 @@ Start with the [IOStreams tutorial](https://iostreams.rocketjob.io/tutorial) to
14
14
 
15
15
  Next, check out the remaining [IOStreams documentation](https://iostreams.rocketjob.io/)
16
16
 
17
+ ## Upgrading to v1.6
18
+
19
+ The old, deprecated APIs are no longer loaded by default with v1.6. To add back the deprecated API support, add
20
+ the following line to your code:
21
+
22
+ ~~~ruby
23
+ IOStreams.include(IOStreams::Deprecated)
24
+ ~~~
25
+
26
+ It is important to move any of the old deprecated apis over to the new api, since they will be removed in a future
27
+ release.
28
+
17
29
  ## Versioning
18
30
 
19
31
  This project adheres to [Semantic Versioning](http://semver.org/).
@@ -1,13 +1,15 @@
1
1
  module IOStreams
2
2
  # Build the streams that need to be applied to a path during reading or writing.
3
3
  class Builder
4
- attr_accessor :file_name
4
+ attr_accessor :file_name, :format_options
5
5
  attr_reader :streams, :options
6
6
 
7
7
  def initialize(file_name = nil)
8
- @file_name = file_name
9
- @streams = nil
10
- @options = nil
8
+ @file_name = file_name
9
+ @streams = nil
10
+ @options = nil
11
+ @format = nil
12
+ @format_option = nil
11
13
  end
12
14
 
13
15
  # Supply an option that is only applied once the file name extensions have been parsed.
@@ -88,11 +90,23 @@ module IOStreams
88
90
  built_streams.freeze
89
91
  end
90
92
 
93
+ # Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
94
+ # Returns [nil] if no format is set, or if it cannot be determined from the file_name
95
+ def format
96
+ @format ||= file_name ? Tabular.format_from_file_name(file_name) : nil
97
+ end
98
+
99
+ def format=(format)
100
+ raise(ArgumentError, "Invalid format: #{format.inspect}") unless format.nil? || IOStreams::Tabular.registered_formats.include?(format)
101
+
102
+ @format = format
103
+ end
104
+
91
105
  private
92
106
 
93
107
  def class_for_stream(type, stream)
94
108
  ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
95
- raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
109
+ raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
96
110
  ext.send("#{type}_class") || raise(ArgumentError, "No #{type} registered for Stream type: #{stream.inspect}")
97
111
  end
98
112
 
@@ -2,11 +2,11 @@ module IOStreams
2
2
  module Bzip2
3
3
  class Reader < IOStreams::Reader
4
4
  # Read from a Bzip2 stream, decompressing the contents as it is read
5
- def self.stream(input_stream, **_args)
6
- Utils.load_soft_dependency("rbzip2", "Bzip2") unless defined?(RBzip2)
5
+ def self.stream(input_stream, **args)
6
+ Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
7
7
 
8
8
  begin
9
- io = RBzip2.default_adapter::Decompressor.new(input_stream)
9
+ io = ::Bzip2::FFI::Reader.new(input_stream, args)
10
10
  yield io
11
11
  ensure
12
12
  io&.close
@@ -2,11 +2,11 @@ module IOStreams
2
2
  module Bzip2
3
3
  class Writer < IOStreams::Writer
4
4
  # Write to a stream, compressing with Bzip2
5
- def self.stream(input_stream, original_file_name: nil, **_args)
6
- Utils.load_soft_dependency("rbzip2", "Bzip2") unless defined?(RBzip2)
5
+ def self.stream(input_stream, original_file_name: nil, **args)
6
+ Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
7
7
 
8
8
  begin
9
- io = RBzip2.default_adapter::Compressor.new(input_stream)
9
+ io = ::Bzip2::FFI::Writer.new(input_stream, args)
10
10
  yield io
11
11
  ensure
12
12
  io&.close
@@ -78,8 +78,6 @@ module IOStreams
78
78
  block
79
79
  end
80
80
 
81
- private
82
-
83
81
  def self.extract_cleaner(cleaner)
84
82
  return if cleaner.nil?
85
83
 
@@ -9,6 +9,9 @@ module IOStreams
9
9
  class MissingHeader < Error
10
10
  end
11
11
 
12
+ class UnknownFormat < Error
13
+ end
14
+
12
15
  class TypeMismatch < Error
13
16
  end
14
17
 
@@ -26,6 +29,15 @@ module IOStreams
26
29
  class ValueTooLong < Error
27
30
  end
28
31
 
32
+ class MalformedDataError < RuntimeError
33
+ attr_reader :line_number
34
+
35
+ def initialize(message, line_number)
36
+ @line_number = line_number
37
+ super("#{message} on line #{line_number}.")
38
+ end
39
+ end
40
+
29
41
  class InvalidLayout < Error
30
42
  end
31
43
  end
@@ -13,8 +13,6 @@ require "uri"
13
13
  # .zip.enc [ :zip, :enc ]
14
14
  # .gz.enc [ :gz, :enc ]
15
15
  module IOStreams
16
- include Deprecated
17
-
18
16
  # Returns [Path] instance for the supplied complete path with optional scheme.
19
17
  #
20
18
  # Example:
@@ -298,8 +296,6 @@ module IOStreams
298
296
  @schemes[scheme_name.nil? ? nil : scheme_name.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme_name.inspect}")
299
297
  end
300
298
 
301
- private
302
-
303
299
  Extension = Struct.new(:reader_class, :writer_class)
304
300
 
305
301
  # Hold root paths
@@ -38,12 +38,12 @@ module IOStreams
38
38
  # Size of blocks to read from the input stream at a time.
39
39
  # Default: 65536 ( 64K )
40
40
  #
41
- # TODO:
42
- # - Handle embedded line feeds when reading csv files.
43
- # - Skip Comment lines. RegExp?
44
- # - Skip "empty" / "blank" lines. RegExp?
45
- # - Extract header line(s) / first non-comment, non-blank line
46
- # - Embedded newline support, RegExp? or Proc?
41
+ # embedded_within: [String]
42
+ # Supports CSV files where a line may contain an embedded newline.
43
+ # For CSV files set `embedded_within: '"'`
44
+ #
45
+ # Note:
46
+ # * When using a line reader and the file_name ends with ".csv" then embedded_within is automatically set to `"`
47
47
  def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil, original_file_name: nil)
48
48
  super(input_stream)
49
49
 
@@ -63,11 +63,11 @@ module IOStreams
63
63
  # Auto-detect windows/linux line endings if not supplied. \n or \r\n
64
64
  @delimiter ||= auto_detect_line_endings
65
65
 
66
- if @buffer
67
- # Change the delimiters encoding to match that of the input stream
68
- @delimiter = @delimiter.encode(@buffer.encoding)
69
- @delimiter_size = @delimiter.size
70
- end
66
+ return unless @buffer
67
+
68
+ # Change the delimiters encoding to match that of the input stream
69
+ @delimiter = @delimiter.encode(@buffer.encoding)
70
+ @delimiter_size = @delimiter.size
71
71
  end
72
72
 
73
73
  # Iterate over every line in the file/stream passing each line to supplied block in turn.
@@ -86,17 +86,29 @@ module IOStreams
86
86
  line_count
87
87
  end
88
88
 
89
- # Reads each line per the @delimeter. It will account for embedded lines provided they are within double quotes.
90
- # The embedded_within argument is set in IOStreams::LineReader
89
+ # Reads each line per the `delimiter`.
90
+ # Accounts for lines that contain the `delimiter` when the `delimiter` is within the `embedded_within` delimiter.
91
+ # For Example, CSV files can contain newlines embedded within double quotes.
91
92
  def readline
92
93
  line = _readline
93
94
  if line && @embedded_within
94
95
  initial_line_number = @line_number
95
96
  while line.count(@embedded_within).odd?
96
- raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10
97
-
97
+ if eof? || line.length > @buffer_size * 10
98
+ raise(Errors::MalformedDataError.new(
99
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
100
+ initial_line_number
101
+ ))
102
+ end
98
103
  line << @delimiter
99
- line << _readline
104
+ next_line = _readline
105
+ if next_line.nil?
106
+ raise(Errors::MalformedDataError.new(
107
+ "Unbalanced delimited field, delimiter: #{@embedded_within}",
108
+ initial_line_number
109
+ ))
110
+ end
111
+ line << next_line
100
112
  end
101
113
  end
102
114
  line
@@ -82,6 +82,7 @@ module IOStreams
82
82
  end
83
83
 
84
84
  # Cleanup an incomplete write to the target "file" if the copy fails.
85
+ # rubocop:disable Lint/SuppressedException
85
86
  def copy_from(source, **args)
86
87
  super(source, **args)
87
88
  rescue StandardError => e
@@ -91,6 +92,7 @@ module IOStreams
91
92
  end
92
93
  raise(e)
93
94
  end
95
+ # rubocop:enable Lint/SuppressedException
94
96
 
95
97
  # Moves the file by copying it to the new path and then deleting the current path.
96
98
  # Returns [IOStreams::Path] the target path.
@@ -151,7 +153,7 @@ module IOStreams
151
153
  # Returns [true|false] whether the file is compressed based on its file extensions.
152
154
  def compressed?
153
155
  # TODO: Look at streams?
154
- !(path =~ /\.(zip|gz|gzip|xls.|)\z/i).nil?
156
+ !(path =~ /\.(zip|gz|gzip|xlsx|xlsm|bz2)\z/i).nil?
155
157
  end
156
158
 
157
159
  # Returns [true|false] whether the file is encrypted based on its file extensions.
@@ -5,6 +5,9 @@ module IOStreams
5
5
  class S3 < IOStreams::Path
6
6
  attr_reader :bucket_name, :client, :options
7
7
 
8
+ # Largest file size supported by the S3 copy object api.
9
+ S3_COPY_OBJECT_SIZE_LIMIT = 5 * 1024 * 1024 * 1024
10
+
8
11
  # Arguments:
9
12
  #
10
13
  # url: [String]
@@ -188,7 +191,7 @@ module IOStreams
188
191
 
189
192
  # Make S3 perform direct copies within S3 itself.
190
193
  def copy_to(target_path, convert: true)
191
- return super(target_path) if convert
194
+ return super(target_path) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
192
195
 
193
196
  target = IOStreams.new(target_path)
194
197
  return super(target) unless target.is_a?(self.class)
@@ -203,7 +206,7 @@ module IOStreams
203
206
  return super(source_path) if convert
204
207
 
205
208
  source = IOStreams.new(source_path)
206
- return super(source, **args) unless source.is_a?(self.class)
209
+ return super(source) if !source.is_a?(self.class) || (source.size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
207
210
 
208
211
  source_name = ::File.join(source.bucket_name, source.path)
209
212
  client.copy_object(options.merge(bucket: bucket_name, key: path, copy_source: source_name))
@@ -71,7 +71,9 @@ module IOStreams
71
71
  # end
72
72
  #
73
73
  # # When using the sftp executable use an identity file instead of a password to authenticate:
74
- # IOStreams.path("sftp://test.com/path/file_name.csv", username: "jack", ssh_options: {IdentityFile: "~/.ssh/private_key"}).reader do |io|
74
+ # IOStreams.path("sftp://test.com/path/file_name.csv",
75
+ # username: "jack",
76
+ # ssh_options: {IdentityFile: "~/.ssh/private_key"}).reader do |io|
75
77
  # puts io.read
76
78
  # end
77
79
  def initialize(url, username: nil, password: nil, ssh_options: {})
@@ -122,7 +124,8 @@ module IOStreams
122
124
  # end
123
125
  #
124
126
  # Example Output:
125
- # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group", :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
127
+ # sftp://sftp.example.org/a/b/c/test.txt {:type=>1, :size=>37, :owner=>"test_owner", :group=>"test_group",
128
+ # :permissions=>420, :atime=>1572378136, :mtime=>1572378136, :link_count=>1, :extended=>{}}
126
129
  def each_child(pattern = "*", case_sensitive: true, directories: false, hidden: false)
127
130
  Utils.load_soft_dependency("net-sftp", "SFTP glob capability", "net/sftp") unless defined?(Net::SFTP)
128
131
 
@@ -74,9 +74,10 @@ module IOStreams
74
74
 
75
75
  raise(Pgp::Failure, "GPG Failed to generate key: #{err}#{out}") unless status.success?
76
76
 
77
- if (match = err.match(/gpg: key ([0-9A-F]+)\s+/))
78
- match[1]
79
- end
77
+ match = err.match(/gpg: key ([0-9A-F]+)\s+/)
78
+ return unless match
79
+
80
+ match[1]
80
81
  end
81
82
 
82
83
  # Delete all private and public keys for a particular email.
@@ -110,11 +111,6 @@ module IOStreams
110
111
  !list_keys(email: email, key_id: key_id, private: private).empty?
111
112
  end
112
113
 
113
- # Deprecated
114
- def self.has_key?(**args)
115
- key?(**args)
116
- end
117
-
118
114
  # Returns [Array<Hash>] the list of keys.
119
115
  # Each Hash consists of:
120
116
  # key_length: [Integer]
@@ -232,7 +228,7 @@ module IOStreams
232
228
  err.each_line do |line|
233
229
  if line =~ /secret key imported/
234
230
  secret = true
235
- elsif match = line.match(/key\s+(\w+):\s+(\w+).+\"(.*)<(.*)>\"/)
231
+ elsif (match = line.match(/key\s+(\w+):\s+(\w+).+\"(.*)<(.*)>\"/))
236
232
  results << {
237
233
  key_id: match[1].to_s.strip,
238
234
  private: secret,
@@ -265,7 +261,7 @@ module IOStreams
265
261
 
266
262
  import(key: key)
267
263
  set_trust(email: email, key_id: key_id)
268
- email
264
+ email || key_id
269
265
  end
270
266
 
271
267
  # Set the trust level for an existing key.
@@ -347,8 +343,6 @@ module IOStreams
347
343
  end
348
344
  end
349
345
 
350
- private
351
-
352
346
  @logger = nil
353
347
 
354
348
  def self.logger
@@ -356,7 +350,7 @@ module IOStreams
356
350
  end
357
351
 
358
352
  def self.version_check
359
- return unless pgp_version.to_f >= 2.3
353
+ return unless pgp_version.to_f >= 2.4
360
354
 
361
355
  raise(
362
356
  Pgp::UnsupportedVersion,
@@ -191,11 +191,41 @@ module IOStreams
191
191
  end
192
192
  end
193
193
 
194
- # Set/get the original file_name
194
+ # Set the original file_name
195
195
  def file_name=(file_name)
196
196
  builder.file_name = file_name
197
197
  end
198
198
 
199
+ # Set/get the tabular format
200
+ def format(format = :none)
201
+ if format == :none
202
+ builder.format
203
+ else
204
+ builder.format = format
205
+ self
206
+ end
207
+ end
208
+
209
+ # Set the tabular format
210
+ def format=(format)
211
+ builder.format = format
212
+ end
213
+
214
+ # Set/get the tabular format options
215
+ def format_options(format_options = :none)
216
+ if format_options == :none
217
+ builder.format_options
218
+ else
219
+ builder.format_options = format_options
220
+ self
221
+ end
222
+ end
223
+
224
+ # Set the tabular format_options
225
+ def format_options=(format_options)
226
+ builder.format_options = format_options
227
+ end
228
+
199
229
  # Returns [String] the last component of this path.
200
230
  # Returns `nil` if no `file_name` was set.
201
231
  #
@@ -293,14 +323,26 @@ module IOStreams
293
323
  # Iterate over a file / stream returning each line as an array, one at a time.
294
324
  def row_reader(delimiter: nil, embedded_within: nil, **args)
295
325
  line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
296
- yield IOStreams::Row::Reader.new(io, original_file_name: builder.file_name, **args)
326
+ yield IOStreams::Row::Reader.new(
327
+ io,
328
+ original_file_name: builder.file_name,
329
+ format: builder.format,
330
+ format_options: builder.format_options,
331
+ **args
332
+ )
297
333
  end
298
334
  end
299
335
 
300
336
  # Iterate over a file / stream returning each line as a hash, one at a time.
301
337
  def record_reader(delimiter: nil, embedded_within: nil, **args)
302
338
  line_reader(delimiter: delimiter, embedded_within: embedded_within) do |io|
303
- yield IOStreams::Record::Reader.new(io, original_file_name: builder.file_name, **args)
339
+ yield IOStreams::Record::Reader.new(
340
+ io,
341
+ original_file_name: builder.file_name,
342
+ format: builder.format,
343
+ format_options: builder.format_options,
344
+ **args
345
+ )
304
346
  end
305
347
  end
306
348
 
@@ -320,7 +362,14 @@ module IOStreams
320
362
  return block.call(io_stream) if io_stream&.is_a?(IOStreams::Row::Writer)
321
363
 
322
364
  line_writer(delimiter: delimiter) do |io|
323
- IOStreams::Row::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
365
+ IOStreams::Row::Writer.stream(
366
+ io,
367
+ original_file_name: builder.file_name,
368
+ format: builder.format,
369
+ format_options: builder.format_options,
370
+ **args,
371
+ &block
372
+ )
324
373
  end
325
374
  end
326
375
 
@@ -328,7 +377,13 @@ module IOStreams
328
377
  return block.call(io_stream) if io_stream&.is_a?(IOStreams::Record::Writer)
329
378
 
330
379
  line_writer(delimiter: delimiter) do |io|
331
- IOStreams::Record::Writer.stream(io, original_file_name: builder.file_name, **args, &block)
380
+ IOStreams::Record::Writer.stream(
381
+ io,
382
+ original_file_name: builder.file_name,
383
+ format: builder.format,
384
+ format_options: builder.format_options,
385
+ **args,
386
+ &block)
332
387
  end
333
388
  end
334
389
  end