RubyGems - iostreams - Versions diffs - 1.2.1 → 1.6.2 - Mend

iostreams 1.2.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

checksums.yaml +4 -4
data/README.md +19 -4
data/lib/io_streams/builder.rb +27 -10
data/lib/io_streams/bzip2/reader.rb +3 -3
data/lib/io_streams/bzip2/writer.rb +3 -3
data/lib/io_streams/deprecated.rb +1 -1
data/lib/io_streams/encode/reader.rb +1 -3
data/lib/io_streams/encode/writer.rb +1 -1
data/lib/io_streams/errors.rb +22 -0
data/lib/io_streams/io_streams.rb +1 -5
data/lib/io_streams/line/reader.rb +28 -16
data/lib/io_streams/path.rb +3 -1
data/lib/io_streams/paths/file.rb +4 -4
data/lib/io_streams/paths/http.rb +6 -3
data/lib/io_streams/paths/s3.rb +30 -8
data/lib/io_streams/paths/sftp.rb +34 -13
data/lib/io_streams/pgp.rb +84 -71
data/lib/io_streams/stream.rb +78 -12
data/lib/io_streams/tabular.rb +28 -27
data/lib/io_streams/tabular/header.rb +14 -12
data/lib/io_streams/tabular/parser/csv.rb +4 -2
data/lib/io_streams/tabular/parser/fixed.rb +166 -26
data/lib/io_streams/tabular/utility/csv_row.rb +1 -4
data/lib/io_streams/utils.rb +4 -4
data/lib/io_streams/version.rb +1 -1
data/lib/io_streams/zip/reader.rb +1 -1
data/test/builder_test.rb +29 -0
data/test/bzip2_writer_test.rb +6 -4
data/test/deprecated_test.rb +2 -0
data/test/files/test.psv +4 -0
data/test/files/unclosed_quote_large_test.csv +1658 -0
data/test/files/unclosed_quote_test2.csv +3 -0
data/test/io_streams_test.rb +2 -2
data/test/line_reader_test.rb +30 -4
data/test/paths/file_test.rb +1 -1
data/test/paths/s3_test.rb +3 -3
data/test/paths/sftp_test.rb +4 -4
data/test/pgp_test.rb +54 -4
data/test/pgp_writer_test.rb +3 -3
data/test/stream_test.rb +174 -8
data/test/tabular_test.rb +100 -40
data/test/test_helper.rb +1 -1
metadata +47 -42

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1dad581b0665992975c33f75b23f50964ae1311e025b7a1524fca4004f0ede2b
-  data.tar.gz: 4db01e4d6c2d36ce522df3b323a6e0d9f42de0d1644a282a0cea06479e979289
+  metadata.gz: a6dd68508835099ef4c9de7f81a9527927d3225138f0bf5ecd00c44194e11858
+  data.tar.gz: 28535e95ca83a4cf0c522de4cd48889a489b4fcefdc8feb9a7bfe0b70124fd7a
 SHA512:
-  metadata.gz: 4057a5c484129c60dbc9c84e462026da862900e17b0604b385164210f14814fbae6d065d015ee9171402eb9f793f33ac26c0ee7658f94b8cdeb0724c796cbe63
-  data.tar.gz: 5a84fe37c1eebc775bd84b9903181ff035c325b1233ab64990e586f5b0bd3fd51c21d4f1429f9b0e8ab64733e9b63be5c5e05df7bf026e9d1d8c0cd8a7716417
+  metadata.gz: 8e4d38ef41234f62fdcfde1ae3a96fe59203dcfc38562843106b081c7292efffdda36b88707674059a24b0fa2996d4c1dbe0627694e80a9b12d23d47195a53d3
+  data.tar.gz: 23e16854c305542ee0976f570444d7b99cadcb6ec460d83bebc73a65cba9c28e3beba485c9943797b174b80bdd783c216d89d539e8aecdbda62e1c31f017efce

data/README.md CHANGED Viewed

@@ -1,7 +1,8 @@
-# iostreams
-[![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Build Status](https://travis-ci.org/rocketjob/iostreams.svg?branch=master)](https://travis-ci.org/rocketjob/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg) [![Gitter chat](https://img.shields.io/badge/IRC%20(gitter)-Support-brightgreen.svg)](https://gitter.im/rocketjob/support)
+# IOStreams
+[![Gem Version](https://img.shields.io/gem/v/iostreams.svg)](https://rubygems.org/gems/iostreams) [![Downloads](https://img.shields.io/gem/dt/iostreams.svg)](https://rubygems.org/gems/iostreams) [![License](https://img.shields.io/badge/license-Apache%202.0-brightgreen.svg)](http://opensource.org/licenses/Apache-2.0) ![](https://img.shields.io/badge/status-Production%20Ready-blue.svg)
-Input and Output streaming for Ruby.
+IOStreams is an incredibly powerful streaming library that makes changes to file formats, compression, encryption,
+or storage mechanism transparent to the application.
 ## Project Status
@@ -9,7 +10,21 @@ Production Ready, heavily used in production environments, many as part of Rocke
 ## Documentation
-[Semantic Logger Guide](http://rocketjob.github.io/iostreams)
+Start with the [IOStreams tutorial](https://iostreams.rocketjob.io/tutorial) to get a great introduction to IOStreams.
+Next, check out the remaining [IOStreams documentation](https://iostreams.rocketjob.io/).
+## Upgrading to v1.6
+The old, deprecated APIs are no longer loaded by default with v1.6. To add back the deprecated API support, add
+the following line to your code:
+~~~ruby
+IOStreams.include(IOStreams::Deprecated)
+~~~
+It is important to move any of the old deprecated APIs over to the new API, since they will be removed in a future
+release.
 ## Versioning

data/lib/io_streams/builder.rb CHANGED Viewed

@@ -1,13 +1,15 @@
 module IOStreams
   # Build the streams that need to be applied to a path during reading or writing.
   class Builder
-    attr_accessor :file_name
+    attr_accessor :file_name, :format_options
     attr_reader :streams, :options
     def initialize(file_name = nil)
-      @file_name = file_name
-      @streams   = nil
-      @options   = nil
+      @file_name     = file_name
+      @streams       = nil
+      @options       = nil
+      @format        = nil
+      @format_option = nil
     end
     # Supply an option that is only applied once the file name extensions have been parsed.
@@ -20,7 +22,7 @@ module IOStreams
       raise(ArgumentError, "Cannot call #option unless the `file_name` was already set}") unless file_name
       @options ||= {}
-      if opts = @options[stream]
+      if (opts = @options[stream])
         opts.merge!(options)
       else
         @options[stream] = options.dup
@@ -40,7 +42,7 @@ module IOStreams
       raise(ArgumentError, "Invalid stream: #{stream.inspect}") unless IOStreams.extensions.include?(stream)
       @streams ||= {}
-      if opts = @streams[stream]
+      if (opts = @streams[stream])
         opts.merge!(options)
       else
         @streams[stream] = options.dup
@@ -88,10 +90,23 @@ module IOStreams
       built_streams.freeze
     end
+    # Returns the tabular format if set, otherwise tries to autodetect the format if the file_name has been set
+    # Returns [nil] if no format is set, or if it cannot be determined from the file_name
+    def format
+      @format ||= file_name ? Tabular.format_from_file_name(file_name) : nil
+    end
+    def format=(format)
+      raise(ArgumentError, "Invalid format: #{format.inspect}") unless format.nil? || IOStreams::Tabular.registered_formats.include?(format)
+      @format = format
+    end
     private
     def class_for_stream(type, stream)
-      ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] || raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
+      ext = IOStreams.extensions[stream.nil? ? nil : stream.to_sym] ||
+        raise(ArgumentError, "Unknown Stream type: #{stream.inspect}")
       ext.send("#{type}_class") || raise(ArgumentError, "No #{type} registered for Stream type: #{stream.inspect}")
     end
@@ -99,7 +114,7 @@ module IOStreams
     def parse_extensions
       parts      = ::File.basename(file_name).split(".")
       extensions = []
-      while extension = parts.pop
+      while (extension = parts.pop)
         sym = extension.downcase.to_sym
         break unless IOStreams.extensions[sym]
@@ -116,10 +131,12 @@ module IOStreams
         block.call(io_stream)
       elsif pipeline.size == 1
         stream, opts = pipeline.first
-        class_for_stream(type, stream).open(io_stream, opts, &block)
+        class_for_stream(type, stream).open(io_stream, **opts, &block)
       else
         # Daisy chain multiple streams together
-        last = pipeline.keys.inject(block) { |inner, stream_sym| ->(io) { class_for_stream(type, stream_sym).open(io, pipeline[stream_sym], &inner) } }
+        last = pipeline.keys.inject(block) do |inner, stream_sym|
+          ->(io) { class_for_stream(type, stream_sym).open(io, **pipeline[stream_sym], &inner) }
+        end
         last.call(io_stream)
       end
     end

data/lib/io_streams/bzip2/reader.rb CHANGED Viewed

@@ -2,11 +2,11 @@ module IOStreams
   module Bzip2
     class Reader < IOStreams::Reader
       # Read from a Bzip2 stream, decompressing the contents as it is read
-      def self.stream(input_stream, **_args)
-        Utils.load_soft_dependency("rbzip2", "Bzip2") unless defined?(RBzip2)
+      def self.stream(input_stream, **args)
+        Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
         begin
-          io = RBzip2.default_adapter::Decompressor.new(input_stream)
+          io = ::Bzip2::FFI::Reader.new(input_stream, args)
           yield io
         ensure
           io&.close

data/lib/io_streams/bzip2/writer.rb CHANGED Viewed

@@ -2,11 +2,11 @@ module IOStreams
   module Bzip2
     class Writer < IOStreams::Writer
       # Write to a stream, compressing with Bzip2
-      def self.stream(input_stream, original_file_name: nil, **_args)
-        Utils.load_soft_dependency("rbzip2", "Bzip2") unless defined?(RBzip2)
+      def self.stream(input_stream, original_file_name: nil, **args)
+        Utils.load_soft_dependency("bzip2-ffi", "Bzip2", "bzip2/ffi") unless defined?(::Bzip2::FFI)
         begin
-          io = RBzip2.default_adapter::Compressor.new(input_stream)
+          io = ::Bzip2::FFI::Writer.new(input_stream, args)
           yield io
         ensure
           io&.close

data/lib/io_streams/deprecated.rb CHANGED Viewed

@@ -206,7 +206,7 @@ module IOStreams
         elsif streams.is_a?(Array)
           streams.each { |stream| apply_old_style_streams(path, stream) }
         elsif streams.is_a?(Hash)
-          streams.each_pair { |stream, options| path.stream(stream, options) }
+          streams.each_pair { |stream, options| path.stream(stream, **options) }
         else
           raise ArgumentError, "Invalid old style stream supplied: #{params.inspect}"
         end

data/lib/io_streams/encode/reader.rb CHANGED Viewed

@@ -73,13 +73,11 @@ module IOStreams
         # EOF reached?
         return unless block
-        block = block.encode(@encoding, @encoding_options) unless block.encoding == @encoding
+        block = block.encode(@encoding, **@encoding_options) unless block.encoding == @encoding
         block = @cleaner.call(block, @replace) if @cleaner
         block
       end
-      private
       def self.extract_cleaner(cleaner)
         return if cleaner.nil?

data/lib/io_streams/encode/writer.rb CHANGED Viewed

@@ -66,7 +66,7 @@ module IOStreams
         return 0 if data.nil?
         data  = data.to_s
-        block = data.encoding == @encoding ? data : data.encode(@encoding, @encoding_options)
+        block = data.encoding == @encoding ? data : data.encode(@encoding, **@encoding_options)
         block = @cleaner.call(block, @replace) if @cleaner
         @output_stream.write(block)
       end

data/lib/io_streams/errors.rb CHANGED Viewed

@@ -9,6 +9,9 @@ module IOStreams
     class MissingHeader < Error
     end
+    class UnknownFormat < Error
+    end
     class TypeMismatch < Error
     end
@@ -18,5 +21,24 @@ module IOStreams
     # When the specified delimiter is not found in the supplied stream / file
     class DelimiterNotFound < Error
     end
+    # Fixed length line has the wrong length
+    class InvalidLineLength < Error
+    end
+    class ValueTooLong < Error
+    end
+    class MalformedDataError < RuntimeError
+      attr_reader :line_number
+      def initialize(message, line_number)
+        @line_number = line_number
+        super("#{message} on line #{line_number}.")
+      end
+    end
+    class InvalidLayout < Error
+    end
   end
 end

data/lib/io_streams/io_streams.rb CHANGED Viewed

@@ -13,8 +13,6 @@ require "uri"
 #   .zip.enc  [ :zip, :enc ]
 #   .gz.enc   [ :gz,  :enc ]
 module IOStreams
-  include Deprecated
   # Returns [Path] instance for the supplied complete path with optional scheme.
   #
   # Example:
@@ -58,7 +56,7 @@ module IOStreams
   end
   # For an existing IO Stream
-  # IOStreams.stream(io).file_name('blah.zip').encoding('BINARY').reader(&:read)
+  # IOStreams.stream(io).file_name('blah.zip').encoding('BINARY').read
   # IOStreams.stream(io).file_name('blah.zip').encoding('BINARY').each(:line){ ... }
   # IOStreams.stream(io).file_name('blah.csv.zip').each(:line) { ... }
   # IOStreams.stream(io).stream(:zip).stream(:pgp, passphrase: 'receiver_passphrase').read
@@ -298,8 +296,6 @@ module IOStreams
     @schemes[scheme_name.nil? ? nil : scheme_name.to_sym] || raise(ArgumentError, "Unknown Scheme type: #{scheme_name.inspect}")
   end
-  private
   Extension = Struct.new(:reader_class, :writer_class)
   # Hold root paths

data/lib/io_streams/line/reader.rb CHANGED Viewed

@@ -38,12 +38,12 @@ module IOStreams
       #     Size of blocks to read from the input stream at a time.
       #     Default: 65536 ( 64K )
       #
-      # TODO:
-      # - Handle embedded line feeds when reading csv files.
-      # - Skip Comment lines. RegExp?
-      # - Skip "empty" / "blank" lines. RegExp?
-      # - Extract header line(s) / first non-comment, non-blank line
-      # - Embedded newline support, RegExp? or Proc?
+      #   embedded_within: [String]
+      #     Supports CSV files where a line may contain an embedded newline.
+      #     For CSV files set `embedded_within: '"'`
+      #
+      # Note:
+      # * When using a line reader and the file_name ends with ".csv" then embedded_within is automatically set to `"`
       def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil, original_file_name: nil)
         super(input_stream)
@@ -63,11 +63,11 @@ module IOStreams
         # Auto-detect windows/linux line endings if not supplied. \n or \r\n
         @delimiter ||= auto_detect_line_endings
-        if @buffer
-          # Change the delimiters encoding to match that of the input stream
-          @delimiter      = @delimiter.encode(@buffer.encoding)
-          @delimiter_size = @delimiter.size
-        end
+        return unless @buffer
+        # Change the delimiters encoding to match that of the input stream
+        @delimiter      = @delimiter.encode(@buffer.encoding)
+        @delimiter_size = @delimiter.size
       end
       # Iterate over every line in the file/stream passing each line to supplied block in turn.
@@ -86,17 +86,29 @@ module IOStreams
         line_count
       end
-      # Reads each line per the @delimeter. It will account for embedded lines provided they are within double quotes.
-      # The embedded_within argument is set in IOStreams::LineReader
+      # Reads each line per the `delimiter`.
+      # Accounts for lines that contain the `delimiter` when the `delimiter` is within the `embedded_within` delimiter.
+      # For Example, CSV files can contain newlines embedded within double quotes.
       def readline
         line = _readline
         if line && @embedded_within
           initial_line_number = @line_number
           while line.count(@embedded_within).odd?
-            raise "Unclosed quoted field on line #{initial_line_number}" if eof? || line.length > @buffer_size * 10
+            if eof? || line.length > @buffer_size * 10
+              raise(Errors::MalformedDataError.new(
+                "Unbalanced delimited field, delimiter: #{@embedded_within}",
+                initial_line_number
+              ))
+            end
             line << @delimiter
-            line << _readline
+            next_line = _readline
+            if next_line.nil?
+              raise(Errors::MalformedDataError.new(
+                "Unbalanced delimited field, delimiter: #{@embedded_within}",
+                initial_line_number
+              ))
+            end
+            line << next_line
           end
         end
         line

data/lib/io_streams/path.rb CHANGED Viewed

@@ -82,6 +82,7 @@ module IOStreams
     end
     # Cleanup an incomplete write to the target "file" if the copy fails.
+    # rubocop:disable Lint/SuppressedException
     def copy_from(source, **args)
       super(source, **args)
     rescue StandardError => e
@@ -91,6 +92,7 @@ module IOStreams
       end
       raise(e)
     end
+    # rubocop:enable Lint/SuppressedException
     # Moves the file by copying it to the new path and then deleting the current path.
     # Returns [IOStreams::Path] the target path.
@@ -151,7 +153,7 @@ module IOStreams
     # Returns [true|false] whether the file is compressed based on its file extensions.
     def compressed?
       # TODO: Look at streams?
-      !(path =~ /\.(zip|gz|gzip|xls.|)\z/i).nil?
+      !(path =~ /\.(zip|gz|gzip|xlsx|xlsm|bz2)\z/i).nil?
     end
     # Returns [true|false] whether the file is encrypted based on its file extensions.

data/lib/io_streams/paths/file.rb CHANGED Viewed

@@ -15,16 +15,16 @@ module IOStreams
       # Examples:
       #
       # # Case Insensitive file name lookup:
-      # IOStreams::Paths::File.new("ruby").glob("r*.md") { |name| puts name }
+      # IOStreams.path("ruby").glob("r*.md") { |name| puts name }
       #
       # # Case Sensitive file name lookup:
-      # IOStreams::Paths::File.new("ruby").each("R*.md", case_sensitive: true) { |name| puts name }
+      # IOStreams.path("ruby").each("R*.md", case_sensitive: true) { |name| puts name }
       #
       # # Also return the names of directories found during the search:
-      # IOStreams::Paths::File.new("ruby").each("R*.md", directories: true) { |name| puts name }
+      # IOStreams.path("ruby").each("R*.md", directories: true) { |name| puts name }
       #
       # # Case Insensitive recursive file name lookup:
-      # IOStreams::Paths::File.new("ruby").glob("**/*.md") { |name| puts name }
+      # IOStreams.path("ruby").glob("**/*.md") { |name| puts name }
       #
       # Parameters:
       #   pattern [String]

data/lib/io_streams/paths/http.rb CHANGED Viewed

@@ -26,16 +26,19 @@ module IOStreams
       #
       #   http_redirect_count: [Integer]
       #     Maximum number of http redirects to follow.
-      def initialize(url, username: nil, password: nil, http_redirect_count: 10)
+      def initialize(url, username: nil, password: nil, http_redirect_count: 10, parameters: nil)
         uri = URI.parse(url)
         unless %w[http https].include?(uri.scheme)
-          raise(ArgumentError, "Invalid URL. Required Format: 'http://<host_name>/<file_name>', or 'https://<host_name>/<file_name>'")
+          raise(
+            ArgumentError,
+            "Invalid URL. Required Format: 'http://<host_name>/<file_name>', or 'https://<host_name>/<file_name>'"
+          )
         end
         @username            = username || uri.user
         @password            = password || uri.password
         @http_redirect_count = http_redirect_count
-        @url                 = url
+        @url                 = parameters ? "#{url}?#{URI.encode_www_form(parameters)}" : url
         super(uri.path)
       end

data/lib/io_streams/paths/s3.rb CHANGED Viewed

@@ -3,7 +3,10 @@ require "uri"
 module IOStreams
   module Paths
     class S3 < IOStreams::Path
-      attr_reader :bucket_name, :client
+      attr_reader :bucket_name, :client, :options
+      # Largest file size supported by the S3 copy object api.
+      S3_COPY_OBJECT_SIZE_LIMIT = 5 * 1024 * 1024 * 1024
       # Arguments:
       #
@@ -92,7 +95,7 @@ module IOStreams
       #   encrypting data. This value is used to store the object and then it is
       #   discarded; Amazon does not store the encryption key. The key must be
       #   appropriate for use with the algorithm specified in the
-      #   x-amz-server-side-encryption-customer-algorithm header.
+      #   x-amz-server-side-encryption-customer-algorithm header.
       #
       # @option params [String] :sse_customer_key_md5
       #   Specifies the 128-bit MD5 digest of the encryption key according to
@@ -179,17 +182,36 @@ module IOStreams
       #
       # Notes:
       # - Can copy across buckets.
+      # - No stream conversions are applied.
       def move_to(target_path)
+        target = copy_to(target_path, convert: false)
+        delete
+        target
+      end
+      # Make S3 perform direct copies within S3 itself.
+      def copy_to(target_path, convert: true)
+        return super(target_path) if convert || (size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
         target = IOStreams.new(target_path)
         return super(target) unless target.is_a?(self.class)
         source_name = ::File.join(bucket_name, path)
-        # TODO: Does/should it also copy metadata?
-        client.copy_object(bucket: target.bucket_name, key: target.path, copy_source: source_name)
-        delete
+        client.copy_object(options.merge(bucket: target.bucket_name, key: target.path, copy_source: source_name))
         target
       end
+      # Make S3 perform direct copies within S3 itself.
+      def copy_from(source_path, convert: true)
+        return super(source_path) if convert
+        source = IOStreams.new(source_path)
+        return super(source) if !source.is_a?(self.class) || (source.size.to_i >= S3_COPY_OBJECT_SIZE_LIMIT)
+        source_name = ::File.join(source.bucket_name, source.path)
+        client.copy_object(options.merge(bucket: bucket_name, key: path, copy_source: source_name))
+      end
       # S3 logically creates paths when a key is set.
       def mkpath
         self
@@ -220,7 +242,7 @@ module IOStreams
       # Shortcut method if caller has a filename already with no other streams applied:
       def read_file(file_name)
         ::File.open(file_name, "wb") do |file|
-          client.get_object(@options.merge(response_target: file, bucket: bucket_name, key: path))
+          client.get_object(options.merge(response_target: file, bucket: bucket_name, key: path))
         end
       end
@@ -248,10 +270,10 @@ module IOStreams
           # Use multipart file upload
           s3  = Aws::S3::Resource.new(client: client)
           obj = s3.bucket(bucket_name).object(path)
-          obj.upload_file(file_name)
+          obj.upload_file(file_name, options)
         else
           ::File.open(file_name, "rb") do |file|
-            client.put_object(@options.merge(bucket: bucket_name, key: path, body: file))
+            client.put_object(options.merge(bucket: bucket_name, key: path, body: file))
           end
         end
       end