RubyGems - iostreams - Versions diffs - 0.20.3 → 1.0.0.beta - Mend

iostreams 0.20.3 → 1.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

checksums.yaml +4 -4
data/lib/io_streams/bzip2/reader.rb +9 -21
data/lib/io_streams/bzip2/writer.rb +9 -21
data/lib/io_streams/deprecated.rb +217 -0
data/lib/io_streams/encode/reader.rb +12 -16
data/lib/io_streams/encode/writer.rb +9 -13
data/lib/io_streams/errors.rb +6 -6
data/lib/io_streams/gzip/reader.rb +7 -14
data/lib/io_streams/gzip/writer.rb +7 -15
data/lib/io_streams/io_streams.rb +182 -524
data/lib/io_streams/line/reader.rb +9 -9
data/lib/io_streams/line/writer.rb +10 -11
data/lib/io_streams/path.rb +190 -0
data/lib/io_streams/paths/file.rb +176 -0
data/lib/io_streams/paths/http.rb +92 -0
data/lib/io_streams/paths/matcher.rb +61 -0
data/lib/io_streams/paths/s3.rb +269 -0
data/lib/io_streams/paths/sftp.rb +99 -0
data/lib/io_streams/pgp.rb +47 -19
data/lib/io_streams/pgp/reader.rb +20 -28
data/lib/io_streams/pgp/writer.rb +24 -46
data/lib/io_streams/reader.rb +28 -0
data/lib/io_streams/record/reader.rb +20 -16
data/lib/io_streams/record/writer.rb +28 -28
data/lib/io_streams/row/reader.rb +22 -26
data/lib/io_streams/row/writer.rb +29 -28
data/lib/io_streams/stream.rb +400 -0
data/lib/io_streams/streams.rb +125 -0
data/lib/io_streams/symmetric_encryption/reader.rb +5 -13
data/lib/io_streams/symmetric_encryption/writer.rb +16 -15
data/lib/io_streams/tabular/header.rb +9 -3
data/lib/io_streams/tabular/parser/array.rb +8 -3
data/lib/io_streams/tabular/parser/csv.rb +6 -2
data/lib/io_streams/tabular/parser/hash.rb +4 -1
data/lib/io_streams/tabular/parser/json.rb +3 -1
data/lib/io_streams/tabular/parser/psv.rb +3 -1
data/lib/io_streams/tabular/utility/csv_row.rb +9 -8
data/lib/io_streams/utils.rb +22 -0
data/lib/io_streams/version.rb +1 -1
data/lib/io_streams/writer.rb +28 -0
data/lib/io_streams/xlsx/reader.rb +7 -19
data/lib/io_streams/zip/reader.rb +7 -26
data/lib/io_streams/zip/writer.rb +21 -38
data/lib/iostreams.rb +15 -15
data/test/bzip2_reader_test.rb +3 -3
data/test/bzip2_writer_test.rb +3 -3
data/test/deprecated_test.rb +123 -0
data/test/encode_reader_test.rb +3 -3
data/test/encode_writer_test.rb +6 -6
data/test/gzip_reader_test.rb +2 -2
data/test/gzip_writer_test.rb +3 -3
data/test/io_streams_test.rb +43 -136
data/test/line_reader_test.rb +20 -20
data/test/line_writer_test.rb +3 -3
data/test/path_test.rb +30 -28
data/test/paths/file_test.rb +206 -0
data/test/paths/http_test.rb +34 -0
data/test/paths/matcher_test.rb +111 -0
data/test/paths/s3_test.rb +207 -0
data/test/pgp_reader_test.rb +8 -8
data/test/pgp_writer_test.rb +13 -13
data/test/record_reader_test.rb +5 -5
data/test/record_writer_test.rb +4 -4
data/test/row_reader_test.rb +5 -5
data/test/row_writer_test.rb +6 -6
data/test/stream_test.rb +116 -0
data/test/streams_test.rb +255 -0
data/test/utils_test.rb +20 -0
data/test/xlsx_reader_test.rb +3 -3
data/test/zip_reader_test.rb +12 -12
data/test/zip_writer_test.rb +5 -5
metadata +33 -45
data/lib/io_streams/base_path.rb +0 -72
data/lib/io_streams/file/path.rb +0 -58
data/lib/io_streams/file/reader.rb +0 -12
data/lib/io_streams/file/writer.rb +0 -22
data/lib/io_streams/http/reader.rb +0 -71
data/lib/io_streams/s3.rb +0 -26
data/lib/io_streams/s3/path.rb +0 -40
data/lib/io_streams/s3/reader.rb +0 -28
data/lib/io_streams/s3/writer.rb +0 -85
data/lib/io_streams/sftp/reader.rb +0 -67
data/lib/io_streams/sftp/writer.rb +0 -68
data/test/base_path_test.rb +0 -35
data/test/file_path_test.rb +0 -97
data/test/file_reader_test.rb +0 -33
data/test/file_writer_test.rb +0 -50
data/test/http_reader_test.rb +0 -38
data/test/s3_reader_test.rb +0 -41
data/test/s3_writer_test.rb +0 -41

data/lib/io_streams/line/reader.rb CHANGED

@@ -1,6 +1,6 @@
 module IOStreams
   module Line
-    class Reader
+    class Reader < IOStreams::Reader
       attr_reader :delimiter, :buffer_size, :line_number
       # Prevent denial of service when a delimiter is not found before this number * `buffer_size` characters are read.
@@ -8,13 +8,12 @@ module IOStreams
       LINEFEED_REGEXP = Regexp.compile(/\r\n|\n|\r/).freeze
-      # Read a line at a time from a file or stream
-      def self.open(file_name_or_io, **args)
-        if file_name_or_io.is_a?(String)
-          IOStreams::File::Reader.open(file_name_or_io) { |io| yield new(io, **args) }
-        else
-          yield new(file_name_or_io, **args)
-        end
+      # Read a line at a time from a stream
+      def self.stream(input_stream, original_file_name: nil, **args, &block)
+        # Pass-through if already a line reader
+        return block.call(input_stream) if input_stream.is_a?(self.class)
+        yield new(input_stream, **args)
       end
       # Create a delimited stream reader from the supplied input stream.
@@ -46,8 +45,9 @@ module IOStreams
       # - Extract header line(s) / first non-comment, non-blank line
       # - Embedded newline support, RegExp? or Proc?
       def initialize(input_stream, delimiter: nil, buffer_size: 65_536, embedded_within: nil)
+        super(input_stream)
         @embedded_within = embedded_within
-        @input_stream    = input_stream
         @buffer_size     = buffer_size
         # More efficient read buffering only supported when the input stream `#read` method supports it.

data/lib/io_streams/line/writer.rb CHANGED

@@ -1,15 +1,14 @@
 module IOStreams
   module Line
-    class Writer
+    class Writer < IOStreams::Writer
       attr_reader :delimiter
-      # Write a line at a time to a file or stream
-      def self.open(file_name_or_io, **args)
-        if file_name_or_io.is_a?(String)
-          IOStreams::File::Writer.open(file_name_or_io) { |io| yield new(io, **args) }
-        else
-          yield new(file_name_or_io, **args)
-        end
+      # Write a line at a time to a stream.
+      def self.stream(output_stream, original_file_name: nil, **args, &block)
+        # Pass-through if already a line writer
+        return block.call(output_stream) if output_stream.is_a?(self.class)
+        yield new(output_stream, **args)
       end
       # A delimited stream writer that will write to the supplied output stream.
@@ -26,8 +25,8 @@ module IOStreams
       #     to the output stream
       #     Default: OS Specific. Linux: "\n"
       def initialize(output_stream, delimiter: $/)
-        @output_stream = output_stream
-        @delimiter     = delimiter
+        super(output_stream)
+        @delimiter = delimiter
       end
       # Write a line to the output stream
@@ -50,7 +49,7 @@ module IOStreams
       #     puts "Wrote #{count} bytes to the output file, including the delimiter"
       #   end
       def write(data)
-        @output_stream.write(data.to_s + delimiter)
+        output_stream.write(data.to_s + delimiter)
       end
     end
   end

data/lib/io_streams/path.rb ADDED

@@ -0,0 +1,190 @@
+module IOStreams
+  class Path < IOStreams::Stream
+    attr_reader :path
+    def initialize(path)
+      raise(ArgumentError, 'Path cannot be nil') if path.nil?
+      raise(ArgumentError, "Path must be a string: #{path.inspect}, class: #{path.class}") unless path.is_a?(String)
+      @path      = path.frozen? ? path : path.dup.freeze
+      @io_stream = nil
+      @streams   = nil
+    end
+    # If elements already contains the current path then it is used as is without
+    # adding the current path for a second time
+    def join(*elements)
+      return self if elements.empty?
+      elements = elements.collect(&:to_s)
+      relative = ::File.join(*elements)
+      if relative.start_with?(path)
+        self.class.new(relative)
+      else
+        self.class.new(::File.join(path, relative))
+      end
+    end
+    def relative?
+      !absolute?
+    end
+    def absolute?
+      !!(path.strip =~ /\A\//)
+    end
+    # By default realpath just returns self.
+    def realpath
+      self
+    end
+    # Runs the pattern from the current path, returning the complete path for located files.
+    #
+    # See IOStreams::Paths::File#each_child for arguments.
+    def each_child(pattern = "*", **args, &block)
+      raise NotImplementedError
+    end
+    # Returns [Array] of child files based on the supplied pattern
+    def children(*args, **kargs)
+      paths = []
+      each_child(*args, **kargs) { |path| paths << path }
+      paths
+    end
+    # Returns [String] the current path.
+    def to_s
+      path
+    end
+    # Removes the last element of the path, the file name, before creating the entire path.
+    # Returns self
+    def mkpath
+      raise NotImplementedError
+    end
+    # Assumes the current path does not include a file name, and creates all elements in the path.
+    # Returns self
+    #
+    # Note: Do not call this method if the path contains a file name, see `#mkpath`
+    def mkdir
+      raise NotImplementedError
+    end
+    # Returns [true|false] whether the file exists
+    def exist?
+      raise NotImplementedError
+    end
+    # Returns [Integer] size of the file
+    def size
+      raise NotImplementedError
+    end
+    # Cleanup an incomplete write to the target "file" if the copy fails.
+    def copy_from(source, **args)
+      super(source, **args)
+    rescue StandardError => exc
+      delete
+      raise(exc)
+    end
+    # Moves the file by copying it to the new path and then deleting the current path.
+    # Returns [IOStreams::Path] the target path.
+    #
+    # Notes:
+    # - Currently only supports moving individual files, not directories.
+    def move_to(target_path)
+      target = IOStreams.new(target_path)
+      target.mkpath
+      target.copy_from(self, convert: false)
+      delete
+      target
+    end
+    # Returns [IOStreams::Path] the directory for this file.
+    # Returns `nil` if no `file_name` was set.
+    #
+    # If `path` does not include a directory name then "." is returned.
+    #
+    #   IOStreams.path("test.rb").directory         #=> "."
+    #   IOStreams.path("a/b/d/test.rb").directory   #=> "a/b/d"
+    #   IOStreams.path(".a/b/d/test.rb").directory  #=> ".a/b/d"
+    #   IOStreams.path("foo.").directory            #=> "."
+    #   IOStreams.path("test").directory            #=> "."
+    #   IOStreams.path(".profile").directory        #=> "."
+    def directory
+      file_name = streams.file_name
+      self.class.new(::File.dirname(file_name)) if file_name
+    end
+    # When path is a file, deletes this file.
+    # When path is a directory, attempts to delete this directory. If the directory contains
+    # any children it will fail.
+    #
+    # Returns self
+    #
+    # Notes:
+    # * No error is raised if the file or directory is not present.
+    # * Only the file is removed, not any of the parent paths.
+    def delete
+      raise NotImplementedError
+    end
+    # When path is a directory, deletes this directory and all its children.
+    # When path is a file, deletes this file.
+    #
+    # Returns self
+    #
+    # Notes:
+    # * No error is raised if the file is not present.
+    # * Only the file is removed, not any of the parent paths.
+    # * All children paths and files will be removed.
+    def delete_all
+      raise NotImplementedError
+    end
+    # Returns [true|false] whether the file is compressed based on its file extensions.
+    def compressed?
+      # TODO: Look at streams?
+      !(path =~ /\.(zip|gz|gzip|xls.|)\z/i).nil?
+    end
+    # Returns [true|false] whether the file is encrypted based on its file extensions.
+    def encrypted?
+      # TODO: Look at streams?
+      !(path =~ /\.(enc|pgp|gpg)\z/i).nil?
+    end
+    # TODO: Other possible methods:
+    # - rename - File.rename
+    # - rmtree - delete everything under this path - FileUtils.rm_r
+    # - directory?
+    # - file?
+    # - empty?
+    # - find(ignore_error: true) - Find.find
+    # Paths are sortable by name
+    def <=>(other)
+      path <=> other.to_s
+    end
+    # Compare by path name, ignore streams
+    def ==(other)
+      path == other.to_s
+    end
+    def inspect
+      str = "#<#{self.class.name}:#{path}"
+      str << " @streams=#{streams.streams.inspect}" if streams.streams
+      str << " @options=#{streams.options.inspect}" if streams.options
+      str << " pipeline=#{pipeline.inspect}>"
+    end
+    private
+    def streams
+      @streams ||= IOStreams::Streams.new(path)
+    end
+  end
+end

data/lib/io_streams/paths/file.rb ADDED

@@ -0,0 +1,176 @@
+require "fileutils"
+module IOStreams
+  module Paths
+    class File < IOStreams::Path
+      # Returns a path to a temporary file.
+      # Temporary file is deleted upon block completion if present.
+      def self.temp_file(basename, extension = "")
+        Utils.temp_file_name(basename, extension) { |file_name| yield(new(file_name).stream(:none)) }
+      end
+      # Yields Paths within the current path.
+      #
+      # Examples:
+      #
+      # # Case Insensitive file name lookup:
+      # IOStreams::Paths::File.new("ruby").each_child("r*.md") { |name| puts name }
+      #
+      # # Case Sensitive file name lookup:
+      # IOStreams::Paths::File.new("ruby").each_child("R*.md", case_sensitive: true) { |name| puts name }
+      #
+      # # Also return the names of directories found during the search:
+      # IOStreams::Paths::File.new("ruby").each_child("R*.md", directories: true) { |name| puts name }
+      #
+      # # Case Insensitive recursive file name lookup:
+      # IOStreams::Paths::File.new("ruby").each_child("**/*.md") { |name| puts name }
+      #
+      # Parameters:
+      #   pattern [String]
+      #     The pattern is not a regexp, it is a string that may contain the following metacharacters:
+      #     `*`      Matches all regular files.
+      #     `c*`     Matches all regular files beginning with `c`.
+      #     `*c`     Matches all regular files ending with `c`.
+      #     `*c*`    Matches all regular files that have `c` in them.
+      #
+      #     `**`     Matches recursively into subdirectories.
+      #
+      #     `?`      Matches any one character.
+      #
+      #     `[set]`  Matches any one character in the supplied `set`.
+      #     `[^set]` Matches any one character that is not in the supplied `set`.
+      #
+      #     `\`      Escapes the next metacharacter.
+      #
+      #     `{a,b}`  Matches on either pattern `a` or pattern `b`.
+      #
+      #   case_sensitive [true|false]
+      #     Whether the pattern is case-sensitive.
+      #
+      #   directories [true|false]
+      #     Whether to yield directory names.
+      #
+      #   hidden [true|false]
+      #     Whether to yield hidden paths.
+      #
+      # Examples:
+      #
+      # Pattern:    File name:       match?   Reason                        Options
+      # =========== ================ ======   ============================= ===========================
+      # "cat"       "cat"            true     # Match entire string
+      # "cat"       "category"       false    # Only match partial string
+      #
+      # "c{at,ub}s" "cats"           true     # { } is supported
+      #
+      # "c?t"       "cat"            true     # "?" match only 1 character
+      # "c??t"      "cat"            false    # ditto
+      # "c*"        "cats"           true     # "*" match 0 or more characters
+      # "c*t"       "c/a/b/t"        true     # ditto
+      # "ca[a-z]"   "cat"            true     # inclusive bracket expression
+      # "ca[^t]"    "cat"            false    # exclusive bracket expression ("^" or "!")
+      #
+      # "cat"       "CAT"            false    # case sensitive              {case_sensitive: true}
+      # "cat"       "CAT"            true     # case insensitive
+      #
+      # "\?"        "?"              true     # escaped wildcard becomes ordinary
+      # "\a"        "a"              true     # escaped ordinary remains ordinary
+      # "[\?]"      "?"              true     # can escape inside bracket expression
+      #
+      # "*"         ".profile"       false    # wildcard doesn't match leading
+      # "*"         ".profile"       true     # period by default.          {hidden: true}
+      # ".*"        ".profile"       true
+      #
+      # "**/*.rb"   "main.rb"        false
+      # "**/*.rb"   "./main.rb"      false
+      # "**/*.rb"   "lib/song.rb"    true
+      # "**.rb"     "main.rb"        true
+      # "**.rb"     "./main.rb"      false
+      # "**.rb"     "lib/song.rb"    true
+      # "*"         "dave/.profile"  true
+      def each_child(pattern = "*", case_sensitive: false, directories: false, hidden: false)
+        flags = 0
+        flags |= ::File::FNM_CASEFOLD unless case_sensitive
+        flags |= ::File::FNM_DOTMATCH if hidden
+        # Dir.each_child("testdir") {|x| puts "Got #{x}" }
+        Dir.glob(::File.join(path, pattern), flags) do |full_path|
+          next if !directories && ::File.directory?(full_path)
+          yield(self.class.new(full_path))
+        end
+      end
+      # Moves this file to the `target_path` by copying it to the new name and then deleting the current file.
+      #
+      # Notes:
+      # - Can move across partitions.
+      def move_to(target_path)
+        target = IOStreams.new(target_path)
+        return super(target) unless target.is_a?(self.class)
+        target.mkpath
+        # In case the file is being moved across partitions
+        FileUtils.move(path, target.to_s)
+        target
+      end
+      def mkpath
+        dir = ::File.dirname(path)
+        FileUtils.mkdir_p(dir) unless ::File.exist?(dir)
+        self
+      end
+      def mkdir
+        FileUtils.mkdir_p(path) unless ::File.exist?(path)
+        self
+      end
+      def exist?
+        ::File.exist?(path)
+      end
+      def size
+        ::File.size(path)
+      end
+      def delete
+        return self unless exist?
+        ::File.directory?(path) ? Dir.delete(path) : ::File.unlink(path)
+        self
+      end
+      def delete_all
+        return self unless exist?
+        ::File.directory?(path) ? FileUtils.remove_dir(path) : ::File.unlink(path)
+        self
+      end
+      # Returns the real path by stripping `.`, `..` and expands any symlinks.
+      def realpath
+        self.class.new(::File.realpath(path))
+      end
+      # Read from file
+      def reader(&block)
+        ::File.open(path, "rb") { |io| streams.reader(io, &block) }
+      end
+      # Write to file
+      #
+      # Note:
+      #   If an exception is raised whilst the file is being written to the file is removed to
+      #   prevent incomplete / partial files from being created.
+      def writer(create_path: true, &block)
+        mkpath if create_path
+        begin
+          ::File.open(path, "wb") { |io| streams.writer(io, &block) }
+        rescue StandardError => e
+          ::File.unlink(path) if ::File.exist?(path)
+          raise(e)
+        end
+      end
+    end
+  end
+end

data/lib/io_streams/paths/http.rb ADDED

@@ -0,0 +1,92 @@
+require 'net/http'
+require 'uri'
+module IOStreams
+  module Paths
+    class HTTP < IOStreams::Path
+      attr_reader :username, :password, :http_redirect_count
+      # Stream to/from a remote file over http(s).
+      #
+      # Parameters:
+      #   url: [String]
+      #      URI of the file to download.
+      #     Example:
+      #       https://www5.fdic.gov/idasp/Offices2.zip
+      #       http://hostname/path/file_name
+      #
+      #     Full url showing all the optional elements that can be set via the url:
+      #       https://username:password@hostname/path/file_name
+      #
+      #   username: [String]
+      #     When supplied, basic authentication is used with the username and password.
+      #
+      #   password: [String]
+      #     Password to use with basic authentication when the username is supplied.
+      #
+      #   http_redirect_count: [Integer]
+      #     Maximum number of http redirects to follow.
+      def initialize(url, username: nil, password: nil, http_redirect_count: 10)
+        uri = URI.parse(url)
+        unless %w[http https].include?(uri.scheme)
+          raise(ArgumentError, "Invalid URL. Required Format: 'http://<host_name>/<file_name>', or 'https://<host_name>/<file_name>'")
+        end
+        @username            = username || uri.user
+        @password            = password || uri.password
+        @http_redirect_count = http_redirect_count
+        super(url)
+      end
+      # Read a file using an http get.
+      #
+      # For example:
+      #   IOStreams.path('https://www5.fdic.gov/idasp/Offices2.zip').reader {|file| puts file.read}
+      #
+      # Read the file without unzipping and streaming the first file in the zip:
+      #   IOStreams.path('https://www5.fdic.gov/idasp/Offices2.zip').stream(:none).reader {|file| puts file.read}
+      #
+      # Notes:
+      # * Since Net::HTTP download only supports a push stream, the data is streamed into a tempfile first.
+      def reader(&block)
+        handle_redirects(path, http_redirect_count, &block)
+      end
+      def handle_redirects(uri, http_redirect_count, &block)
+        uri    = URI.parse(uri) unless uri.is_a?(URI)
+        result = nil
+        raise(IOStreams::Errors::CommunicationsFailure, "Too many redirects") if http_redirect_count < 1
+        Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
+          request = Net::HTTP::Get.new(uri)
+          request.basic_auth(username, password) if username
+          http.request(request) do |response|
+            if response.is_a?(Net::HTTPNotFound)
+              raise(IOStreams::Errors::CommunicationsFailure, "Invalid URL: #{uri}")
+            end
+            if response.is_a?(Net::HTTPUnauthorized)
+              raise(IOStreams::Errors::CommunicationsFailure, "Authorization Required: Invalid :username or :password.")
+            end
+            if response.is_a?(Net::HTTPRedirection)
+              new_uri = response['location']
+              return handle_redirects(new_uri, http_redirect_count: http_redirect_count - 1, &block)
+            end
+            unless response.is_a?(Net::HTTPSuccess)
+              raise(IOStreams::Errors::CommunicationsFailure, "Invalid response code: #{response.code}")
+            end
+            # Since Net::HTTP download only supports a push stream, write it to a tempfile first.
+            Utils.temp_file_name('iostreams_http') do |file_name|
+              ::File.open(file_name, 'wb') { |io| response.read_body { |chunk| io.write(chunk) } }
+              # Return a read stream
+              result = ::File.open(file_name, 'rb', &block)
+            end
+          end
+        end
+        result
+      end
+    end
+  end
+end