RubyGems - iostreams - Versions diffs - 0.18.0 → 0.19.0 - Mend

iostreams 0.18.0 → 0.19.0

Files changed (15) hide show

checksums.yaml +4 -4
data/lib/io_streams/errors.rb +3 -0
data/lib/io_streams/http/reader.rb +71 -0
data/lib/io_streams/io_streams.rb +2 -2
data/lib/io_streams/path.rb +85 -0
data/lib/io_streams/s3/reader.rb +6 -8
data/lib/io_streams/version.rb +1 -1
data/lib/io_streams/xlsx/reader.rb +16 -16
data/lib/io_streams/zip/reader.rb +5 -16
data/lib/io_streams/zip/writer.rb +4 -11
data/lib/iostreams.rb +4 -0
data/test/http_reader_test.rb +38 -0
data/test/path_test.rb +74 -0
data/test/test_helper.rb +5 -0
metadata +8 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e9c9876f8e66281fabd3a26188f1e51d23a12620de461147c9125286d58950cb
-  data.tar.gz: 81f7b6b198f4dfa37b5349de57df74d2004e7984fd176172ac61cce2c7dac9d8
+  metadata.gz: 161967be051ed1b82c87f30afd32b760a2e6627cf8e090e978578c7c35aab278
+  data.tar.gz: abb7aab7f5aca8cc0d043ce3820603c86b3227c9db41d07420f3117010bf68af
 SHA512:
-  metadata.gz: 91c03cfc1c218d235b2bda1ab304ebc78a8d02dbb9902372a2b5c6b5dd0afb53fcee99ed335cc9361ff2e9db9213cced9fad901a9ecf48bd56f53043792e6096
-  data.tar.gz: 9773ffd737484920543182b8b6a32c4704cc42e83fb2be014e86447eedf184458aa6a570b129b3fa55fc44148c25862730c9af937578469467e4f54f8628ef60
+  metadata.gz: 2172e682359bfe1240669fee5da16f807b5a04b940529c5a7046166d5343e7c80bd20c52f05243b59c72f7cfe57ac288603ac6475e75f413f8f99031f3e1cbd0
+  data.tar.gz: 76923520dfda93c00b4209424496c5324ae7975b59f0f8653578652b540c07a9dd6866890b72c147ab5c968e48879210e3aaa85ab8c8ffbe4b4921524352eb32

data/lib/io_streams/errors.rb CHANGED

@@ -12,6 +12,9 @@ module IOStreams
     class TypeMismatch < Error;
     end
+    class CommunicationsFailure < Error;
+    end
     # When the specified delimiter is not found in the supplied stream / file
     class DelimiterNotFound < Error;
     end

data/lib/io_streams/http/reader.rb ADDED

@@ -0,0 +1,71 @@
+require 'net/http'
+require 'uri'
+module IOStreams
+  module HTTP
+    # Read a file using an http get.
+    #
+    # For example:
+    #   IOStreams.reader('https://www5.fdic.gov/idasp/Offices2.zip') {|file| puts file.read}
+    #
+    # Direct example without unzipping the above file:
+    #   IOStreams::HTTP::Reader.new('https://www5.fdic.gov/idasp/Offices2.zip') {|file| puts file.read}
+    #
+    # Parameters:
+    #   uri: [String|URI]
+    #      URI of the file to download.
+    #     Example:
+    #       https://www5.fdic.gov/idasp/Offices2.zip
+    #
+    #   :username
+    #     When supplied, basic authentication is used with the username and password.
+    #     Default: nil
+    #
+    #   :password
+    #     Password to use with basic authentication when the username is supplied.
+    #
+    # Notes:
+    # * Since Net::HTTP download only supports a push stream, the data is streamed into a tempfile first.
+    class Reader
+      def self.open(uri, username: nil, password: nil, **args, &block)
+        raise(ArgumentError, 'file_name must be a URI string') unless uri.is_a?(String) || uri.is_a?(URI)
+        handle_redirects(uri, username: username, password: password, **args, &block)
+      end
+      def self.handle_redirects(uri, username: nil, password: nil, http_redirect_count: 10, **args, &block)
+        uri    = URI.parse(uri) unless uri.is_a?(URI)
+        result = nil
+        raise(IOStreams::Errors::CommunicationsFailure, "Too many redirects") if http_redirect_count < 1
+        Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
+          request = Net::HTTP::Get.new(uri)
+          request.basic_auth(username, password) if username
+          http.request(request) do |response|
+            if response.is_a?(Net::HTTPNotFound)
+              raise(IOStreams::Errors::CommunicationsFailure, "Invalid URL: #{uri}")
+            end
+            if response.is_a?(Net::HTTPUnauthorized)
+              raise(IOStreams::Errors::CommunicationsFailure, "Authorization Required: Invalid :username or :password.")
+            end
+            if response.is_a?(Net::HTTPRedirection)
+              new_uri = response['location']
+              return handle_redirects(new_uri, username: username, password: password, http_redirect_count: http_redirect_count - 1, **args, &block)
+            end
+            raise(IOStreams::Errors::CommunicationsFailure, "Invalid response code: #{response.code}") unless response.is_a?(Net::HTTPSuccess)
+            # Since Net::HTTP download only supports a push stream, write it to a tempfile first.
+            IOStreams::Path.temp_file_name('iostreams_http') do |file_name|
+              IOStreams::File::Writer.open(file_name) do |io|
+                response.read_body { |chunk| io.write(chunk) }
+              end
+              # Return a read stream
+              result = IOStreams::File::Reader.open(file_name, &block)
+            end
+          end
+        end
+        result
+      end
+    end
+  end
+end

data/lib/io_streams/io_streams.rb CHANGED

@@ -585,8 +585,8 @@ module IOStreams
   #    sftp://hostname/path/file_name
   #    s3://bucket/key
   register_scheme(nil, IOStreams::File::Reader, IOStreams::File::Writer)
-  # register_scheme(:http,  IOStreams::HTTP::Reader,  IOStreams::HTTP::Writer)
-  # register_scheme(:https, IOStreams::HTTPS::Reader, IOStreams::HTTPS::Writer)
+  register_scheme(:http,  IOStreams::HTTP::Reader,  nil)
+  register_scheme(:https, IOStreams::HTTP::Reader, nil)
   # register_scheme(:sftp,  IOStreams::SFTP::Reader,  IOStreams::SFTP::Writer)
   register_scheme(:s3, IOStreams::S3::Reader, IOStreams::S3::Writer)
 end

data/lib/io_streams/path.rb ADDED

@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+require 'fileutils'
+require 'tmpdir'
+module IOStreams
+  #
+  # NOTE: This is a proof of concept class and will change significantly.
+  # I.e. Don't use it yet.
+  #
+  class Path
+    attr_reader :root, :relative
+    # Return named root path
+    def self.[](root)
+      @roots[root.to_sym] || raise(ArgumentError, "Unknown root: #{root.inspect}")
+    end
+    # Add a named root path
+    def self.add_root(root, path)
+      @roots[root.to_sym] = path.dup.freeze
+    end
+    def self.roots
+      @roots.dup
+    end
+    # Yields the path to a temporary file_name.
+    #
+    # File is deleted upon completion if present.
+    def self.temp_file_name(basename, extension = '')
+      result = nil
+      ::Dir::Tmpname.create([basename, extension]) do |tmpname|
+        begin
+          result = yield(tmpname)
+        ensure
+          ::File.unlink(tmpname) if ::File.exist?(tmpname)
+        end
+      end
+      result
+    end
+    def initialize(*elements, root: :default)
+      @root     = root.to_sym
+      root_path = self.class[@root]
+      if elements.empty?
+        @relative = ''
+        @path     = root_path
+      else
+        @relative = ::File.join(*elements).freeze
+        if @relative.start_with?(root_path)
+          @path     = @relative
+          @relative = @path[root_path.size + 1..-1].freeze
+        else
+          @path = ::File.join(root_path, @relative).freeze
+        end
+      end
+    end
+    def to_s
+      @path
+    end
+    # Creates the entire path excluding the file_name.
+    def mkpath
+      path = ::File.dirname(@path)
+      FileUtils.mkdir_p(path) unless ::File.exist?(path)
+      self
+    end
+    def exist?
+      ::File.exist?(@path)
+    end
+    # Delete the file.
+    #
+    # Note: Only the file is removed, not any of the parent paths.
+    def delete
+      ::File.unlink(@path)
+      self
+    end
+    private
+    @roots = {}
+  end
+end

data/lib/io_streams/s3/reader.rb CHANGED

@@ -13,15 +13,13 @@ module IOStreams
         begin
           # Since S3 download only supports a push stream, write it to a tempfile first.
-          temp_file = Tempfile.new('rocket_job')
-          temp_file.binmode
+          IOStreams::Path.temp_file_name('iostreams_s3') do |file_name|
+            args[:response_target] = file_name
+            object.get(args)
-          args[:response_target] = temp_file.to_path
-          object.get(args)
-          block.call(temp_file)
-        ensure
-          temp_file.delete if temp_file
+            # Return a read stream
+            IOStreams::File::Reader.open(file_name, &block)
+          end
         end
       end
     end

data/lib/io_streams/version.rb CHANGED

@@ -1,3 +1,3 @@
 module IOStreams
-  VERSION = '0.18.0'
+  VERSION = '0.19.0'
 end

data/lib/io_streams/xlsx/reader.rb CHANGED

@@ -4,24 +4,24 @@ module IOStreams
   module Xlsx
     class Reader
       # Convert a xlsx, or xlsm file or stream into CSV format.
-      def self.open(file_name_or_io, _ = nil)
-        if file_name_or_io.is_a?(String)
-          file_name = file_name_or_io
-        else
-          temp_file = Tempfile.new('iostreams_xlsx')
-          temp_file.binmode
-          IOStreams.copy(file_name_or_io, temp_file)
-          file_name = temp_file.to_path
+      def self.open(file_name_or_io, _ = nil, &block)
+        return extract_csv(file_name_or_io, &block) if file_name_or_io.is_a?(String)
+        # Creek gem can only work against a file, not a stream, so create temp file.
+        IOStreams::Path.temp_file_name('iostreams_xlsx') do |temp_file_name|
+          IOStreams.copy(file_name_or_io, temp_file_name, target_options: {streams: []})
+          extract_csv(temp_file_name, &block)
         end
+      end
-        csv_temp_file = Tempfile.new('iostreams_csv')
-        csv_temp_file.binmode
-        new(file_name).each { |lines| csv_temp_file << lines.to_csv }
-        csv_temp_file.rewind
-        yield csv_temp_file
-      ensure
-        temp_file.delete if temp_file
-        csv_temp_file.delete if csv_temp_file
+      # Convert the spreadsheet to csv in a tempfile
+      def self.extract_csv(file_name, &block)
+        IOStreams::Path.temp_file_name('iostreams_csv') do |temp_file_name|
+          IOStreams::File::Writer.open(temp_file_name) do |io|
+            new(file_name).each { |lines| io << lines.to_csv }
+          end
+          IOStreams::File::Reader.open(temp_file_name, &block)
+        end
       end
       def initialize(file_name)

data/lib/io_streams/zip/reader.rb CHANGED

@@ -12,7 +12,7 @@ module IOStreams
       #       puts data
       #     end
       #   end
-      def self.open(file_name_or_io, buffer_size: 65536, &block)
+      def self.open(file_name_or_io, _ = nil, &block)
         if !defined?(JRuby) && !defined?(::Zip)
           # MRI needs Ruby Zip, since it only has native support for GZip
           begin
@@ -25,21 +25,10 @@ module IOStreams
         # File name supplied
         return read_file(file_name_or_io, &block) unless IOStreams.reader_stream?(file_name_or_io)
-        # Stream supplied
-        begin
-          # Since ZIP cannot be streamed, download un-zipped data to a local file before streaming
-          temp_file = Tempfile.new('rocket_job')
-          temp_file.binmode
-          file_name = temp_file.to_path
-          # Stream zip stream into temp file
-          ::File.open(file_name, 'wb') do |file|
-            IOStreams.copy(file_name_or_io, file, buffer_size: buffer_size)
-          end
-          read_file(file_name, &block)
-        ensure
-          temp_file.delete if temp_file
+        # ZIP can only work against a file, not a stream, so create temp file.
+        IOStreams::Path.temp_file_name('iostreams_zip') do |temp_file_name|
+          IOStreams.copy(file_name_or_io, temp_file_name, target_options: {streams: []})
+          read_file(temp_file_name, &block)
         end
       end

data/lib/io_streams/zip/writer.rb CHANGED

@@ -38,17 +38,10 @@ module IOStreams
         # File name supplied
         return write_file(file_name_or_io, zip_file_name, &block) unless IOStreams.writer_stream?(file_name_or_io)
-        # Stream supplied
-        begin
-          # Since ZIP cannot be streamed, download to a local file before streaming
-          temp_file = Tempfile.new('rocket_job')
-          temp_file.binmode
-          write_file(temp_file.to_path, zip_file_name, &block)
-          # Stream temp file into output stream
-          IOStreams.copy(temp_file, file_name_or_io, buffer_size: buffer_size)
-        ensure
-          temp_file.delete if temp_file
+        # ZIP can only work against a file, not a stream, so create temp file.
+        IOStreams::Path.temp_file_name('iostreams_zip') do |temp_file_name|
+          write_file(temp_file_name, zip_file_name, &block)
+          IOStreams.copy(temp_file_name, file_name_or_io, source_options: {streams: []})
         end
       end

data/lib/iostreams.rb CHANGED

@@ -15,6 +15,10 @@ module IOStreams
     autoload :Reader, 'io_streams/gzip/reader'
     autoload :Writer, 'io_streams/gzip/writer'
   end
+  module HTTP
+    autoload :Reader, 'io_streams/http/reader'
+  end
+  autoload :Path,     'io_streams/path'
   autoload :Pgp,      'io_streams/pgp'
   autoload :S3,       'io_streams/s3'
   module SFTP

data/test/http_reader_test.rb ADDED

@@ -0,0 +1,38 @@
+require_relative 'test_helper'
+class HTTPReaderTest < Minitest::Test
+  describe IOStreams::HTTP::Reader do
+    let :uri do
+      "http://example.com/index.html?count=10"
+    end
+    let :ssl_uri do
+      "https://example.com/index.html?count=10"
+    end
+    describe '.open' do
+      it 'reads http' do
+        result = IOStreams::HTTP::Reader.open(uri) do |io|
+          io.read
+        end
+        assert_includes result, "<html>"
+      end
+      it 'reads https' do
+        result = IOStreams::HTTP::Reader.open(ssl_uri) do |io|
+          io.read
+        end
+        assert_includes result, "<html>"
+      end
+      it 'does not support streams' do
+        assert_raises ArgumentError do
+          io = StringIO.new
+          IOStreams::HTTP::Reader.open(io) do |http_io|
+            http_io.read
+          end
+        end
+      end
+    end
+  end
+end

data/test/path_test.rb ADDED

@@ -0,0 +1,74 @@
+require_relative 'test_helper'
+module IOStreams
+  class PathTest < Minitest::Test
+    describe IOStreams::Path do
+      describe '.root' do
+        it 'return default path' do
+          path = ::File.expand_path(::File.join(__dir__, '../tmp/default'))
+          assert_equal path, IOStreams::Path[:default]
+        end
+        it 'return downloads path' do
+          path = ::File.expand_path(::File.join(__dir__, '../tmp/downloads'))
+          assert_equal path, IOStreams::Path[:downloads]
+        end
+      end
+      describe '.to_s' do
+        it 'returns path' do
+          assert_equal IOStreams::Path[:default], IOStreams::Path.new.to_s
+        end
+        it 'adds path to root' do
+          assert_equal ::File.join(IOStreams::Path[:default], 'test'), IOStreams::Path.new('test').to_s
+        end
+        it 'adds paths to root' do
+          assert_equal ::File.join(IOStreams::Path[:default], 'test', 'second', 'third'), IOStreams::Path.new('test', 'second', 'third').to_s
+        end
+        it 'returns path and filename' do
+          path = ::File.join(IOStreams::Path[:default], 'file.xls')
+          assert_equal path, IOStreams::Path.new('file.xls').to_s
+        end
+        it 'adds path to root and filename' do
+          path = ::File.join(IOStreams::Path[:default], 'test', 'file.xls')
+          assert_equal path, IOStreams::Path.new('test', 'file.xls').to_s
+        end
+        it 'adds paths to root' do
+          path = ::File.join(IOStreams::Path[:default], 'test', 'second', 'third', 'file.xls')
+          assert_equal path, IOStreams::Path.new('test', 'second', 'third', 'file.xls').to_s
+        end
+        it 'return path as sent in when full path' do
+          path = ::File.join(IOStreams::Path[:default], 'file.xls')
+          assert_equal path, IOStreams::Path.new(path).to_s
+        end
+      end
+      describe '.mkpath' do
+        it 'makes root' do
+          path = IOStreams::Path.new('test.xls')
+          assert_equal path, path.mkpath
+          assert ::File.exist?(IOStreams::Path.new.to_s)
+        end
+        it 'makes root with path' do
+          path = IOStreams::Path.new('test', 'test.xls')
+          assert_equal path, path.mkpath
+          assert ::File.exist?(IOStreams::Path.new('test').to_s)
+        end
+        it 'makes root with paths' do
+          path = IOStreams::Path.new('test', 'second', 'third', 'test.xls')
+          assert_equal path, path.mkpath
+          assert ::File.exist?(IOStreams::Path.new('test', 'second', 'third').to_s)
+        end
+      end
+    end
+  end
+end

data/test/test_helper.rb CHANGED

@@ -32,3 +32,8 @@ unless IOStreams::Pgp.has_key?(email: 'receiver@example.org')
   puts 'Generating test PGP key: receiver@example.org'
   IOStreams::Pgp.generate_key(name: 'Receiver', email: 'receiver@example.org', passphrase: 'receiver_passphrase', key_length: 2048)
 end
+# Test paths
+root = File.expand_path(File.join(__dir__, '../tmp'))
+IOStreams::Path.add_root(:default, File.join(root, 'default'))
+IOStreams::Path.add_root(:downloads, File.join(root, 'downloads'))

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: iostreams
 version: !ruby/object:Gem::Version
-  version: 0.18.0
+  version: 0.19.0
 platform: ruby
 authors:
 - Reid Morrison
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-08-15 00:00:00.000000000 Z
+date: 2019-08-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: concurrent-ruby
@@ -43,9 +43,11 @@ files:
 - lib/io_streams/file/writer.rb
 - lib/io_streams/gzip/reader.rb
 - lib/io_streams/gzip/writer.rb
+- lib/io_streams/http/reader.rb
 - lib/io_streams/io_streams.rb
 - lib/io_streams/line/reader.rb
 - lib/io_streams/line/writer.rb
+- lib/io_streams/path.rb
 - lib/io_streams/pgp.rb
 - lib/io_streams/pgp/reader.rb
 - lib/io_streams/pgp/writer.rb
@@ -93,9 +95,11 @@ files:
 - test/files/unclosed_quote_test.csv
 - test/gzip_reader_test.rb
 - test/gzip_writer_test.rb
+- test/http_reader_test.rb
 - test/io_streams_test.rb
 - test/line_reader_test.rb
 - test/line_writer_test.rb
+- test/path_test.rb
 - test/pgp_reader_test.rb
 - test/pgp_test.rb
 - test/pgp_writer_test.rb
@@ -145,6 +149,7 @@ test_files:
 - test/file_reader_test.rb
 - test/record_reader_test.rb
 - test/s3_writer_test.rb
+- test/http_reader_test.rb
 - test/pgp_writer_test.rb
 - test/line_writer_test.rb
 - test/row_reader_test.rb
@@ -165,6 +170,7 @@ test_files:
 - test/test_helper.rb
 - test/file_writer_test.rb
 - test/tabular_test.rb
+- test/path_test.rb
 - test/pgp_test.rb
 - test/io_streams_test.rb
 - test/record_writer_test.rb