url_fetcher 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a0720abc5e87d7a504fb8d25c58cec2675fe2e6c
4
+ data.tar.gz: 93bb4aeffae265ce2acd95e84584b7d28ebd6f5f
5
+ SHA512:
6
+ metadata.gz: 2a9d32727ad820eb13f494b70e242cbe7f81b751da46d4c53d91055ea4e43dde090bc3eb25fdb5f447f6484d819abb21e875e31592df4ff54e1c0b5c1b5b54ef
7
+ data.tar.gz: b61ae8d4f664273bf63a1deaa7a8d19025ed9acc74db06a8a92e5181869e4d40b60c1e1a8280ea93327f59c74d7d3b73b4f457f06bf375d063ffdce3497ee814
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in url_fetcher.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 WHI, Inc.
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # UrlFetcher
2
+
3
+ This gem provides a class that will fetch a URL response and save the body to a Tempfile. This can be useful if you are fetching large HTTP objects so you don't need to read them into memory all at once. The response body is exposed as a stream.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'url_fetcher'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install url_fetcher
18
+
19
+ ## Usage
20
+
21
+ response = UrlFetcher.new("http://example.com/large_file")
22
+ response.body # Returns a stream to the body from a Tempfile on disk.
23
+
24
+ ## Contributing
25
+
26
+ 1. Fork it
27
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
28
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
29
+ 4. Push to the branch (`git push origin my-new-feature`)
30
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
# Rake tasks for the gem: the Bundler packaging tasks plus a :test task that
# is reachable as `rake`, `rake test` and `rake tests`.
require "bundler/gem_tasks"

# Running `rake` with no task name runs the tests.
desc 'Default: run unit tests.'
task default: :test

# Alias so that tools which invoke `rake tests` also work.
desc 'RVM likes to call it tests'
task tests: :test

# Prefer the real RSpec rake task. When RSpec cannot be loaded, define a
# placeholder :test task that only prints a hint to standard error.
begin
  require 'rspec'
  require 'rspec/core/rake_task'

  desc 'Run the unit tests'
  RSpec::Core::RakeTask.new(:test)
rescue LoadError
  task(:test) { STDERR.puts "You must have rspec 2.0 installed to run the tests" }
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.1
@@ -0,0 +1,105 @@
1
+ require "net/http"
2
+ require "open-uri"
3
+ require "tempfile"
4
+
5
# This class will fetch the contents of a URL and store them in a Tempfile. The
# results are exposed as a stream so you don't need to read potentially huge responses
# into memory all at once.
class UrlFetcher
  MEGABYTE = 1048576

  # Body size limit, in bytes, applied when the :max_size option is not given.
  DEFAULT_MAX_SIZE = 10 * MEGABYTE

  attr_reader :url

  # Create a fetcher for the specified URL. The HTTP request (and any followed
  # redirects) is performed immediately, inside the constructor.
  #
  # Options include (default in parentheses):
  # * :unlink (true) - Accepted for compatibility. NOTE: nothing in this class reads
  #   this option; the Tempfile is closed after the download and is not unlinked
  #   explicitly, so callers reopen it with +body.open+.
  # * :follow_redirects (true) - Automatically follow redirects instead of returning the redirect response.
  # * :method (:get) - HTTP method to use to fetch the URL (:get, :head or :post; anything else is sent as GET).
  # * :max_size (10 megabytes) - The maximum size in bytes that should be fetched. Enforced against
  #   the Content-Length header and against the number of bytes actually received.
  # * :open_timeout (10) - Time in seconds to wait for a connection to be established.
  # * :read_timeout (20) - Time in seconds to wait for reading the HTTP response.
  # * :verify_ssl (false) - Verify the server certificate on https URLs.
  #
  # If a block is given it is called with the resolved location (a String) before
  # each redirect is followed; returning +false+ from the block stops redirecting.
  #
  # Raises a RuntimeError for too many redirects, a circular redirect or a body
  # larger than :max_size, and the Net::HTTP error produced by the response's
  # +value+ method for any response that is neither a success nor a redirect.
  def initialize(url, options = {}, &redirect_hook)
    @url = url
    @redirect_hook = redirect_hook
    options = default_options.merge(options)
    @response = fetch_response(@url, options)
  end

  # Return the Tempfile holding the downloaded body, or nil when the response
  # was not a success or carried no body (e.g. a HEAD request). The Tempfile
  # has been closed after writing; call +open+ on it before reading.
  def body
    @response.body if success?
  end

  # Get the header with the specified name from the response.
  def header(name)
    @response[name]
  end

  # Return true if the final response was a redirect (i.e. redirects were not
  # followed, or the redirect block passed to the constructor returned false).
  def redirect?
    @response.is_a? Net::HTTPRedirection
  end

  # Return true if the response was a success.
  def success?
    @response.is_a? Net::HTTPSuccess
  end

  private

  def default_options
    { :unlink => true, :follow_redirects => true, :method => :get }
  end

  # Perform the request for +url+ and return the final Net::HTTPResponse,
  # following redirects recursively. +previous_attempts+ accumulates every URL
  # requested so far so that loops and overly long chains can be detected.
  def fetch_response(url, options, previous_attempts = [])
    raise "Too many redirects" if previous_attempts.size > 5
    raise "Circular redirect" if previous_attempts.include?(url)
    previous_attempts << url

    uri = URI(url)
    http = build_http(uri, options)
    request = build_request(uri, options[:method])
    max_size = options[:max_size] || DEFAULT_MAX_SIZE

    response = http.request(request) do |resp|
      unless resp.is_a?(Net::HTTPSuccess) || resp.is_a?(Net::HTTPRedirection)
        resp.value # Raises an appropriate HTTP error
      end
      # Only download when both the response class and the request method allow
      # a body; a HEAD request therefore leaves the body as nil.
      if resp.is_a?(Net::HTTPSuccess) && resp.class.body_permitted? && request.response_body_permitted?
        download_body(resp, max_size)
      end
    end

    return response unless response.is_a?(Net::HTTPRedirection) && options[:follow_redirects]

    # Redirect-class responses such as 304 Not Modified have no Location
    # header; there is nothing to follow, so hand the response back untouched.
    location = response["Location"]
    return response if location.nil? || location.empty?

    # Resolve against the URL just requested so that relative locations keep
    # the scheme, host and port; absolute locations pass through unchanged.
    location = uri.merge(location).to_s
    return response if @redirect_hook && @redirect_hook.call(location) == false

    fetch_response(location, options, previous_attempts)
  end

  # Build the Net::HTTP connection object for +uri+ with the configured timeouts.
  def build_http(uri, options)
    http = Net::HTTP.new(uri.host, uri.port)
    http.read_timeout = options[:read_timeout] || 20 # This is seconds. Default is 60.
    http.open_timeout = options[:open_timeout] || 10
    if uri.scheme == "https"
      http.use_ssl = true
      # SECURITY: certificate verification is off unless :verify_ssl is set,
      # which leaves https fetches open to man-in-the-middle attacks. The
      # default is kept for backward compatibility; pass :verify_ssl => true.
      http.verify_mode = options[:verify_ssl] ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
    end
    http
  end

  # Build the request object for the given HTTP method symbol (GET by default).
  def build_request(uri, method)
    case method
    when :head
      Net::HTTP::Head.new(uri.request_uri)
    when :post
      Net::HTTP::Post.new(uri.request_uri)
    else
      Net::HTTP::Get.new(uri.request_uri)
    end
  end

  # Stream the body of +resp+ into a binary Tempfile and install that Tempfile
  # as the response body. Raises a RuntimeError as soon as the declared or the
  # received size exceeds +max_size+; the partial Tempfile is removed on failure.
  def download_body(resp, max_size)
    content_length = resp["Content-Length"].to_i
    raise "File too big (#{content_length} bytes)" if content_length > max_size

    tempfile = Tempfile.new("url_fetcher", :encoding => 'ascii-8bit')
    begin
      received = 0
      resp.read_body do |chunk|
        # Content-Length may be absent (chunked transfer) or wrong, so count
        # the bytes that actually arrive.
        received += chunk.bytesize
        raise "File too big (more than #{max_size} bytes)" if received > max_size
        tempfile << chunk
      end
      tempfile.close
    rescue StandardError
      tempfile.close! # do not leave a partial download on disk
      raise
    end
    resp.body = tempfile
  end
end
@@ -0,0 +1,14 @@
1
# Shared RSpec setup: loads the library under test and WebMock's RSpec
# integration, then configures filtering and ordering for the suite.
require 'url_fetcher'
require 'webmock/rspec'

RSpec.configure do |rspec|
  # Let bare symbols on an example (e.g. :focus) act as `:focus => true` metadata.
  rspec.treat_symbols_as_metadata_keys_with_true_values = true

  # Run only examples tagged :focus; when nothing is tagged, run everything.
  rspec.filter_run :focus
  rspec.run_all_when_everything_filtered = true

  # Randomise the example order to expose order dependencies. The seed is
  # printed after each run; pass it back to reproduce a given order:
  #     --seed 1234
  rspec.order = 'random'
end
@@ -0,0 +1,120 @@
1
require 'spec_helper'

# Behavioural specs for UrlFetcher. All HTTP traffic is stubbed with WebMock.
# The body is a Tempfile that has been closed after download, so examples call
# `open` on it before reading.
describe UrlFetcher do

  # --- Basic requests ---

  it "should fetch a URL to a temp file" do
    WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("http://example.com/test")
    url_fetcher.success?.should == true
    url_fetcher.redirect?.should == false
    url_fetcher.header("content-length").should == "5"
    url_fetcher.body.open
    url_fetcher.body.read.should == "Hello"
  end

  it "should perform a POST request" do
    WebMock.stub_request(:post, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("http://example.com/test", :method => :post)
    url_fetcher.success?.should == true
    url_fetcher.redirect?.should == false
    url_fetcher.header("content-length").should == "5"
    url_fetcher.body.open
    url_fetcher.body.read.should == "Hello"
  end

  it "should perform a HEAD request" do
    WebMock.stub_request(:head, "http://example.com/test").to_return(:status => 200, :body => nil, :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("http://example.com/test", :method => :head)
    url_fetcher.success?.should == true
    url_fetcher.redirect?.should == false
    url_fetcher.header("content-length").should == "5"
    url_fetcher.body.should == nil
  end

  it "should work with SSL" do
    WebMock.stub_request(:get, "https://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("https://example.com/test")
    url_fetcher.success?.should == true
    url_fetcher.redirect?.should == false
    url_fetcher.header("content-length").should == "5"
    url_fetcher.body.open.read.should == "Hello"
  end

  # --- Redirect handling ---

  it "should honor redirects" do
    WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
    WebMock.stub_request(:get, "http://example.com/test2").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("http://example.com/test1")
    url_fetcher.success?.should == true
    url_fetcher.redirect?.should == false
    url_fetcher.header("content-length").should == "5"
    url_fetcher.body.open.read.should == "Hello"
  end

  it "should not honor redirects if :follow_redirects == false" do
    WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 301, :headers => {"Location" => "http://example.com/test2"})
    url_fetcher = UrlFetcher.new("http://example.com/test1", :follow_redirects => false)
    url_fetcher.success?.should == false
    url_fetcher.redirect?.should == true
  end

  it "should call a block before each redirect with the new location" do
    WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 302, :headers => {"Location" => "http://example.com/test2"})
    WebMock.stub_request(:get, "http://example.com/test2").to_return(:status => 302, :headers => {"Location" => "http://example.com/test3"})
    WebMock.stub_request(:get, "http://example.com/test3").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    redirects = []
    url_fetcher = UrlFetcher.new("http://example.com/test1") do |location|
      redirects << location
    end
    url_fetcher.success?.should == true
    url_fetcher.body.open
    url_fetcher.body.read.should == "Hello"
    redirects.should == ["http://example.com/test2", "http://example.com/test3"]
  end

  it "should abort redirecting if a block is given that returns false" do
    WebMock.stub_request(:get, "http://example.com/test1").to_return(:status => 302, :headers => {"Location" => "http://example.com/test2"})
    WebMock.stub_request(:get, "http://example.com/test2").to_return(:status => 302, :headers => {"Location" => "http://example.com/test3"})
    WebMock.stub_request(:get, "http://example.com/test3").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    redirects = []
    url_fetcher = UrlFetcher.new("http://example.com/test1") do |location|
      redirects << location
      false
    end
    url_fetcher.success?.should == false
    url_fetcher.redirect?.should == true
    url_fetcher.body.should == nil
    redirects.should == ["http://example.com/test2"]
  end

  it "should raise an error if there is a circular redirect" do
    WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 302, :headers => {"Location" => "http://example.com/test"})
    lambda{ UrlFetcher.new("http://example.com/test") }.should raise_error("Circular redirect")
  end

  it "should raise an error if there are too many redirects" do
    6.times do |i|
      WebMock.stub_request(:get, "http://example.com/test#{i}").to_return(:status => 302, :headers => {"Location" => "http://example.com/test#{i + 1}"})
    end
    lambda{ UrlFetcher.new("http://example.com/test0") }.should raise_error("Too many redirects")
  end

  # --- Errors, options and limits ---

  it "should raise an error if an HTTP error is returned" do
    WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 404, :body => "Not Found")
    lambda{ UrlFetcher.new("http://example.com/test") }.should raise_error(Net::HTTPServerException)
  end

  it "should not unlink the temp file if asked not to" do
    WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 5})
    url_fetcher = UrlFetcher.new("http://example.com/test", :unlink => false)
    url_fetcher.success?.should == true
    url_fetcher.body.open.read.should == "Hello"
    url_fetcher.body.path.should_not == nil
  end

  it "should limit the size of the file downloaded" do
    WebMock.stub_request(:get, "http://example.com/test").to_return(:status => 200, :body => "Hello", :headers => {"Content-Length" => 1001})
    # Pin the error class: a bare raise_error would also pass on an unrelated
    # failure such as a NameError inside the library.
    lambda do
      UrlFetcher.new("http://example.com/test", :max_size => 1000)
    end.should raise_error(RuntimeError)
  end
end
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for url_fetcher. The version is read from the VERSION file
# next to this gemspec and the file list comes from `git ls-files`.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |spec|
  # Identity
  spec.name    = "url_fetcher"
  spec.version = File.read(File.expand_path("../VERSION", __FILE__)).chomp
  spec.authors = ["weheartit"]
  spec.email   = ["dev@weheartit.com"]

  # Descriptive metadata
  spec.description = %q{Fetch resources from the internetz!}
  spec.summary     = %q{Fetch resources from the internetz with circular redirects support}
  spec.homepage    = "https://github.com/weheartit/whi-url-fetcher"
  spec.license     = "MIT"

  # Packaged files: everything tracked by git, with executables and test
  # files picked out of that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependency
  spec.add_dependency('addressable', '~>2.3.4')

  # Development-only dependencies
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "webmock"
end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_fetcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - weheartit
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: addressable
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.3.4
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.3.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Fetch resources from the internetz!
84
+ email:
85
+ - dev@weheartit.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".rspec"
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - VERSION
97
+ - lib/url_fetcher.rb
98
+ - spec/spec_helper.rb
99
+ - spec/url_fetcher_spec.rb
100
+ - url_fetcher.gemspec
101
+ homepage: https://github.com/weheartit/whi-url-fetcher
102
+ licenses:
103
+ - MIT
104
+ metadata: {}
105
+ post_install_message:
106
+ rdoc_options: []
107
+ require_paths:
108
+ - lib
109
+ required_ruby_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ requirements: []
120
+ rubyforge_project:
121
+ rubygems_version: 2.2.2
122
+ signing_key:
123
+ specification_version: 4
124
+ summary: Fetch resources from the internetz with circular redirects support
125
+ test_files:
126
+ - spec/spec_helper.rb
127
+ - spec/url_fetcher_spec.rb