RubyGems - metainspector - Versions diffs - 4.0.0.rc2 → 4.0.0.rc3 - Mend

metainspector 4.0.0.rc2 → 4.0.0.rc3

Files changed (7)

checksums.yaml +4 -4
data/README.md +27 -17
data/lib/meta_inspector/document.rb +10 -4
data/lib/meta_inspector/request.rb +12 -11
data/lib/meta_inspector/version.rb +1 -1
data/spec/request_spec.rb +0 -44
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5ba263e5a232d332082575e310c75d31c971a419
-  data.tar.gz: befb5ddec99b4b36db95e37eb2ab33e22bfd1e2f
+  metadata.gz: bf5c2667ff165768d1a0e0c49ebd47ea5f8de28e
+  data.tar.gz: 15b2f4fb7a2f090a75fe06ab98959e35d5f97a3f
 SHA512:
-  metadata.gz: 9d813747e71e15d058104398fcc53eefd8aeeee2c2eb224b53cb0d2dcf6bf786c1bcd3de9111152d7e583e6fd96a42b4b34fa34e4bfb53cf4b7fc8127e27dc01
-  data.tar.gz: fe41a9cb0a176c9d03892ab18d48194203d12aeb2df22faf47c6a2d32d91fa0422aabb5acd7c12e0b3f9a336898ca888601a5ebe1dae36a0de1a790d5bc60d51
+  metadata.gz: eeb60786169e979dd8bb257832f2bf2c0270af8b2bf63056330826677a4943373aea51269a1ddfc397ae296cb786b5285997a1721b5ae412cc006214c872af18
+  data.tar.gz: ae891af393d3746df5048a1e512e70f11718fc8357a2c8212376119afb174e8b7e0ccd180f48c252813581a4ed5671b0f01e35ca555b475efc9997238c29c952

data/README.md CHANGED Viewed

@@ -25,6 +25,8 @@ page.links.external # Returns all external HTTP links found
 * Now `page.image` will return the first image in `page.images` if no OG or Twitter image found, instead of returning `nil`.
+* You can now specify 2 different timeouts, `connection_timeout` and `read_timeout`, instead of the previous single `timeout`.
 ## Changes in 3.0
 * The redirect API has been changed, now the `:allow_redirections` option will expect only a boolean, which by default is `true`. That is, no more specifying `:safe`, `:unsafe` or `:all`.
@@ -213,28 +215,36 @@ And the full scraped document is accessible from:
 ### Timeout & Retries
-By default, MetaInspector times out after 20 seconds of waiting for a page to respond,
-and it will retry fetching the page 3 times.
-You can specify different values for both of these, like this:
+You can specify 2 different timeouts when requesting a page:
+* `connection_timeout` sets the maximum number of seconds to wait to get a connection to the page.
+* `read_timeout` sets the maximum number of seconds to wait to read the page, once connected.
+Both timeouts default to 20 seconds each.
-    # timeout after 5 seconds, retry 4 times
-    page = MetaInspector.new('sitevalidator.com', :timeout => 5, :retries => 4)
+You can also specify the number of `retries`, which defaults to 3.
+For example, this will time out after 10 seconds waiting for a connection, or after 5 seconds waiting
+to read its contents, and will retry 4 times:
+```ruby
+page = MetaInspector.new('www.google', :connection_timeout => 10, :read_timeout => 5, :retries => 4)
+```
 If MetaInspector fails to fetch the page after it has exhausted its retries,
-it will raise `MetaInspector::Request::TimeoutError`, which you can rescue in your
+it will raise `Faraday::TimeoutError`, which you can rescue in your
 application code.
-    begin
-      data = MetaInspector.new(url)
-    rescue MetaInspector::Request::TimeoutError
-      enqueue_for_future_fetch_attempt(url)
-      render_simple(url)
-    rescue
-      log_fetch_error($!)
-      render_simple(url)
-    else
-      render_rich(data)
-    end
+```ruby
+begin
+  page = MetaInspector.new(url)
+rescue Faraday::TimeoutError
+  enqueue_for_future_fetch_attempt(url)
+  render_simple(url)
+else
+  render_rich(page)
+end
+```
 ### Redirections

data/lib/meta_inspector/document.rb CHANGED Viewed

@@ -1,13 +1,15 @@
 module MetaInspector
   # A MetaInspector::Document knows about its URL and its contents
   class Document
-    attr_reader :timeout, :html_content_only, :allow_redirections, :warn_level, :headers
+    attr_reader :html_content_only, :allow_redirections, :warn_level, :headers
     include MetaInspector::Exceptionable
     # Initializes a new instance of MetaInspector::Document, setting the URL to the one given
     # Options:
-    # => timeout: defaults to 20 seconds
+    # => connection_timeout: defaults to 20 seconds
+    # => read_timeout: defaults to 20 seconds
+    # => retries: defaults to 3 times
     # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
     # => allow_redirections: when true, follow HTTP redirects. Defaults to true
     # => document: the html of the url as a string
@@ -15,7 +17,9 @@ module MetaInspector
     # => headers: object containing custom headers for the request
     def initialize(initial_url, options = {})
       options             = defaults.merge(options)
-      @timeout            = options[:timeout]
+      @connection_timeout = options[:connection_timeout]
+      @read_timeout       = options[:read_timeout]
+      @retries            = options[:retries]
       @html_content_only  = options[:html_content_only]
       @allow_redirections = options[:allow_redirections]
       @document           = options[:document]
@@ -24,7 +28,9 @@ module MetaInspector
       @exception_log      = options[:exception_log] || MetaInspector::ExceptionLog.new(warn_level: warn_level)
       @url                = MetaInspector::URL.new(initial_url, exception_log: @exception_log)
       @request            = MetaInspector::Request.new(@url,  allow_redirections: @allow_redirections,
-                                                              timeout:            @timeout,
+                                                              connection_timeout: @connection_timeout,
+                                                              read_timeout:       @read_timeout,
+                                                              retries:            @retries,
                                                               exception_log:      @exception_log,
                                                               headers:            @headers) unless @document
       @parser             = MetaInspector::Parser.new(self,  exception_log:      @exception_log)

data/lib/meta_inspector/request.rb CHANGED Viewed

@@ -1,7 +1,6 @@
 require 'faraday'
 require 'faraday_middleware'
 require 'faraday-cookie_jar'
-require 'timeout'
 module MetaInspector
@@ -13,7 +12,8 @@ module MetaInspector
       @url                = initial_url
       @allow_redirections = options[:allow_redirections]
-      @timeout            = options[:timeout]
+      @connection_timeout = options[:connection_timeout]
+      @read_timeout       = options[:read_timeout]
       @retries            = options[:retries]
       @exception_log      = options[:exception_log]
       @headers            = options[:headers]
@@ -35,11 +35,8 @@ module MetaInspector
     def response
       request_count ||= 0
       request_count += 1
-      Timeout::timeout(@timeout) { @response ||= fetch }
-    rescue Timeout::Error
-      retry unless @retries == request_count
-      @exception_log << TimeoutError.new("Attempt to fetch #{url} timed out 3 times.")
-    rescue Faraday::Error::ConnectionFailed, RuntimeError => e
+      @response ||= fetch
+    rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed, RuntimeError => e
       @exception_log << e
       nil
     end
@@ -48,21 +45,25 @@ module MetaInspector
     def fetch
       session = Faraday.new(:url => url) do |faraday|
+        faraday.request :retry, max: @retries
         if @allow_redirections
           faraday.use FaradayMiddleware::FollowRedirects, limit: 10
           faraday.use :cookie_jar
         end
         faraday.headers.merge!(@headers || {})
         faraday.adapter :net_http
       end
-      response = session.get
+      response = session.get do |req|
+        req.options.timeout      = @connection_timeout
+        req.options.open_timeout = @read_timeout
+      end
       @url.url = response.env.url.to_s
       response
     end
-    class TimeoutError < StandardError
-    end
   end
 end

data/lib/meta_inspector/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module MetaInspector
-  VERSION = "4.0.0.rc2"
+  VERSION = "4.0.0.rc3"
 end

data/spec/request_spec.rb CHANGED Viewed

@@ -60,50 +60,6 @@ describe MetaInspector::Request do
     end
   end
-  describe "retrying on timeouts" do
-    let(:logger) { MetaInspector::ExceptionLog.new }
-    subject do
-      MetaInspector::Request.new(url('http://pagerankalert.com'),
-                                 exception_log: logger, retries: 3)
-     end
-    context "when request never succeeds" do
-      before{ Timeout.stub(:timeout).and_raise(Timeout::Error) }
-      it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
-        logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
-        subject
-      end
-    end
-    context "when request succeeds on third try" do
-      before do
-        Timeout.stub(:timeout).and_raise(Timeout::Error)
-        Timeout.stub(:timeout).and_raise(Timeout::Error)
-        Timeout.stub(:timeout).and_call_original
-      end
-      it "doesn't raise an exception" do
-        logger.should_not receive(:<<)
-        subject
-      end
-      it "succeeds as normal" do
-        subject.content_type.should == "text/html"
-      end
-    end
-    context "when request succeeds on fourth try" do
-      before do
-        Timeout.stub(:timeout).exactly(3).times.and_raise(Timeout::Error)
-        # if it were called a fourth time, rspec would raise an error
-        # so this implicitely tests the correct behavior
-      end
-      it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
-        logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
-        subject
-      end
-    end
-  end
   private
   def url(initial_url)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: metainspector
 version: !ruby/object:Gem::Version
-  version: 4.0.0.rc2
+  version: 4.0.0.rc3
 platform: ruby
 authors:
 - Jaime Iniesta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-16 00:00:00.000000000 Z
+date: 2014-11-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri