metainspector 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 19cf1cb2c804ede2e36407d38a150840c5f19de6
4
- data.tar.gz: 6e3cc4349b842ee52e5d4f9b993603662a573037
3
+ metadata.gz: 46f73f718a436065bc4353e279e997afebb0c43b
4
+ data.tar.gz: ed7aa01afb850aacca6ed942beb7ba9d5e596d3b
5
5
  SHA512:
6
- metadata.gz: f77d946f82fd31d6683c512cef527370e99b7edef0a84c355d25e0ce2c73c2c12de642438c9f4c28a80b8e82b73d9974e2f52a8b0eab2e0cfc1ba900aa7148d2
7
- data.tar.gz: cfa3f9e830fb56a8e3202c26827158d8504ca0ef0b1787953599b282b9d5005aa47275979e7d212a6f8dbed5a56aafd84ca0fd9025c700e819da8fc3bbbd183f
6
+ metadata.gz: 469d195a8b0f8fb5417bff510f3703b5cfde6ce5dba9866851e629f8c8d64dd73a680da7e3fb7614f556302ad1f12f7a59014df66b10ca1cf4f8cdead00ad67a
7
+ data.tar.gz: 0d7e3ae71b4beec707e293067c2f76db5698d399ea42071ef8845b551fedee2e7f400a62a066225572dc195ae21fc2504043419ef60b75ef0adf22ae6220034a
data/README.md CHANGED
@@ -184,12 +184,30 @@ And the full scraped document is accessible from:
184
184
 
185
185
  ## Options
186
186
 
187
- ### Timeout
188
-
189
- By default, MetaInspector times out after 20 seconds of waiting for a page to respond.
190
- You can set a different timeout with a second parameter, like this:
191
-
192
- page = MetaInspector.new('sitevalidator.com', :timeout => 5) # 5 seconds timeout
187
+ ### Timeout & Retries
188
+
189
+ By default, MetaInspector times out after 20 seconds of waiting for a page to respond,
190
+ and it will attempt to fetch the page up to 3 times in total.
191
+ You can specify different values for both of these, like this:
192
+
193
+ # timeout after 5 seconds, retry 4 times
194
+ page = MetaInspector.new('sitevalidator.com', :timeout => 5, :retries => 4)
195
+
196
+ If MetaInspector fails to fetch the page after it has exhausted its retries,
197
+ it will raise `MetaInspector::Request::TimeoutError`, which you can rescue in your
198
+ application code.
199
+
200
+ begin
201
+ data = MetaInspector.new(url)
202
+ rescue MetaInspector::Request::TimeoutError
203
+ enqueue_for_future_fetch_attempt(url)
204
+ render_simple(url)
205
+ rescue
206
+ log_fetch_error($!)
207
+ render_simple(url)
208
+ else
209
+ render_rich(data)
210
+ end
193
211
 
194
212
  ### Redirections
195
213
 
@@ -64,10 +64,12 @@ module MetaInspector
64
64
 
65
65
  def defaults
66
66
  { :timeout => 20,
67
+ :retries => 3,
67
68
  :html_content_only => false,
68
69
  :warn_level => :raise,
69
70
  :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
70
- :allow_redirections => true
71
+ :allow_redirections => true,
72
+ :exception_log => MetaInspector::ExceptionLog.new
71
73
  }
72
74
  end
73
75
 
@@ -12,12 +12,11 @@ module MetaInspector
12
12
  include MetaInspector::Exceptionable
13
13
 
14
14
  def initialize(initial_url, options = {})
15
- options = defaults.merge(options)
16
-
17
15
  @url = initial_url
18
16
 
19
17
  @allow_redirections = options[:allow_redirections]
20
18
  @timeout = options[:timeout]
19
+ @retries = options[:retries]
21
20
  @exception_log = options[:exception_log]
22
21
  @headers = options[:headers]
23
22
 
@@ -38,8 +37,13 @@ module MetaInspector
38
37
  private
39
38
 
40
39
  def response
40
+ request_count ||= 0
41
+ request_count += 1
41
42
  Timeout::timeout(@timeout) { @response ||= fetch }
42
- rescue TimeoutError, Faraday::ConnectionFailed, RuntimeError => e
43
+ rescue Timeout::Error
44
+ retry unless @retries == request_count
45
+ @exception_log << TimeoutError.new("Attempt to fetch #{url} timed out 3 times.")
46
+ rescue Faraday::ConnectionFailed, RuntimeError => e
43
47
  @exception_log << e
44
48
  nil
45
49
  end
@@ -60,8 +64,7 @@ module MetaInspector
60
64
  response
61
65
  end
62
66
 
63
- def defaults
64
- { timeout: 20, exception_log: MetaInspector::ExceptionLog.new, allow_redirections: true }
67
+ class TimeoutError < StandardError
65
68
  end
66
69
  end
67
70
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "3.0.0"
4
+ VERSION = "3.1.0"
5
5
  end
data/spec/request_spec.rb CHANGED
@@ -37,12 +37,6 @@ describe MetaInspector::Request do
37
37
  FakeWeb.allow_net_connect = false
38
38
  end
39
39
 
40
- it "should handle timeouts" do
41
- logger.should receive(:<<).with(an_instance_of(Timeout::Error))
42
-
43
- MetaInspector::Request.new(url('http://example.com/timeout'), timeout: 0.0000000000000000001, exception_log: logger)
44
- end
45
-
46
40
  it "should handle socket errors" do
47
41
  TCPSocket.stub(:open).and_raise(SocketError)
48
42
  logger.should receive(:<<).with(an_instance_of(Faraday::ConnectionFailed))
@@ -51,6 +45,50 @@ describe MetaInspector::Request do
51
45
  end
52
46
  end
53
47
 
48
+ describe "retrying on timeouts" do
49
+ let(:logger) { MetaInspector::ExceptionLog.new }
50
+ subject do
51
+ MetaInspector::Request.new(url('http://pagerankalert.com'),
52
+ exception_log: logger, retries: 3)
53
+ end
54
+
55
+ context "when request never succeeds" do
56
+ before{ Timeout.stub(:timeout).and_raise(Timeout::Error) }
57
+ it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
58
+ logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
59
+ subject
60
+ end
61
+ end
62
+
63
+ context "when request succeeds on third try" do
64
+ before do
65
+ Timeout.stub(:timeout).and_raise(Timeout::Error)
66
+ Timeout.stub(:timeout).and_raise(Timeout::Error)
67
+ Timeout.stub(:timeout).and_call_original
68
+ end
69
+ it "doesn't raise an exception" do
70
+ logger.should_not receive(:<<)
71
+ subject
72
+ end
73
+ it "succeeds as normal" do
74
+ subject.content_type.should == "text/html"
75
+ end
76
+ end
77
+
78
+ context "when request succeeds on fourth try" do
79
+ before do
80
+ Timeout.stub(:timeout).exactly(3).times.and_raise(Timeout::Error)
81
+ # if it were called a fourth time, rspec would raise an error
82
+ # so this implicitly tests the correct behavior
83
+ end
84
+ it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
85
+ logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
86
+ subject
87
+ end
88
+ end
89
+
90
+ end
91
+
54
92
  private
55
93
 
56
94
  def url(initial_url)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-07 00:00:00.000000000 Z
11
+ date: 2014-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri