metainspector 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 19cf1cb2c804ede2e36407d38a150840c5f19de6
4
- data.tar.gz: 6e3cc4349b842ee52e5d4f9b993603662a573037
3
+ metadata.gz: 46f73f718a436065bc4353e279e997afebb0c43b
4
+ data.tar.gz: ed7aa01afb850aacca6ed942beb7ba9d5e596d3b
5
5
  SHA512:
6
- metadata.gz: f77d946f82fd31d6683c512cef527370e99b7edef0a84c355d25e0ce2c73c2c12de642438c9f4c28a80b8e82b73d9974e2f52a8b0eab2e0cfc1ba900aa7148d2
7
- data.tar.gz: cfa3f9e830fb56a8e3202c26827158d8504ca0ef0b1787953599b282b9d5005aa47275979e7d212a6f8dbed5a56aafd84ca0fd9025c700e819da8fc3bbbd183f
6
+ metadata.gz: 469d195a8b0f8fb5417bff510f3703b5cfde6ce5dba9866851e629f8c8d64dd73a680da7e3fb7614f556302ad1f12f7a59014df66b10ca1cf4f8cdead00ad67a
7
+ data.tar.gz: 0d7e3ae71b4beec707e293067c2f76db5698d399ea42071ef8845b551fedee2e7f400a62a066225572dc195ae21fc2504043419ef60b75ef0adf22ae6220034a
data/README.md CHANGED
@@ -184,12 +184,30 @@ And the full scraped document is accessible from:
184
184
 
185
185
  ## Options
186
186
 
187
- ### Timeout
188
-
189
- By default, MetaInspector times out after 20 seconds of waiting for a page to respond.
190
- You can set a different timeout with a second parameter, like this:
191
-
192
- page = MetaInspector.new('sitevalidator.com', :timeout => 5) # 5 seconds timeout
187
+ ### Timeout & Retries
188
+
189
+ By default, MetaInspector times out after 20 seconds of waiting for a page to respond,
190
+ and it will retry fetching the page 3 times.
191
+ You can specify different values for both of these, like this:
192
+
193
+ # time out after 5 seconds, retry 4 times
194
+ page = MetaInspector.new('sitevalidator.com', :timeout => 5, :retries => 4)
195
+
196
+ If MetaInspector fails to fetch the page after it has exhausted its retries,
197
+ it will raise `MetaInspector::Request::TimeoutError`, which you can rescue in your
198
+ application code.
199
+
200
+ begin
201
+ data = MetaInspector.new(url)
202
+ rescue MetaInspector::Request::TimeoutError
203
+ enqueue_for_future_fetch_attempt(url)
204
+ render_simple(url)
205
+ rescue
206
+ log_fetch_error($!)
207
+ render_simple(url)
208
+ else
209
+ render_rich(data)
210
+ end
193
211
 
194
212
  ### Redirections
195
213
 
@@ -64,10 +64,12 @@ module MetaInspector
64
64
 
65
65
  def defaults
66
66
  { :timeout => 20,
67
+ :retries => 3,
67
68
  :html_content_only => false,
68
69
  :warn_level => :raise,
69
70
  :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
70
- :allow_redirections => true
71
+ :allow_redirections => true,
72
+ :exception_log => MetaInspector::ExceptionLog.new
71
73
  }
72
74
  end
73
75
 
@@ -12,12 +12,11 @@ module MetaInspector
12
12
  include MetaInspector::Exceptionable
13
13
 
14
14
  def initialize(initial_url, options = {})
15
- options = defaults.merge(options)
16
-
17
15
  @url = initial_url
18
16
 
19
17
  @allow_redirections = options[:allow_redirections]
20
18
  @timeout = options[:timeout]
19
+ @retries = options[:retries]
21
20
  @exception_log = options[:exception_log]
22
21
  @headers = options[:headers]
23
22
 
@@ -38,8 +37,13 @@ module MetaInspector
38
37
  private
39
38
 
40
39
  def response
40
+ request_count ||= 0
41
+ request_count += 1
41
42
  Timeout::timeout(@timeout) { @response ||= fetch }
42
- rescue TimeoutError, Faraday::ConnectionFailed, RuntimeError => e
43
+ rescue Timeout::Error
44
+ retry unless @retries == request_count
45
+ @exception_log << TimeoutError.new("Attempt to fetch #{url} timed out 3 times.")
46
+ rescue Faraday::ConnectionFailed, RuntimeError => e
43
47
  @exception_log << e
44
48
  nil
45
49
  end
@@ -60,8 +64,7 @@ module MetaInspector
60
64
  response
61
65
  end
62
66
 
63
- def defaults
64
- { timeout: 20, exception_log: MetaInspector::ExceptionLog.new, allow_redirections: true }
67
+ class TimeoutError < StandardError
65
68
  end
66
69
  end
67
70
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "3.0.0"
4
+ VERSION = "3.1.0"
5
5
  end
data/spec/request_spec.rb CHANGED
@@ -37,12 +37,6 @@ describe MetaInspector::Request do
37
37
  FakeWeb.allow_net_connect = false
38
38
  end
39
39
 
40
- it "should handle timeouts" do
41
- logger.should receive(:<<).with(an_instance_of(Timeout::Error))
42
-
43
- MetaInspector::Request.new(url('http://example.com/timeout'), timeout: 0.0000000000000000001, exception_log: logger)
44
- end
45
-
46
40
  it "should handle socket errors" do
47
41
  TCPSocket.stub(:open).and_raise(SocketError)
48
42
  logger.should receive(:<<).with(an_instance_of(Faraday::ConnectionFailed))
@@ -51,6 +45,50 @@ describe MetaInspector::Request do
51
45
  end
52
46
  end
53
47
 
48
+ describe "retrying on timeouts" do
49
+ let(:logger) { MetaInspector::ExceptionLog.new }
50
+ subject do
51
+ MetaInspector::Request.new(url('http://pagerankalert.com'),
52
+ exception_log: logger, retries: 3)
53
+ end
54
+
55
+ context "when request never succeeds" do
56
+ before{ Timeout.stub(:timeout).and_raise(Timeout::Error) }
57
+ it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
58
+ logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
59
+ subject
60
+ end
61
+ end
62
+
63
+ context "when request succeeds on third try" do
64
+ before do
65
+ Timeout.stub(:timeout).and_raise(Timeout::Error)
66
+ Timeout.stub(:timeout).and_raise(Timeout::Error)
67
+ Timeout.stub(:timeout).and_call_original
68
+ end
69
+ it "doesn't raise an exception" do
70
+ logger.should_not receive(:<<)
71
+ subject
72
+ end
73
+ it "succeeds as normal" do
74
+ subject.content_type.should == "text/html"
75
+ end
76
+ end
77
+
78
+ context "when request succeeds on fourth try" do
79
+ before do
80
+ Timeout.stub(:timeout).exactly(3).times.and_raise(Timeout::Error)
81
+ # if it were called a fourth time, rspec would raise an error
82
+ # so this implicitly tests the correct behavior
83
+ end
84
+ it "swallows all the timeout errors and raises MetaInspector::Request::TimeoutError" do
85
+ logger.should receive(:<<).with(an_instance_of(MetaInspector::Request::TimeoutError))
86
+ subject
87
+ end
88
+ end
89
+
90
+ end
91
+
54
92
  private
55
93
 
56
94
  def url(initial_url)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-07 00:00:00.000000000 Z
11
+ date: 2014-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri