spidy 0.0.21 → 0.0.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/spidy/connector/html.rb +18 -11
- data/lib/spidy/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54a354e11d7404229aff8394c535cdf06f8fdf2fcd1afc6081a7bf9dceeb00fb
|
4
|
+
data.tar.gz: bc89e11adeeffe65fc12bb844014814f2dc56b17db41ab2f08a5c40cfdbd5d18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 271c80cfcef9dc172a442c22c1db133d0250b016b78e0fefea78b5e6ad287e7d994780345dcec1a681a443956a6b16cac703dce96dc1ae3d410a35a728386ff8
|
7
|
+
data.tar.gz: b4a0ae085bf77a4e3f7f19604088b9e18c6444cff622cacacfeef2ca0a5a463aadf30a289b2ce2df14ffbe9887e95e9a79cc341f763779a5f14ef9281a8796ac
|
data/lib/spidy/connector/html.rb
CHANGED
@@ -8,10 +8,12 @@ module Spidy::Connector::Html
|
|
8
8
|
# retry class
|
9
9
|
#
|
10
10
|
class Retry < StandardError
|
11
|
+
attr_reader :page
|
11
12
|
attr_reader :response_code
|
12
13
|
attr_reader :wait_time
|
13
14
|
|
14
15
|
def initialize(wait_time: 2, page: nil, error: nil)
|
16
|
+
@page = page
|
15
17
|
@wait_time = wait_time
|
16
18
|
@response_code = error.try(:response_code) || page.try(:response_code)
|
17
19
|
end
|
@@ -33,7 +35,7 @@ module Spidy::Connector::Html
|
|
33
35
|
attr_reader :agent
|
34
36
|
attr_accessor :logger
|
35
37
|
|
36
|
-
def call(url, encoding: nil, retry_count:
|
38
|
+
def call(url, encoding: nil, retry_count: 5, &yielder)
|
37
39
|
fail 'url is not specified' if url.blank?
|
38
40
|
if encoding
|
39
41
|
agent.default_encoding = encoding
|
@@ -45,16 +47,8 @@ module Spidy::Connector::Html
|
|
45
47
|
|
46
48
|
private
|
47
49
|
|
48
|
-
# rubocop:disable Metrics/MethodLength
|
49
50
|
def get(url, retry_count, yielder)
|
50
|
-
|
51
|
-
fail Retry, page: page, wait_time: 5 if page.title == 'Sorry, unable to access page...'
|
52
|
-
|
53
|
-
yielder.call(page)
|
54
|
-
end
|
55
|
-
rescue Mechanize::ResponseCodeError => e
|
56
|
-
raise Retry, error: e if e.response_code == '429'
|
57
|
-
raise e
|
51
|
+
connect(url, retry_count, yielder)
|
58
52
|
rescue Retry => e
|
59
53
|
logger.call('retry.accessed': Time.current,
|
60
54
|
'retry.uri': url,
|
@@ -71,6 +65,19 @@ module Spidy::Connector::Html
|
|
71
65
|
end
|
72
66
|
raise e
|
73
67
|
end
|
74
|
-
|
68
|
+
|
69
|
+
def connect(url, retry_count, yielder)
|
70
|
+
result = nil
|
71
|
+
agent.get(url) do |page|
|
72
|
+
fail Retry, page: page, wait_time: 5 if page.title == 'Sorry, unable to access page...'
|
73
|
+
|
74
|
+
result = yielder.call(page)
|
75
|
+
end
|
76
|
+
result
|
77
|
+
rescue Mechanize::ResponseCodeError => e
|
78
|
+
raise Retry, error: e if e.response_code == '429'
|
79
|
+
raise e
|
80
|
+
end
|
81
|
+
|
75
82
|
end
|
76
83
|
end
|
data/lib/spidy/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.21
|
4
|
+
version: 0.0.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|