spidy 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +35 -2
- data/lib/spidy/connector.rb +6 -5
- data/lib/spidy/connector/json.rb +1 -1
- data/lib/spidy/version.rb +1 -1
- data/spidy.gemspec +2 -0
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a70bf5d610f60d0b71c719cf870995a7e93b6e9abd71ef9823a71e2ed506f190
|
4
|
+
data.tar.gz: 203bc7721020e244b9ad3ecc526b27e1c8d83a807cc2798bcb8930b1e17d5277
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9977aeb13ff786bd8fbeb7d8ca0ee3ef7b67dfb577739c9aba0a310c46741fa48d86596de6f69ad05eaf2d64f9de4259bea84d4939ee2c7288781106fb25f2b3
|
7
|
+
data.tar.gz: d64e9e66b25d8985f2c009abddd6f6b862aa8f533b67e125fa35008d308424e766d3ad4a8a64c16a0c9ed3448cdad14a09c80f6f77437e604826808286270d4f
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
spidy (0.3.2)
|
4
|
+
spidy (0.3.3)
|
5
5
|
activesupport
|
6
6
|
mechanize
|
7
7
|
pry
|
@@ -11,12 +11,24 @@ PATH
|
|
11
11
|
GEM
|
12
12
|
remote: https://rubygems.org/
|
13
13
|
specs:
|
14
|
-
activesupport (6.0.3.
|
14
|
+
activesupport (6.0.3.3)
|
15
15
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
16
16
|
i18n (>= 0.7, < 2)
|
17
17
|
minitest (~> 5.1)
|
18
18
|
tzinfo (~> 1.1)
|
19
19
|
zeitwerk (~> 2.2, >= 2.2.2)
|
20
|
+
addressable (2.7.0)
|
21
|
+
public_suffix (>= 2.0.2, < 5.0)
|
22
|
+
capybara (3.33.0)
|
23
|
+
addressable
|
24
|
+
mini_mime (>= 0.1.3)
|
25
|
+
nokogiri (~> 1.8)
|
26
|
+
rack (>= 1.6.0)
|
27
|
+
rack-test (>= 0.6.3)
|
28
|
+
regexp_parser (~> 1.5)
|
29
|
+
xpath (~> 3.2)
|
30
|
+
capybara_discoball (0.1.0)
|
31
|
+
capybara (>= 2.7, < 4)
|
20
32
|
coderay (1.1.2)
|
21
33
|
concurrent-ruby (1.1.7)
|
22
34
|
connection_pool (2.2.3)
|
@@ -41,9 +53,12 @@ GEM
|
|
41
53
|
mime-types (3.3.1)
|
42
54
|
mime-types-data (~> 3.2015)
|
43
55
|
mime-types-data (3.2020.0512)
|
56
|
+
mini_mime (1.0.2)
|
44
57
|
mini_portile2 (2.4.0)
|
45
58
|
minitest (5.14.2)
|
46
59
|
mixlib-shellout (2.4.4)
|
60
|
+
mustermann (1.1.1)
|
61
|
+
ruby2_keywords (~> 0.0.1)
|
47
62
|
net-http-digest_auth (1.4.1)
|
48
63
|
net-http-persistent (4.0.0)
|
49
64
|
connection_pool (~> 2.2)
|
@@ -53,7 +68,14 @@ GEM
|
|
53
68
|
pry (0.12.2)
|
54
69
|
coderay (~> 1.1.0)
|
55
70
|
method_source (~> 0.9.0)
|
71
|
+
public_suffix (4.0.6)
|
72
|
+
rack (2.2.3)
|
73
|
+
rack-protection (2.0.8.1)
|
74
|
+
rack
|
75
|
+
rack-test (1.1.0)
|
76
|
+
rack (>= 1.0, < 3)
|
56
77
|
rake (10.5.0)
|
78
|
+
regexp_parser (1.8.1)
|
57
79
|
rspec (3.8.0)
|
58
80
|
rspec-core (~> 3.8.0)
|
59
81
|
rspec-expectations (~> 3.8.0)
|
@@ -74,8 +96,15 @@ GEM
|
|
74
96
|
diff-lcs (>= 1.2.0, < 2.0)
|
75
97
|
rspec-support (~> 3.8.0)
|
76
98
|
rspec-support (3.8.2)
|
99
|
+
ruby2_keywords (0.0.2)
|
100
|
+
sinatra (2.0.8.1)
|
101
|
+
mustermann (~> 1.0)
|
102
|
+
rack (~> 2.0)
|
103
|
+
rack-protection (= 2.0.8.1)
|
104
|
+
tilt (~> 2.0)
|
77
105
|
socksify (1.7.1)
|
78
106
|
thread_safe (0.3.6)
|
107
|
+
tilt (2.0.10)
|
79
108
|
tor (0.1.4)
|
80
109
|
tzinfo (1.2.7)
|
81
110
|
thread_safe (~> 0.1)
|
@@ -83,6 +112,8 @@ GEM
|
|
83
112
|
unf_ext
|
84
113
|
unf_ext (0.0.7.7)
|
85
114
|
webrobots (0.1.2)
|
115
|
+
xpath (3.2.0)
|
116
|
+
nokogiri (~> 1.8)
|
86
117
|
zeitwerk (2.4.0)
|
87
118
|
|
88
119
|
PLATFORMS
|
@@ -90,11 +121,13 @@ PLATFORMS
|
|
90
121
|
|
91
122
|
DEPENDENCIES
|
92
123
|
bundler (~> 2.0)
|
124
|
+
capybara_discoball
|
93
125
|
ffaker
|
94
126
|
pry
|
95
127
|
rake (~> 10.0)
|
96
128
|
rspec (~> 3.0)
|
97
129
|
rspec-command
|
130
|
+
sinatra
|
98
131
|
spidy!
|
99
132
|
|
100
133
|
BUNDLED WITH
|
data/lib/spidy/connector.rb
CHANGED
@@ -15,7 +15,7 @@ module Spidy::Connector
|
|
15
15
|
#
|
16
16
|
# default user agent
|
17
17
|
#
|
18
|
-
|
18
|
+
USER_AGENT = [
|
19
19
|
'Mozilla/5.0',
|
20
20
|
'(Macintosh; Intel Mac OS X 10_12_6)',
|
21
21
|
'AppleWebKit/537.36',
|
@@ -36,7 +36,7 @@ module Spidy::Connector
|
|
36
36
|
extend ActiveSupport::Concern
|
37
37
|
class_methods do
|
38
38
|
def call(url, wait_time: 5, logger: Spidy::Connector::DEFAULT_LOGGER, user_agent: Spidy::Connector::USER_AGENT, &block)
|
39
|
-
new(
|
39
|
+
::Spidy::Connector::RetryableCaller.new(new(user_agent: user_agent), wait_time: wait_time, logger: logger).call(url, &block)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
end
|
@@ -57,7 +57,7 @@ module Spidy::Connector
|
|
57
57
|
#
|
58
58
|
# retry
|
59
59
|
#
|
60
|
-
class
|
60
|
+
class RetryableCaller
|
61
61
|
attr_reader :origin_connector
|
62
62
|
|
63
63
|
def initialize(connector, logger:, wait_time:)
|
@@ -68,6 +68,7 @@ module Spidy::Connector
|
|
68
68
|
end
|
69
69
|
|
70
70
|
def call(url, &block)
|
71
|
+
block ||= ->(result) { break result }
|
71
72
|
connect(url, &block)
|
72
73
|
end
|
73
74
|
|
@@ -120,12 +121,12 @@ module Spidy::Connector
|
|
120
121
|
end
|
121
122
|
|
122
123
|
def self.get(value, wait_time: nil, user_agent: nil, socks_proxy: nil, logger: nil)
|
124
|
+
user_agent ||= USER_AGENT
|
123
125
|
logger ||= DEFAULT_LOGGER
|
124
|
-
user_agent ||= DEFAULT_USER_AGENT
|
125
126
|
wait_time ||= DEFAULT_WAIT_TIME
|
126
127
|
|
127
128
|
connector = get_connector(value, user_agent: user_agent, socks_proxy: socks_proxy)
|
128
|
-
|
129
|
+
RetryableCaller.new(connector, wait_time: wait_time, logger: logger)
|
129
130
|
end
|
130
131
|
|
131
132
|
#
|
data/lib/spidy/connector/json.rb
CHANGED
data/lib/spidy/version.rb
CHANGED
data/spidy.gemspec
CHANGED
@@ -30,6 +30,8 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
31
31
|
spec.add_development_dependency 'ffaker'
|
32
32
|
spec.add_development_dependency 'rspec-command'
|
33
|
+
spec.add_development_dependency 'capybara_discoball'
|
34
|
+
spec.add_development_dependency 'sinatra'
|
33
35
|
|
34
36
|
spec.add_runtime_dependency 'tor'
|
35
37
|
spec.add_runtime_dependency 'activesupport'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- aileron
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,6 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: capybara_discoball
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: sinatra
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: tor
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|