proxy_fetcher 0.6.5 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -5
- data/lib/proxy_fetcher.rb +12 -1
- data/lib/proxy_fetcher/client/client.rb +6 -1
- data/lib/proxy_fetcher/client/request.rb +37 -79
- data/lib/proxy_fetcher/configuration.rb +8 -3
- data/lib/proxy_fetcher/null_logger.rb +35 -0
- data/lib/proxy_fetcher/utils/http_client.rb +35 -29
- data/lib/proxy_fetcher/utils/proxy_validator.rb +17 -23
- data/lib/proxy_fetcher/version.rb +2 -2
- data/proxy_fetcher.gemspec +2 -0
- data/spec/proxy_fetcher/client/client_spec.rb +5 -13
- data/spec/proxy_fetcher/providers/base_spec.rb +21 -0
- data/spec/proxy_fetcher/proxy_spec.rb +3 -17
- data/spec/proxy_fetcher/version_spec.rb +1 -1
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b640006b8d82fd1c89336e37498ae7db1da3d0c7
+  data.tar.gz: 76edb223179bd9f59c127c8ac0dd97fcf2e9d4b0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 88648e3729f0e985b490da7c824263a43ff4987c61fa366fbbeb49c968757ef791191a5e3a136ca782f14506f12f31f0f651e95281a8463f5bf14e19a08856d8
+  data.tar.gz: 76d84b9cb3b6995690de39dbdf36481e67e4286c5e533000583eb849693ce78ee42da16ae20499560aa6b16cdf9c19cf4d66edf75fe4f66c83e5d63dc2da3100
data/README.md
CHANGED
@@ -16,6 +16,11 @@ fetched and validated by the gem. Take a look at the documentation below to find
 Also this gem can be used with any other programming language (Go / Python / etc) as standalone solution for downloading and
 validating proxy lists from the different providers. [Checkout examples](#standalone) of usage below.
 
+## Documentation valid for `master` branch
+
+Please check the documentation for the version of doorkeeper you are using in:
+https://github.com/nbulaj/proxy_fetcher/releases
+
 ## Table of Contents
 
 - [Dependencies](#dependencies)
@@ -33,8 +38,8 @@ validating proxy lists from the different providers. [Checkout examples](#standa
 
 ## Dependencies
 
-ProxyFetcher gem itself requires
-[see Travis build matrix](.travis.yml)).
+ProxyFetcher gem itself requires Ruby `>= 2.0.0` (or [JRuby](http://jruby.org/) `> 9.0`, but maybe earlier too,
+[see Travis build matrix](.travis.yml)) and great [HTTP.rb gem](https://github.com/httprb/http).
 
 However, it requires an adapter to parse HTML. If you do not specify any specific adapter, then it will use
 default one - [Nokogiri](https://github.com/sparklemotion/nokogiri). It's OK for any Ruby on Rails project
@@ -49,7 +54,7 @@ you can implement your own adapter if it your use-case. Take a look at the [Conf
 If using bundler, first add 'proxy_fetcher' to your Gemfile:
 
 ```ruby
-gem 'proxy_fetcher', '~> 0.
+gem 'proxy_fetcher', '~> 0.7'
 ```
 
 or if you want to use the latest version (from `master` branch), then:
@@ -67,7 +72,7 @@ bundle install
 Otherwise simply install the gem:
 
 ```sh
-gem install proxy_fetcher -v '0.
+gem install proxy_fetcher -v '0.7'
 ```
 
 ## Example of usage
@@ -267,6 +272,7 @@ Default configuration looks as follows:
 
 ```ruby
 ProxyFetcher.configure do |config|
+  config.logger = Logger.new(STDOUT)
   config.user_agent = ProxyFetcher::Configuration::DEFAULT_USER_AGENT
   config.pool_size = 10
   config.timeout = 3
@@ -298,7 +304,7 @@ ProxyFetcher.configure do |config|
 end
 ```
 
-ProxyFetcher uses
+ProxyFetcher uses HTTP.rb gem for dealing with HTTP(S) requests. It is fast enough and has a great chainable API.
 If you wanna add, for example, your custom provider that was developed as a Single Page Application (SPA) with some JavaScript,
 then you will need something like [selenium-webdriver](https://github.com/SeleniumHQ/selenium/tree/master/rb) to properly
 load the content of the website. For those and other cases you can write your own class for fetching HTML content by
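To make the "chainable API" remark above concrete, here is a minimal HTTP.rb sketch; the proxy address and target URL are placeholders, not values taken from the gem:

```ruby
require 'http'

# Placeholder proxy for illustration only.
proxy_addr = '10.10.10.10'
proxy_port = 3128

response = HTTP
           .via(proxy_addr, proxy_port)               # route the request through a proxy
           .headers('User-Agent' => 'ProxyFetcher')   # custom headers
           .timeout(connect: 3, read: 3)              # per-phase timeouts
           .follow(max_hops: 5)                       # follow up to 5 redirects
           .get('http://httpbin.org/get')

puts response.status
puts response.body.to_s
```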
data/lib/proxy_fetcher.rb
CHANGED
@@ -1,7 +1,8 @@
 # frozen_string_literal: true
 
 require 'uri'
-require '
+require 'http'
+require 'logger'
 
 require File.dirname(__FILE__) + '/proxy_fetcher/version'
 
@@ -10,6 +11,7 @@ require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
 require File.dirname(__FILE__) + '/proxy_fetcher/configuration/providers_registry'
 require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
 require File.dirname(__FILE__) + '/proxy_fetcher/manager'
+require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
 
 require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
 require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
@@ -70,6 +72,15 @@ module ProxyFetcher
       yield config
     end
 
+    # Returns ProxyFetcher logger instance.
+    #
+    # @return [Logger, NullLogger] logger object
+    #
+    def logger
+      return @logger if defined?(@logger)
+      @logger = config.logger || NullLogger.new
+    end
+
    private
 
    # Configures default adapter if it isn't defined by the user.
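A short sketch of how the new `ProxyFetcher.logger` accessor is used; note that it memoizes the logger on first access (the `defined?(@logger)` guard above):

```ruby
require 'proxy_fetcher'

# Route the gem's diagnostic messages into your own log file.
ProxyFetcher.configure do |config|
  config.logger = Logger.new('proxy_fetcher.log')
end

ProxyFetcher.logger.info('refreshing the proxy list')

# Setting config.logger to nil instead makes ProxyFetcher.logger fall back
# to the no-op NullLogger, silencing the gem entirely.
```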
data/lib/proxy_fetcher/client/client.rb
CHANGED
@@ -165,6 +165,8 @@ module ProxyFetcher
      #
      # @raise [ProxyFetcher::Error] internal error happened during block execution
      #
+      # Requires refactoring :(
+      #
      def with_proxy_for(url, max_retries = 1000)
        tries = 0
 
@@ -174,7 +176,10 @@
      rescue ProxyFetcher::Error
        raise
      rescue StandardError
-
+        if max_retries && tries >= max_retries
+          ProxyFetcher.logger.warn("reached maximum amount of retries (#{max_retries})")
+          raise ProxyFetcher::Exceptions::MaximumRetriesReached
+        end
 
        ProxiesRegistry.invalidate_proxy!(proxy)
        tries += 1
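From the caller's side the retry loop above surfaces roughly like this (the URL is an arbitrary example taken from the specs):

```ruby
require 'proxy_fetcher'

begin
  # Client.get picks a proxy, retries with a fresh one on failures, and gives
  # up after max_retries attempts.
  content = ProxyFetcher::Client.get('http://httpbin.org')
  puts content
rescue ProxyFetcher::Exceptions::MaximumRetriesReached
  # Logged via ProxyFetcher.logger.warn right before being raised (see above).
  warn 'could not fetch the page through any proxy'
end
```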
data/lib/proxy_fetcher/client/request.rb
CHANGED
@@ -4,29 +4,13 @@ module ProxyFetcher
   module Client
     # ProxyFetcher::Client HTTP request abstraction.
     class Request
-      # URL encoding HTTP headers.
-      URL_ENCODED = {
-        'Content-Type' => 'application/x-www-form-urlencoded'
-      }.freeze
-
-      # Default SSL options that will be used for connecting to resources
-      # the uses secure connection. By default ProxyFetcher wouldn't verify
-      # SSL certs.
-      DEFAULT_SSL_OPTIONS = {
-        verify_mode: OpenSSL::SSL::VERIFY_NONE
-      }.freeze
-
-      # @!attribute [r] http
-      #   @return [Class] HTTP client
-      attr_reader :http
-
       # @!attribute [r] method
       #   @return [String, Symbol] HTTP request method
       attr_reader :method
 
-      # @!attribute [r]
-      #   @return [
-      attr_reader :
+      # @!attribute [r] url
+      #   @return [String] Request URL
+      attr_reader :url
 
       # @!attribute [r] headers
       #   @return [Hash] HTTP headers
@@ -68,17 +52,17 @@ module ProxyFetcher
       def initialize(args)
         raise ArgumentError, 'args must be a Hash!' unless args.is_a?(Hash)
 
-        @
-        @method = args.fetch(:method).to_s.
+        @url = args.fetch(:url)
+        @method = args.fetch(:method).to_s.downcase
         @headers = (args[:headers] || {}).dup
-        @payload =
+        @payload = args[:payload]
         @timeout = args.fetch(:timeout, ProxyFetcher.config.timeout)
-        @ssl_options = args.fetch(:ssl_options,
+        @ssl_options = args.fetch(:ssl_options, default_ssl_options)
 
         @proxy = args.fetch(:proxy)
         @max_redirects = args.fetch(:max_redirects, 10)
 
-        build_http_client
+        @http = build_http_client
       end
 
       # Executes HTTP request with defined options.
@@ -87,77 +71,51 @@ module ProxyFetcher
       # response body (requested resource content)
       #
       def execute
-
-
-
-
-      end
+        response = send_request
+        response.body.to_s
+      rescue HTTP::Redirector::TooManyRedirectsError
+        raise ProxyFetcher::Exceptions::MaximumRedirectsReached
       end
 
       private
 
-      #
-      # must be a WWW-Form encoded for example.
+      # Builds HTTP client.
       #
-
-        return if payload.nil?
-
-        if payload.is_a?(Hash)
-          headers.merge!(URL_ENCODED)
-          URI.encode_www_form(payload)
-        else
-          payload
-        end
-      end
-
-      # Builds HTTP client based on stdlib Net::HTTP.
-      #
-      # @return [Net::HTTP]
+      # @return [HTTP::Client]
       #   HTTP client
       #
       def build_http_client
-
-
-
-
-        @http.open_timeout = timeout
-        @http.read_timeout = timeout
+        HTTP.via(proxy.addr, proxy.port.to_i)
+            .headers(headers)
+            .timeout(connect: timeout, read: timeout)
+            .follow(max_hops: max_redirects)
       end
 
-      #
-      #
+      # Default SSL options that will be used for connecting to resources
+      # the uses secure connection. By default ProxyFetcher wouldn't verify
+      # SSL certs.
       #
-      # @
-      #   HTTP response object
+      # @return [OpenSSL::SSL::SSLContext] SSL context
       #
-
-
-
-
-        case http_response
-        when Net::HTTPSuccess then http_response.read_body
-        when Net::HTTPRedirection then follow_redirection(http_response)
-        else
-          http_response.error!
-        end
+      def default_ssl_options
+        ssl_ctx = OpenSSL::SSL::SSLContext.new
+        ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
+        ssl_ctx
       end
 
-      #
+      # Sends HTTP request to the URL. Check for the payload and it's type
+      # in order to build valid request.
       #
-
-        raise ProxyFetcher::Exceptions::MaximumRedirectsReached if max_redirects <= 0
-
-        url = http_response.fetch('location')
-        url = uri.merge(url).to_s unless url.downcase.start_with?('http')
-
-        Request.execute(method: :get, url: url, proxy: proxy, headers: headers, timeout: timeout, max_redirects: max_redirects - 1)
-      end
-
-      # Returns particular Net::HTTP method object
-      # for processing required request.
+      # @return [HTTP::Response] request response
       #
-      def
-
+      def send_request
+        if payload
+          payload_type = payload.is_a?(String) ? :body : :form
+
+          @http.public_send(method, url, payload_type => payload, ssl_context: ssl_options)
+        else
+          @http.public_send(method, url, ssl_context: ssl_options)
+        end
      end
    end
  end
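A sketch of invoking the reworked Request. Argument names follow the constructor above; a class-level `Request.execute` entry point is assumed from the removed redirect-handling code that called it, so treat this as illustrative rather than the documented API:

```ruby
manager = ProxyFetcher::Manager.new
proxy   = manager.get!

body = ProxyFetcher::Client::Request.execute(
  method: :post,
  url: 'http://httpbin.org/post',
  payload: { 'param' => 'value' },   # a Hash is sent as a form (:form), a String as raw :body
  headers: { 'X-Custom' => '1' },
  proxy: proxy,
  timeout: 5,
  max_redirects: 3
)
puts body
```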
data/lib/proxy_fetcher/configuration.rb
CHANGED
@@ -17,16 +17,20 @@
     #   @return [String] User-Agent string
     attr_accessor :user_agent
 
+    # @!attribute [r] logger
+    #   @return [Object] Logger object
+    attr_accessor :logger
+
     # @!attribute [r] adapter
     #   @return [Object] HTML parser adapter
     attr_reader :adapter
 
     # @!attribute [r] http_client
-    #   @return [
+    #   @return [Object] HTTP client class
     attr_reader :http_client
 
     # @!attribute [r] proxy_validator
-    #   @return [
+    #   @return [Object] proxy validator class
     attr_reader :proxy_validator
 
     # @!attribute [r] providers
@@ -54,7 +58,7 @@ module ProxyFetcher
     #   providers registry
     #
     def providers_registry
-      @
+      @providers_registry ||= ProvidersRegistry.new
     end
 
     # Register new proxy provider. Requires provider name and class
@@ -91,6 +95,7 @@ module ProxyFetcher
 
     # Sets default configuration options
     def reset!
+      @logger = Logger.new(STDOUT)
       @user_agent = DEFAULT_USER_AGENT
       @pool_size = 10
       @timeout = 3
data/lib/proxy_fetcher/null_logger.rb
ADDED
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module ProxyFetcher
+  class NullLogger
+    # @return [nil]
+    def unknown(*)
+      nil
+    end
+
+    # @return [nil]
+    def fatal(*)
+      nil
+    end
+
+    # @return [nil]
+    def error(*)
+      nil
+    end
+
+    # @return [nil]
+    def warn(*)
+      nil
+    end
+
+    # @return [nil]
+    def info(*)
+      nil
+    end
+
+    # @return [nil]
+    def debug(*)
+      nil
+    end
+  end
+end
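The null-object logger above lets the rest of the gem log unconditionally without checking whether logging is configured; a tiny sketch of the idea:

```ruby
logger = ProxyFetcher::NullLogger.new

# Every standard Logger method is accepted and silently discarded.
logger.debug('loading providers')   # => nil
logger.warn('proxy list is empty')  # => nil
```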
data/lib/proxy_fetcher/utils/http_client.rb
CHANGED
@@ -5,25 +5,40 @@ module ProxyFetcher
   # the different providers. Uses ProxyFetcher configuration options
   # for sending HTTP requests to providers URLs.
   class HTTPClient
-    # @!attribute [r]
-    #   @return [
-    attr_reader :
+    # @!attribute [r] url
+    #   @return [String] URL
+    attr_reader :url
 
     # @!attribute [r] http
     #   @return [Net::HTTP] HTTP client
     attr_reader :http
 
+    # @!attribute [r] ssl_ctx
+    #   @return [OpenSSL::SSL::SSLContext] SSL context
+    attr_reader :ssl_ctx
+
+    # Fetches resource content by sending HTTP request to it.
+    # Synthetic sugar to simplify URIes fetching.
+    #
+    # @param url [String] URL
+    #
+    # @return [String]
+    #   resource content
+    #
+    def self.fetch(url)
+      new(url).fetch
+    end
+
     # Initialize HTTP client instance
     #
     # @return [HTTPClient]
     #
     def initialize(url)
-      @
-      @http =
-      return unless https?
+      @url = url.to_s
+      @http = HTTP.headers(default_headers)
 
-      @
-      @
+      @ssl_ctx = OpenSSL::SSL::SSLContext.new
+      @ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
     end
 
     # Fetches resource content by sending HTTP request to it.
@@ -32,32 +47,23 @@ module ProxyFetcher
     #   response body
     #
     def fetch
-
-
-
-
-      response.body
+      @http.get(url, ssl_context: ssl_ctx).body.to_s
+    rescue StandardError
+      ProxyFetcher.logger.warn("Failed to load proxy list for #{url}")
+      ''
     end
 
-
-    # Synthetic sugar to simplify URIes fetching.
-    #
-    # @param url [String] URL
-    #
-    # @return [String]
-    #   resource content
-    #
-    def self.fetch(url)
-      new(url).fetch
-    end
+    protected
 
-    #
+    # Default HTTP client headers
     #
-    # @return [
-    #
+    # @return [Hash]
+    #   hash of HTTP headers
     #
-    def
-
+    def default_headers
+      {
+        'User-Agent' => ProxyFetcher.config.user_agent
+      }
    end
  end
 end
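A quick usage sketch of the relocated `fetch` class method (the URL is an arbitrary example):

```ruby
# Downloads the raw page body. On any error it logs a warning through
# ProxyFetcher.logger ("Failed to load proxy list for ...") and returns ''
# instead of raising, so provider failures no longer abort the manager.
html = ProxyFetcher::HTTPClient.fetch('https://www.google.com')
puts html.bytesize
```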
data/lib/proxy_fetcher/utils/proxy_validator.rb
CHANGED
@@ -8,6 +8,18 @@ module ProxyFetcher
     # Default URL that will be used to check if proxy can be used.
     URL_TO_CHECK = 'https://google.com'.freeze
 
+    # Short variant to validate proxy.
+    #
+    # @param proxy_addr [String] proxy address or IP
+    # @param proxy_port [String, Integer] proxy port
+    #
+    # @return [Boolean]
+    #   true if connection to the server using proxy established, otherwise false
+    #
+    def self.connectable?(proxy_addr, proxy_port)
+      new(proxy_addr, proxy_port).connectable?
+    end
+
     # Initialize new ProxyValidator instance
     #
     # @param proxy_addr [String] proxy address or IP
@@ -16,13 +28,9 @@ module ProxyFetcher
     # @return [ProxyValidator]
     #
     def initialize(proxy_addr, proxy_port)
-
-      @http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
-
-      return unless uri.is_a?(URI::HTTPS)
+      timeout = ProxyFetcher.config.timeout
 
-      @http
-      @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
+      @http = HTTP.follow.via(proxy_addr, proxy_port.to_i).timeout(connect: timeout, read: timeout)
     end
 
     # Checks if proxy is connectable (can be used to connect
@@ -32,26 +40,12 @@ module ProxyFetcher
     #   true if connection to the server using proxy established, otherwise false
     #
     def connectable?
-
-
-
-      @http.start { |connection| return true if connection.request_head('/') }
+      ssl_context = OpenSSL::SSL::SSLContext.new
+      ssl_context.verify_mode = OpenSSL::SSL::VERIFY_NONE
 
-
+      @http.head(URL_TO_CHECK, ssl_context: ssl_context).status.success?
     rescue StandardError
       false
     end
-
-    # Short variant to validate proxy.
-    #
-    # @param proxy_addr [String] proxy address or IP
-    # @param proxy_port [String, Integer] proxy port
-    #
-    # @return [Boolean]
-    #   true if connection to the server using proxy established, otherwise false
-    #
-    def self.connectable?(proxy_addr, proxy_port)
-      new(proxy_addr, proxy_port).connectable?
-    end
   end
 end
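Validating a single proxy with the relocated class method looks roughly like this (address and port are placeholders):

```ruby
if ProxyFetcher::ProxyValidator.connectable?('10.10.10.10', 3128)
  puts 'proxy answered a HEAD request to the check URL'
else
  puts 'proxy is dead or too slow (timeouts come from ProxyFetcher.config.timeout)'
end
```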
data/proxy_fetcher.gemspec
CHANGED
data/spec/proxy_fetcher/client/client_spec.rb
CHANGED
@@ -6,7 +6,7 @@ require 'json'
 require 'evil-proxy'
 require 'evil-proxy/async'
 
-
+xdescribe ProxyFetcher::Client do
   before :all do
     ProxyFetcher.configure do |config|
       config.provider = :xroxy
@@ -32,14 +32,12 @@ describe ProxyFetcher::Client do
    it 'successfully returns page content for HTTP' do
      content = ProxyFetcher::Client.get('http://httpbin.org')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
    end
 
    it 'successfully returns page content for HTTPS' do
      content = ProxyFetcher::Client.get('https://httpbin.org')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
    end
 
@@ -49,7 +47,6 @@ describe ProxyFetcher::Client do
      proxy = manager.get! until proxy
      content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy })
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
    end
  end
@@ -59,9 +56,8 @@ describe ProxyFetcher::Client do
      headers = {
        'X-Proxy-Fetcher-Version' => ProxyFetcher::VERSION::STRING
      }
-      content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value'} , headers: headers)
+      content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value' } , headers: headers)
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
 
      json = JSON.parse(content)
@@ -75,12 +71,11 @@ describe ProxyFetcher::Client do
    it 'successfully returns page content for HTTP' do
      content = ProxyFetcher::Client.put('http://httpbin.org/put', 'param=PutValue')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
 
      json = JSON.parse(content)
 
-      expect(json['
+      expect(json['data']).to eq('param=PutValue')
    end
  end
 
@@ -88,7 +83,6 @@ describe ProxyFetcher::Client do
    it 'successfully returns page content for HTTP' do
      content = ProxyFetcher::Client.patch('http://httpbin.org/patch', param: 'value')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
 
      json = JSON.parse(content)
@@ -101,7 +95,6 @@ describe ProxyFetcher::Client do
    it 'successfully returns page content for HTTP' do
      content = ProxyFetcher::Client.delete('http://httpbin.org/delete')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
    end
  end
@@ -110,7 +103,7 @@ describe ProxyFetcher::Client do
    it 'successfully works' do
      content = ProxyFetcher::Client.head('http://httpbin.org')
 
-      expect(content).to
+      expect(content).to be_empty
    end
  end
 
@@ -122,7 +115,7 @@ describe ProxyFetcher::Client do
    end
 
    it 'raises an error when http request returns an error' do
-      allow_any_instance_of(
+      allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
 
      expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
    end
@@ -138,7 +131,6 @@ describe ProxyFetcher::Client do
    it 'follows redirect when present' do
      content = ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/2')
 
-      expect(content).not_to be_nil
      expect(content).not_to be_empty
    end
 
data/spec/proxy_fetcher/providers/base_spec.rb
CHANGED
@@ -27,4 +27,25 @@ describe ProxyFetcher::Providers::Base do
      expect(error.message).to include('to_proxy')
    end
  end
+
+  it 'logs failed to load proxy providers' do
+    CustomProvider = Class.new(ProxyFetcher::Providers::Base) do
+      def load_proxy_list(*)
+        doc = load_document('https://google.com', {})
+        doc.xpath('//table[contains(@class, "table")]/tr[(not(@id="proxy-table-header")) and (count(td)>2)]')
+      end
+    end
+
+    logger = Logger.new(StringIO.new)
+
+    ProxyFetcher::Configuration.register_provider(:custom_provider, CustomProvider)
+    ProxyFetcher.config.provider = :custom_provider
+    ProxyFetcher.config.logger = logger
+
+    allow_any_instance_of(HTTP::Client).to receive(:get).and_raise(StandardError)
+
+    expect(logger).to receive(:warn).with(/Failed to load proxy list for http[s:\/]/)
+
+    ProxyFetcher::Manager.new
+  end
 end
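Outside of the spec, the behaviour it exercises looks roughly like this; the provider name and log destination are illustrative:

```ruby
ProxyFetcher.configure do |config|
  config.provider = :free_proxy_list
  config.logger   = Logger.new('proxy_fetcher.log')
end

# If the provider page cannot be downloaded, HTTPClient#fetch rescues the
# error, logs "Failed to load proxy list for <url>" and returns '', so the
# manager simply ends up with an empty proxy list instead of raising.
manager = ProxyFetcher::Manager.new
puts manager.proxies.size
```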
data/spec/proxy_fetcher/proxy_spec.rb
CHANGED
@@ -3,15 +3,7 @@
 require 'spec_helper'
 
 describe ProxyFetcher::Proxy do
-
-    ProxyFetcher.config.provider = :proxy_docker
-  end
-
-  before do
-    @manager = ProxyFetcher::Manager.new
-  end
-
-  let(:proxy) { @manager.proxies.first.dup }
+  let(:proxy) { described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP') }
 
   it 'can initialize a new proxy object' do
     proxy = described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP')
@@ -48,14 +40,8 @@ describe ProxyFetcher::Proxy do
   end
 
   it 'not connectable if there are some error during connection request' do
-    allow_any_instance_of(
-    expect(proxy.connectable?).to be_falsey
-  end
-
-  it "not connectable if server doesn't respond to head" do
-    allow_any_instance_of(Net::HTTP).to receive(:start).and_return(false)
+    allow_any_instance_of(HTTP::Client).to receive(:head).and_raise(HTTP::TimeoutError)
     expect(proxy.connectable?).to be_falsey
-    expect(proxy.valid?).to be_falsey
   end
 
   it 'returns URI::Generic' do
@@ -69,7 +55,7 @@ describe ProxyFetcher::Proxy do
     expect(proxy.url).to be_a(String)
   end
 
-  it 'returns URL with
+  it 'returns URL with scheme' do
     expect(proxy.url(scheme: true)).to include('://')
   end
 end
metadata
CHANGED
@@ -1,15 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: proxy_fetcher
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.7.0
 platform: ruby
 authors:
 - Nikita Bulai
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-04
+date: 2018-06-04 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: http
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.0'
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
@@ -58,6 +72,7 @@ files:
 - lib/proxy_fetcher/document/node.rb
 - lib/proxy_fetcher/exceptions.rb
 - lib/proxy_fetcher/manager.rb
+- lib/proxy_fetcher/null_logger.rb
 - lib/proxy_fetcher/providers/base.rb
 - lib/proxy_fetcher/providers/free_proxy_list.rb
 - lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
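The `http` runtime dependency recorded in the metadata corresponds to the two-line change to proxy_fetcher.gemspec listed in the summary (its diff body is not shown here). A sketch of how such a declaration typically looks, not the literal gemspec contents:

```ruby
Gem::Specification.new do |spec|
  spec.name    = 'proxy_fetcher'
  spec.version = '0.7.0'

  # New in 0.7.0: HTTP.rb replaces Net::HTTP for all internal requests.
  spec.add_runtime_dependency 'http', '~> 3.0'
end
```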