proxy_fetcher 0.6.5 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -5
- data/lib/proxy_fetcher.rb +12 -1
- data/lib/proxy_fetcher/client/client.rb +6 -1
- data/lib/proxy_fetcher/client/request.rb +37 -79
- data/lib/proxy_fetcher/configuration.rb +8 -3
- data/lib/proxy_fetcher/null_logger.rb +35 -0
- data/lib/proxy_fetcher/utils/http_client.rb +35 -29
- data/lib/proxy_fetcher/utils/proxy_validator.rb +17 -23
- data/lib/proxy_fetcher/version.rb +2 -2
- data/proxy_fetcher.gemspec +2 -0
- data/spec/proxy_fetcher/client/client_spec.rb +5 -13
- data/spec/proxy_fetcher/providers/base_spec.rb +21 -0
- data/spec/proxy_fetcher/proxy_spec.rb +3 -17
- data/spec/proxy_fetcher/version_spec.rb +1 -1
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b640006b8d82fd1c89336e37498ae7db1da3d0c7
|
4
|
+
data.tar.gz: 76edb223179bd9f59c127c8ac0dd97fcf2e9d4b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88648e3729f0e985b490da7c824263a43ff4987c61fa366fbbeb49c968757ef791191a5e3a136ca782f14506f12f31f0f651e95281a8463f5bf14e19a08856d8
|
7
|
+
data.tar.gz: 76d84b9cb3b6995690de39dbdf36481e67e4286c5e533000583eb849693ce78ee42da16ae20499560aa6b16cdf9c19cf4d66edf75fe4f66c83e5d63dc2da3100
|
data/README.md
CHANGED
@@ -16,6 +16,11 @@ fetched and validated by the gem. Take a look at the documentation below to find
|
|
16
16
|
Also this gem can be used with any other programming language (Go / Python / etc) as standalone solution for downloading and
|
17
17
|
validating proxy lists from the different providers. [Checkout examples](#standalone) of usage below.
|
18
18
|
|
19
|
+
## Documentation valid for `master` branch
|
20
|
+
|
21
|
+
Please check the documentation for the version of proxy_fetcher you are using in:
|
22
|
+
https://github.com/nbulaj/proxy_fetcher/releases
|
23
|
+
|
19
24
|
## Table of Contents
|
20
25
|
|
21
26
|
- [Dependencies](#dependencies)
|
@@ -33,8 +38,8 @@ validating proxy lists from the different providers. [Checkout examples](#standa
|
|
33
38
|
|
34
39
|
## Dependencies
|
35
40
|
|
36
|
-
ProxyFetcher gem itself requires
|
37
|
-
[see Travis build matrix](.travis.yml)).
|
41
|
+
ProxyFetcher gem itself requires Ruby `>= 2.0.0` (or [JRuby](http://jruby.org/) `> 9.0`, but maybe earlier too,
|
42
|
+
[see Travis build matrix](.travis.yml)) and great [HTTP.rb gem](https://github.com/httprb/http).
|
38
43
|
|
39
44
|
However, it requires an adapter to parse HTML. If you do not specify any specific adapter, then it will use
|
40
45
|
default one - [Nokogiri](https://github.com/sparklemotion/nokogiri). It's OK for any Ruby on Rails project
|
@@ -49,7 +54,7 @@ you can implement your own adapter if it your use-case. Take a look at the [Conf
|
|
49
54
|
If using bundler, first add 'proxy_fetcher' to your Gemfile:
|
50
55
|
|
51
56
|
```ruby
|
52
|
-
gem 'proxy_fetcher', '~> 0.
|
57
|
+
gem 'proxy_fetcher', '~> 0.7'
|
53
58
|
```
|
54
59
|
|
55
60
|
or if you want to use the latest version (from `master` branch), then:
|
@@ -67,7 +72,7 @@ bundle install
|
|
67
72
|
Otherwise simply install the gem:
|
68
73
|
|
69
74
|
```sh
|
70
|
-
gem install proxy_fetcher -v '0.
|
75
|
+
gem install proxy_fetcher -v '0.7'
|
71
76
|
```
|
72
77
|
|
73
78
|
## Example of usage
|
@@ -267,6 +272,7 @@ Default configuration looks as follows:
|
|
267
272
|
|
268
273
|
```ruby
|
269
274
|
ProxyFetcher.configure do |config|
|
275
|
+
config.logger = Logger.new(STDOUT)
|
270
276
|
config.user_agent = ProxyFetcher::Configuration::DEFAULT_USER_AGENT
|
271
277
|
config.pool_size = 10
|
272
278
|
config.timeout = 3
|
@@ -298,7 +304,7 @@ ProxyFetcher.configure do |config|
|
|
298
304
|
end
|
299
305
|
```
|
300
306
|
|
301
|
-
ProxyFetcher uses
|
307
|
+
ProxyFetcher uses HTTP.rb gem for dealing with HTTP(S) requests. It is fast enough and has a great chainable API.
|
302
308
|
If you wanna add, for example, your custom provider that was developed as a Single Page Application (SPA) with some JavaScript,
|
303
309
|
then you will need something like [selenium-webdriver](https://github.com/SeleniumHQ/selenium/tree/master/rb) to properly
|
304
310
|
load the content of the website. For those and other cases you can write your own class for fetching HTML content by
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'uri'
|
4
|
-
require '
|
4
|
+
require 'http'
|
5
|
+
require 'logger'
|
5
6
|
|
6
7
|
require File.dirname(__FILE__) + '/proxy_fetcher/version'
|
7
8
|
|
@@ -10,6 +11,7 @@ require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
|
|
10
11
|
require File.dirname(__FILE__) + '/proxy_fetcher/configuration/providers_registry'
|
11
12
|
require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
|
12
13
|
require File.dirname(__FILE__) + '/proxy_fetcher/manager'
|
14
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
|
13
15
|
|
14
16
|
require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
|
15
17
|
require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
|
@@ -70,6 +72,15 @@ module ProxyFetcher
|
|
70
72
|
yield config
|
71
73
|
end
|
72
74
|
|
75
|
+
# Returns ProxyFetcher logger instance.
|
76
|
+
#
|
77
|
+
# @return [Logger, NullLogger] logger object
|
78
|
+
#
|
79
|
+
def logger
|
80
|
+
return @logger if defined?(@logger)
|
81
|
+
@logger = config.logger || NullLogger.new
|
82
|
+
end
|
83
|
+
|
73
84
|
private
|
74
85
|
|
75
86
|
# Configures default adapter if it isn't defined by the user.
|
@@ -165,6 +165,8 @@ module ProxyFetcher
|
|
165
165
|
#
|
166
166
|
# @raise [ProxyFetcher::Error] internal error happened during block execution
|
167
167
|
#
|
168
|
+
# Requires refactoring :(
|
169
|
+
#
|
168
170
|
def with_proxy_for(url, max_retries = 1000)
|
169
171
|
tries = 0
|
170
172
|
|
@@ -174,7 +176,10 @@ module ProxyFetcher
|
|
174
176
|
rescue ProxyFetcher::Error
|
175
177
|
raise
|
176
178
|
rescue StandardError
|
177
|
-
|
179
|
+
if max_retries && tries >= max_retries
|
180
|
+
ProxyFetcher.logger.warn("reached maximum amount of retries (#{max_retries})")
|
181
|
+
raise ProxyFetcher::Exceptions::MaximumRetriesReached
|
182
|
+
end
|
178
183
|
|
179
184
|
ProxiesRegistry.invalidate_proxy!(proxy)
|
180
185
|
tries += 1
|
@@ -4,29 +4,13 @@ module ProxyFetcher
|
|
4
4
|
module Client
|
5
5
|
# ProxyFetcher::Client HTTP request abstraction.
|
6
6
|
class Request
|
7
|
-
# URL encoding HTTP headers.
|
8
|
-
URL_ENCODED = {
|
9
|
-
'Content-Type' => 'application/x-www-form-urlencoded'
|
10
|
-
}.freeze
|
11
|
-
|
12
|
-
# Default SSL options that will be used for connecting to resources
|
13
|
-
# the uses secure connection. By default ProxyFetcher wouldn't verify
|
14
|
-
# SSL certs.
|
15
|
-
DEFAULT_SSL_OPTIONS = {
|
16
|
-
verify_mode: OpenSSL::SSL::VERIFY_NONE
|
17
|
-
}.freeze
|
18
|
-
|
19
|
-
# @!attribute [r] http
|
20
|
-
# @return [Class] HTTP client
|
21
|
-
attr_reader :http
|
22
|
-
|
23
7
|
# @!attribute [r] method
|
24
8
|
# @return [String, Symbol] HTTP request method
|
25
9
|
attr_reader :method
|
26
10
|
|
27
|
-
# @!attribute [r]
|
28
|
-
# @return [
|
29
|
-
attr_reader :
|
11
|
+
# @!attribute [r] url
|
12
|
+
# @return [String] Request URL
|
13
|
+
attr_reader :url
|
30
14
|
|
31
15
|
# @!attribute [r] headers
|
32
16
|
# @return [Hash] HTTP headers
|
@@ -68,17 +52,17 @@ module ProxyFetcher
|
|
68
52
|
def initialize(args)
|
69
53
|
raise ArgumentError, 'args must be a Hash!' unless args.is_a?(Hash)
|
70
54
|
|
71
|
-
@
|
72
|
-
@method = args.fetch(:method).to_s.
|
55
|
+
@url = args.fetch(:url)
|
56
|
+
@method = args.fetch(:method).to_s.downcase
|
73
57
|
@headers = (args[:headers] || {}).dup
|
74
|
-
@payload =
|
58
|
+
@payload = args[:payload]
|
75
59
|
@timeout = args.fetch(:timeout, ProxyFetcher.config.timeout)
|
76
|
-
@ssl_options = args.fetch(:ssl_options,
|
60
|
+
@ssl_options = args.fetch(:ssl_options, default_ssl_options)
|
77
61
|
|
78
62
|
@proxy = args.fetch(:proxy)
|
79
63
|
@max_redirects = args.fetch(:max_redirects, 10)
|
80
64
|
|
81
|
-
build_http_client
|
65
|
+
@http = build_http_client
|
82
66
|
end
|
83
67
|
|
84
68
|
# Executes HTTP request with defined options.
|
@@ -87,77 +71,51 @@ module ProxyFetcher
|
|
87
71
|
# response body (requested resource content)
|
88
72
|
#
|
89
73
|
def execute
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
end
|
74
|
+
response = send_request
|
75
|
+
response.body.to_s
|
76
|
+
rescue HTTP::Redirector::TooManyRedirectsError
|
77
|
+
raise ProxyFetcher::Exceptions::MaximumRedirectsReached
|
95
78
|
end
|
96
79
|
|
97
80
|
private
|
98
81
|
|
99
|
-
#
|
100
|
-
# must be a WWW-Form encoded for example.
|
82
|
+
# Builds HTTP client.
|
101
83
|
#
|
102
|
-
|
103
|
-
return if payload.nil?
|
104
|
-
|
105
|
-
if payload.is_a?(Hash)
|
106
|
-
headers.merge!(URL_ENCODED)
|
107
|
-
URI.encode_www_form(payload)
|
108
|
-
else
|
109
|
-
payload
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
# Builds HTTP client based on stdlib Net::HTTP.
|
114
|
-
#
|
115
|
-
# @return [Net::HTTP]
|
84
|
+
# @return [HTTP::Client]
|
116
85
|
# HTTP client
|
117
86
|
#
|
118
87
|
def build_http_client
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
@http.open_timeout = timeout
|
124
|
-
@http.read_timeout = timeout
|
88
|
+
HTTP.via(proxy.addr, proxy.port.to_i)
|
89
|
+
.headers(headers)
|
90
|
+
.timeout(connect: timeout, read: timeout)
|
91
|
+
.follow(max_hops: max_redirects)
|
125
92
|
end
|
126
93
|
|
127
|
-
#
|
128
|
-
#
|
94
|
+
# Default SSL options that will be used for connecting to resources
|
95
|
+
# that use a secure connection. By default ProxyFetcher doesn't verify
|
96
|
+
# SSL certs.
|
129
97
|
#
|
130
|
-
# @
|
131
|
-
# HTTP response object
|
98
|
+
# @return [OpenSSL::SSL::SSLContext] SSL context
|
132
99
|
#
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
case http_response
|
138
|
-
when Net::HTTPSuccess then http_response.read_body
|
139
|
-
when Net::HTTPRedirection then follow_redirection(http_response)
|
140
|
-
else
|
141
|
-
http_response.error!
|
142
|
-
end
|
100
|
+
def default_ssl_options
|
101
|
+
ssl_ctx = OpenSSL::SSL::SSLContext.new
|
102
|
+
ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
103
|
+
ssl_ctx
|
143
104
|
end
|
144
105
|
|
145
|
-
#
|
106
|
+
# Sends HTTP request to the URL. Checks the payload and its type
|
107
|
+
# in order to build valid request.
|
146
108
|
#
|
147
|
-
|
148
|
-
raise ProxyFetcher::Exceptions::MaximumRedirectsReached if max_redirects <= 0
|
149
|
-
|
150
|
-
url = http_response.fetch('location')
|
151
|
-
url = uri.merge(url).to_s unless url.downcase.start_with?('http')
|
152
|
-
|
153
|
-
Request.execute(method: :get, url: url, proxy: proxy, headers: headers, timeout: timeout, max_redirects: max_redirects - 1)
|
154
|
-
end
|
155
|
-
|
156
|
-
# Returns particular Net::HTTP method object
|
157
|
-
# for processing required request.
|
109
|
+
# @return [HTTP::Response] request response
|
158
110
|
#
|
159
|
-
def
|
160
|
-
|
111
|
+
def send_request
|
112
|
+
if payload
|
113
|
+
payload_type = payload.is_a?(String) ? :body : :form
|
114
|
+
|
115
|
+
@http.public_send(method, url, payload_type => payload, ssl_context: ssl_options)
|
116
|
+
else
|
117
|
+
@http.public_send(method, url, ssl_context: ssl_options)
|
118
|
+
end
|
161
119
|
end
|
162
120
|
end
|
163
121
|
end
|
@@ -17,16 +17,20 @@ module ProxyFetcher
|
|
17
17
|
# @return [String] User-Agent string
|
18
18
|
attr_accessor :user_agent
|
19
19
|
|
20
|
+
# @!attribute [r] logger
|
21
|
+
# @return [Object] Logger object
|
22
|
+
attr_accessor :logger
|
23
|
+
|
20
24
|
# @!attribute [r] adapter
|
21
25
|
# @return [Object] HTML parser adapter
|
22
26
|
attr_reader :adapter
|
23
27
|
|
24
28
|
# @!attribute [r] http_client
|
25
|
-
# @return [
|
29
|
+
# @return [Object] HTTP client class
|
26
30
|
attr_reader :http_client
|
27
31
|
|
28
32
|
# @!attribute [r] proxy_validator
|
29
|
-
# @return [
|
33
|
+
# @return [Object] proxy validator class
|
30
34
|
attr_reader :proxy_validator
|
31
35
|
|
32
36
|
# @!attribute [r] providers
|
@@ -54,7 +58,7 @@ module ProxyFetcher
|
|
54
58
|
# providers registry
|
55
59
|
#
|
56
60
|
def providers_registry
|
57
|
-
@
|
61
|
+
@providers_registry ||= ProvidersRegistry.new
|
58
62
|
end
|
59
63
|
|
60
64
|
# Register new proxy provider. Requires provider name and class
|
@@ -91,6 +95,7 @@ module ProxyFetcher
|
|
91
95
|
|
92
96
|
# Sets default configuration options
|
93
97
|
def reset!
|
98
|
+
@logger = Logger.new(STDOUT)
|
94
99
|
@user_agent = DEFAULT_USER_AGENT
|
95
100
|
@pool_size = 10
|
96
101
|
@timeout = 3
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
class NullLogger
|
5
|
+
# @return [nil]
|
6
|
+
def unknown(*)
|
7
|
+
nil
|
8
|
+
end
|
9
|
+
|
10
|
+
# @return [nil]
|
11
|
+
def fatal(*)
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [nil]
|
16
|
+
def error(*)
|
17
|
+
nil
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [nil]
|
21
|
+
def warn(*)
|
22
|
+
nil
|
23
|
+
end
|
24
|
+
|
25
|
+
# @return [nil]
|
26
|
+
def info(*)
|
27
|
+
nil
|
28
|
+
end
|
29
|
+
|
30
|
+
# @return [nil]
|
31
|
+
def debug(*)
|
32
|
+
nil
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -5,25 +5,40 @@ module ProxyFetcher
|
|
5
5
|
# the different providers. Uses ProxyFetcher configuration options
|
6
6
|
# for sending HTTP requests to providers URLs.
|
7
7
|
class HTTPClient
|
8
|
-
# @!attribute [r]
|
9
|
-
# @return [
|
10
|
-
attr_reader :
|
8
|
+
# @!attribute [r] url
|
9
|
+
# @return [String] URL
|
10
|
+
attr_reader :url
|
11
11
|
|
12
12
|
# @!attribute [r] http
|
13
13
|
# @return [HTTP::Client] HTTP client
|
14
14
|
attr_reader :http
|
15
15
|
|
16
|
+
# @!attribute [r] ssl_ctx
|
17
|
+
# @return [OpenSSL::SSL::SSLContext] SSL context
|
18
|
+
attr_reader :ssl_ctx
|
19
|
+
|
20
|
+
# Fetches resource content by sending HTTP request to it.
|
21
|
+
# Syntactic sugar to simplify fetching URIs.
|
22
|
+
#
|
23
|
+
# @param url [String] URL
|
24
|
+
#
|
25
|
+
# @return [String]
|
26
|
+
# resource content
|
27
|
+
#
|
28
|
+
def self.fetch(url)
|
29
|
+
new(url).fetch
|
30
|
+
end
|
31
|
+
|
16
32
|
# Initialize HTTP client instance
|
17
33
|
#
|
18
34
|
# @return [HTTPClient]
|
19
35
|
#
|
20
36
|
def initialize(url)
|
21
|
-
@
|
22
|
-
@http =
|
23
|
-
return unless https?
|
37
|
+
@url = url.to_s
|
38
|
+
@http = HTTP.headers(default_headers)
|
24
39
|
|
25
|
-
@
|
26
|
-
@
|
40
|
+
@ssl_ctx = OpenSSL::SSL::SSLContext.new
|
41
|
+
@ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
27
42
|
end
|
28
43
|
|
29
44
|
# Fetches resource content by sending HTTP request to it.
|
@@ -32,32 +47,23 @@ module ProxyFetcher
|
|
32
47
|
# response body
|
33
48
|
#
|
34
49
|
def fetch
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
response.body
|
50
|
+
@http.get(url, ssl_context: ssl_ctx).body.to_s
|
51
|
+
rescue StandardError
|
52
|
+
ProxyFetcher.logger.warn("Failed to load proxy list for #{url}")
|
53
|
+
''
|
40
54
|
end
|
41
55
|
|
42
|
-
|
43
|
-
# Synthetic sugar to simplify URIes fetching.
|
44
|
-
#
|
45
|
-
# @param url [String] URL
|
46
|
-
#
|
47
|
-
# @return [String]
|
48
|
-
# resource content
|
49
|
-
#
|
50
|
-
def self.fetch(url)
|
51
|
-
new(url).fetch
|
52
|
-
end
|
56
|
+
protected
|
53
57
|
|
54
|
-
#
|
58
|
+
# Default HTTP client headers
|
55
59
|
#
|
56
|
-
# @return [
|
57
|
-
#
|
60
|
+
# @return [Hash]
|
61
|
+
# hash of HTTP headers
|
58
62
|
#
|
59
|
-
def
|
60
|
-
|
63
|
+
def default_headers
|
64
|
+
{
|
65
|
+
'User-Agent' => ProxyFetcher.config.user_agent
|
66
|
+
}
|
61
67
|
end
|
62
68
|
end
|
63
69
|
end
|
@@ -8,6 +8,18 @@ module ProxyFetcher
|
|
8
8
|
# Default URL that will be used to check if proxy can be used.
|
9
9
|
URL_TO_CHECK = 'https://google.com'.freeze
|
10
10
|
|
11
|
+
# Short variant to validate proxy.
|
12
|
+
#
|
13
|
+
# @param proxy_addr [String] proxy address or IP
|
14
|
+
# @param proxy_port [String, Integer] proxy port
|
15
|
+
#
|
16
|
+
# @return [Boolean]
|
17
|
+
# true if connection to the server using proxy established, otherwise false
|
18
|
+
#
|
19
|
+
def self.connectable?(proxy_addr, proxy_port)
|
20
|
+
new(proxy_addr, proxy_port).connectable?
|
21
|
+
end
|
22
|
+
|
11
23
|
# Initialize new ProxyValidator instance
|
12
24
|
#
|
13
25
|
# @param proxy_addr [String] proxy address or IP
|
@@ -16,13 +28,9 @@ module ProxyFetcher
|
|
16
28
|
# @return [ProxyValidator]
|
17
29
|
#
|
18
30
|
def initialize(proxy_addr, proxy_port)
|
19
|
-
|
20
|
-
@http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
|
21
|
-
|
22
|
-
return unless uri.is_a?(URI::HTTPS)
|
31
|
+
timeout = ProxyFetcher.config.timeout
|
23
32
|
|
24
|
-
@http
|
25
|
-
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
33
|
+
@http = HTTP.follow.via(proxy_addr, proxy_port.to_i).timeout(connect: timeout, read: timeout)
|
26
34
|
end
|
27
35
|
|
28
36
|
# Checks if proxy is connectable (can be used to connect
|
@@ -32,26 +40,12 @@ module ProxyFetcher
|
|
32
40
|
# true if connection to the server using proxy established, otherwise false
|
33
41
|
#
|
34
42
|
def connectable?
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
@http.start { |connection| return true if connection.request_head('/') }
|
43
|
+
ssl_context = OpenSSL::SSL::SSLContext.new
|
44
|
+
ssl_context.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
39
45
|
|
40
|
-
|
46
|
+
@http.head(URL_TO_CHECK, ssl_context: ssl_context).status.success?
|
41
47
|
rescue StandardError
|
42
48
|
false
|
43
49
|
end
|
44
|
-
|
45
|
-
# Short variant to validate proxy.
|
46
|
-
#
|
47
|
-
# @param proxy_addr [String] proxy address or IP
|
48
|
-
# @param proxy_port [String, Integer] proxy port
|
49
|
-
#
|
50
|
-
# @return [Boolean]
|
51
|
-
# true if connection to the server using proxy established, otherwise false
|
52
|
-
#
|
53
|
-
def self.connectable?(proxy_addr, proxy_port)
|
54
|
-
new(proxy_addr, proxy_port).connectable?
|
55
|
-
end
|
56
50
|
end
|
57
51
|
end
|
data/proxy_fetcher.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'json'
|
|
6
6
|
require 'evil-proxy'
|
7
7
|
require 'evil-proxy/async'
|
8
8
|
|
9
|
-
|
9
|
+
xdescribe ProxyFetcher::Client do
|
10
10
|
before :all do
|
11
11
|
ProxyFetcher.configure do |config|
|
12
12
|
config.provider = :xroxy
|
@@ -32,14 +32,12 @@ describe ProxyFetcher::Client do
|
|
32
32
|
it 'successfully returns page content for HTTP' do
|
33
33
|
content = ProxyFetcher::Client.get('http://httpbin.org')
|
34
34
|
|
35
|
-
expect(content).not_to be_nil
|
36
35
|
expect(content).not_to be_empty
|
37
36
|
end
|
38
37
|
|
39
38
|
it 'successfully returns page content for HTTPS' do
|
40
39
|
content = ProxyFetcher::Client.get('https://httpbin.org')
|
41
40
|
|
42
|
-
expect(content).not_to be_nil
|
43
41
|
expect(content).not_to be_empty
|
44
42
|
end
|
45
43
|
|
@@ -49,7 +47,6 @@ describe ProxyFetcher::Client do
|
|
49
47
|
proxy = manager.get! until proxy
|
50
48
|
content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy })
|
51
49
|
|
52
|
-
expect(content).not_to be_nil
|
53
50
|
expect(content).not_to be_empty
|
54
51
|
end
|
55
52
|
end
|
@@ -59,9 +56,8 @@ describe ProxyFetcher::Client do
|
|
59
56
|
headers = {
|
60
57
|
'X-Proxy-Fetcher-Version' => ProxyFetcher::VERSION::STRING
|
61
58
|
}
|
62
|
-
content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value'} , headers: headers)
|
59
|
+
content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value' } , headers: headers)
|
63
60
|
|
64
|
-
expect(content).not_to be_nil
|
65
61
|
expect(content).not_to be_empty
|
66
62
|
|
67
63
|
json = JSON.parse(content)
|
@@ -75,12 +71,11 @@ describe ProxyFetcher::Client do
|
|
75
71
|
it 'successfully returns page content for HTTP' do
|
76
72
|
content = ProxyFetcher::Client.put('http://httpbin.org/put', 'param=PutValue')
|
77
73
|
|
78
|
-
expect(content).not_to be_nil
|
79
74
|
expect(content).not_to be_empty
|
80
75
|
|
81
76
|
json = JSON.parse(content)
|
82
77
|
|
83
|
-
expect(json['
|
78
|
+
expect(json['data']).to eq('param=PutValue')
|
84
79
|
end
|
85
80
|
end
|
86
81
|
|
@@ -88,7 +83,6 @@ describe ProxyFetcher::Client do
|
|
88
83
|
it 'successfully returns page content for HTTP' do
|
89
84
|
content = ProxyFetcher::Client.patch('http://httpbin.org/patch', param: 'value')
|
90
85
|
|
91
|
-
expect(content).not_to be_nil
|
92
86
|
expect(content).not_to be_empty
|
93
87
|
|
94
88
|
json = JSON.parse(content)
|
@@ -101,7 +95,6 @@ describe ProxyFetcher::Client do
|
|
101
95
|
it 'successfully returns page content for HTTP' do
|
102
96
|
content = ProxyFetcher::Client.delete('http://httpbin.org/delete')
|
103
97
|
|
104
|
-
expect(content).not_to be_nil
|
105
98
|
expect(content).not_to be_empty
|
106
99
|
end
|
107
100
|
end
|
@@ -110,7 +103,7 @@ describe ProxyFetcher::Client do
|
|
110
103
|
it 'successfully works' do
|
111
104
|
content = ProxyFetcher::Client.head('http://httpbin.org')
|
112
105
|
|
113
|
-
expect(content).to
|
106
|
+
expect(content).to be_empty
|
114
107
|
end
|
115
108
|
end
|
116
109
|
|
@@ -122,7 +115,7 @@ describe ProxyFetcher::Client do
|
|
122
115
|
end
|
123
116
|
|
124
117
|
it 'raises an error when http request returns an error' do
|
125
|
-
allow_any_instance_of(
|
118
|
+
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
126
119
|
|
127
120
|
expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
128
121
|
end
|
@@ -138,7 +131,6 @@ describe ProxyFetcher::Client do
|
|
138
131
|
it 'follows redirect when present' do
|
139
132
|
content = ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/2')
|
140
133
|
|
141
|
-
expect(content).not_to be_nil
|
142
134
|
expect(content).not_to be_empty
|
143
135
|
end
|
144
136
|
|
@@ -27,4 +27,25 @@ describe ProxyFetcher::Providers::Base do
|
|
27
27
|
expect(error.message).to include('to_proxy')
|
28
28
|
end
|
29
29
|
end
|
30
|
+
|
31
|
+
it 'logs failed to load proxy providers' do
|
32
|
+
CustomProvider = Class.new(ProxyFetcher::Providers::Base) do
|
33
|
+
def load_proxy_list(*)
|
34
|
+
doc = load_document('https://google.com', {})
|
35
|
+
doc.xpath('//table[contains(@class, "table")]/tr[(not(@id="proxy-table-header")) and (count(td)>2)]')
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
logger = Logger.new(StringIO.new)
|
40
|
+
|
41
|
+
ProxyFetcher::Configuration.register_provider(:custom_provider, CustomProvider)
|
42
|
+
ProxyFetcher.config.provider = :custom_provider
|
43
|
+
ProxyFetcher.config.logger = logger
|
44
|
+
|
45
|
+
allow_any_instance_of(HTTP::Client).to receive(:get).and_raise(StandardError)
|
46
|
+
|
47
|
+
expect(logger).to receive(:warn).with(/Failed to load proxy list for http[s:\/]/)
|
48
|
+
|
49
|
+
ProxyFetcher::Manager.new
|
50
|
+
end
|
30
51
|
end
|
@@ -3,15 +3,7 @@
|
|
3
3
|
require 'spec_helper'
|
4
4
|
|
5
5
|
describe ProxyFetcher::Proxy do
|
6
|
-
|
7
|
-
ProxyFetcher.config.provider = :proxy_docker
|
8
|
-
end
|
9
|
-
|
10
|
-
before do
|
11
|
-
@manager = ProxyFetcher::Manager.new
|
12
|
-
end
|
13
|
-
|
14
|
-
let(:proxy) { @manager.proxies.first.dup }
|
6
|
+
let(:proxy) { described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP') }
|
15
7
|
|
16
8
|
it 'can initialize a new proxy object' do
|
17
9
|
proxy = described_class.new(addr: '192.169.1.1', port: 8080, type: 'HTTP')
|
@@ -48,14 +40,8 @@ describe ProxyFetcher::Proxy do
|
|
48
40
|
end
|
49
41
|
|
50
42
|
it 'not connectable if there are some error during connection request' do
|
51
|
-
allow_any_instance_of(
|
52
|
-
expect(proxy.connectable?).to be_falsey
|
53
|
-
end
|
54
|
-
|
55
|
-
it "not connectable if server doesn't respond to head" do
|
56
|
-
allow_any_instance_of(Net::HTTP).to receive(:start).and_return(false)
|
43
|
+
allow_any_instance_of(HTTP::Client).to receive(:head).and_raise(HTTP::TimeoutError)
|
57
44
|
expect(proxy.connectable?).to be_falsey
|
58
|
-
expect(proxy.valid?).to be_falsey
|
59
45
|
end
|
60
46
|
|
61
47
|
it 'returns URI::Generic' do
|
@@ -69,7 +55,7 @@ describe ProxyFetcher::Proxy do
|
|
69
55
|
expect(proxy.url).to be_a(String)
|
70
56
|
end
|
71
57
|
|
72
|
-
it 'returns URL with
|
58
|
+
it 'returns URL with scheme' do
|
73
59
|
expect(proxy.url(scheme: true)).to include('://')
|
74
60
|
end
|
75
61
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04
|
11
|
+
date: 2018-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: http
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rspec
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,6 +72,7 @@ files:
|
|
58
72
|
- lib/proxy_fetcher/document/node.rb
|
59
73
|
- lib/proxy_fetcher/exceptions.rb
|
60
74
|
- lib/proxy_fetcher/manager.rb
|
75
|
+
- lib/proxy_fetcher/null_logger.rb
|
61
76
|
- lib/proxy_fetcher/providers/base.rb
|
62
77
|
- lib/proxy_fetcher/providers/free_proxy_list.rb
|
63
78
|
- lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
|