proxy_fetcher 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +41 -23
- data/bin/proxy_fetcher +8 -1
- data/lib/proxy_fetcher.rb +18 -16
- data/lib/proxy_fetcher/configuration.rb +17 -6
- data/lib/proxy_fetcher/manager.rb +15 -3
- data/lib/proxy_fetcher/providers/base.rb +2 -7
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +2 -2
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +1 -1
- data/lib/proxy_fetcher/providers/hide_my_name.rb +2 -27
- data/lib/proxy_fetcher/providers/xroxy.rb +8 -1
- data/lib/proxy_fetcher/proxy.rb +18 -15
- data/lib/proxy_fetcher/utils/http_client.rb +0 -15
- data/lib/proxy_fetcher/utils/proxy_validator.rb +32 -0
- data/lib/proxy_fetcher/version.rb +1 -1
- data/spec/proxy_fetcher/configuration_spec.rb +19 -4
- data/spec/proxy_fetcher/proxy_spec.rb +6 -14
- data/spec/support/manager_examples.rb +2 -2
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27d97dfb3997e004b2fb1a07e935ba7a1132148d
|
4
|
+
data.tar.gz: 8084082dc54f59c2c6bc8c0457b2ea766c479db0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44a592c9882b108ff9f78cc274c27d6e33e8460aa8b3599a0388abb75e787829565223fd4bdfe48a798ca8e153c79c9a3bd1db6184540bf47567b85fa5bf01a9
|
7
|
+
data.tar.gz: 6bce6a4e8d0c197852edd8766c62a2e5850ab75e657f56b90f521307a98a3fa3995efd1219ff963648ad31dad2b5bc467a4c783887b724925e826393605cf36f
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -51,7 +51,7 @@ manager = ProxyFetcher::Manager.new # will immediately load proxy list from the
|
|
51
51
|
manager.proxies
|
52
52
|
|
53
53
|
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
54
|
-
# @response_time=5217, @
|
54
|
+
# @response_time=5217, @type="HTTP", @anonymity="High">, ... ]
|
55
55
|
```
|
56
56
|
|
57
57
|
You can initialize proxy manager without immediate load of proxy list from the remote server by passing `refresh: false` on initialization:
|
@@ -75,8 +75,8 @@ Get raw proxy URLs as Strings:
|
|
75
75
|
manager = ProxyFetcher::Manager.new
|
76
76
|
manager.raw_proxies
|
77
77
|
|
78
|
-
# => ["
|
79
|
-
# "
|
78
|
+
# => ["97.77.104.22:3128", "94.23.205.32:3128", "209.79.65.140:8080",
|
79
|
+
# "91.217.42.2:8080", "97.77.104.22:80", "165.234.102.177:8080", ...]
|
80
80
|
```
|
81
81
|
|
82
82
|
If `ProxyFetcher::Manager` was already initialized somewhere, you can refresh the proxy list by calling `#refresh_list!` method:
|
@@ -85,7 +85,7 @@ If `ProxyFetcher::Manager` was already initialized somewhere, you can refresh th
|
|
85
85
|
manager.refresh_list! # or manager.fetch!
|
86
86
|
|
87
87
|
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
88
|
-
# @response_time=5217, @
|
88
|
+
# @response_time=5217, @type="HTTP", @anonymity="High">, ... ]
|
89
89
|
```
|
90
90
|
|
91
91
|
If you need to filter proxy list, for example, by country or response time and selected provider supports filtering by GET params, then you
|
@@ -117,19 +117,23 @@ then you already have Ruby 2.3 installed. In other cases you can install it with
|
|
117
117
|
Just install the gem by running `gem install proxy_fetcher` in your terminal and run it:
|
118
118
|
|
119
119
|
```bash
|
120
|
-
proxy_fetcher >> proxies.txt # Will download proxies, validate them and write to file
|
120
|
+
proxy_fetcher >> proxies.txt # Will download proxies from the default provider, validate them and write to file
|
121
121
|
```
|
122
122
|
|
123
|
-
If you need a list of proxies
|
123
|
+
If you need a list of proxies from a specific provider, pass its name with the `-p` option:
|
124
|
+
|
125
|
+
```bash
|
126
|
+
proxy_fetcher -p proxy_docker >> proxies.txt # Will download proxies from the proxy_docker provider, validate them and write to file
|
127
|
+
```
|
128
|
+
|
129
|
+
If you need a list of proxies in JSON format just pass a `--json` option to the command:
|
124
130
|
|
125
131
|
```bash
|
126
132
|
proxy_fetcher --json
|
127
133
|
|
128
134
|
# Will print:
|
129
|
-
# {"proxies":["
|
130
|
-
# "
|
131
|
-
# "https://121.41.82.99:1080","https://77.53.105.155:35923"]}
|
132
|
-
|
135
|
+
# {"proxies":["120.26.206.178:80","119.61.13.242:1080","117.40.213.26:80","92.62.72.242:1080","77.53.105.155:3124",
|
136
|
+
# "58.20.41.172:35923","204.116.192.151:35923","190.5.96.58:1080","170.250.109.97:35923","121.41.82.99:1080"]}
|
133
137
|
```
|
134
138
|
|
135
139
|
To get all the possible options run:
|
@@ -144,10 +148,9 @@ Every proxy is a `ProxyFetcher::Proxy` object that has next readers (instance va
|
|
144
148
|
|
145
149
|
* `addr` (IP address)
|
146
150
|
* `port`
|
151
|
+
* `type` (proxy type, can be HTTP, HTTPS, SOCKS4 and/or SOCKS5)
|
147
152
|
* `country` (USA or Brazil for example)
|
148
153
|
* `response_time` (5217 for example)
|
149
|
-
* `speed` (`:slow`, `:medium` or `:fast`. **Note:** depends on the proxy provider and can be `nil`)
|
150
|
-
* `type` (URI schema, HTTP or HTTPS)
|
151
154
|
* `anonymity` (`Low`, `Elite proxy` or `High +KA` for example)
|
152
155
|
|
153
156
|
Also you can call next instance methods for every Proxy object:
|
@@ -155,15 +158,11 @@ Also you can call next instance methods for every Proxy object:
|
|
155
158
|
* `connectable?` (whether proxy server is available)
|
156
159
|
* `http?` (whether proxy server has a HTTP protocol)
|
157
160
|
* `https?` (whether proxy server has a HTTPS protocol)
|
161
|
+
* `socks4?`
|
162
|
+
* `socks5?`
|
158
163
|
* `uri` (returns `URI::Generic` object)
|
159
164
|
* `url` (returns a formatted address like "_IP:PORT_")
|
160
165
|
|
161
|
-
You can sort or find any proxy by speed using next 3 instance methods (if it is available for the specific provider):
|
162
|
-
|
163
|
-
* `fast?`
|
164
|
-
* `medium?`
|
165
|
-
* `slow?`'
|
166
|
-
|
167
166
|
## Configuration
|
168
167
|
|
169
168
|
To change open/read timeout for `cleanup!` and `connectable?` methods you need to change ProxyFetcher.config:
|
@@ -188,10 +187,6 @@ class MyHTTPClient
|
|
188
187
|
def self.fetch(url)
|
189
188
|
# ... some magic to return proper HTML ...
|
190
189
|
end
|
191
|
-
|
192
|
-
def self.connectable?(url)
|
193
|
-
# ... some magic to check if url is connectable ...
|
194
|
-
end
|
195
190
|
end
|
196
191
|
|
197
192
|
ProxyFetcher.config.http_client = MyHTTPClient
|
@@ -200,11 +195,34 @@ manager = ProxyFetcher::Manager.new
|
|
200
195
|
manager.proxies
|
201
196
|
|
202
197
|
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
203
|
-
# @response_time=5217, @
|
198
|
+
# @response_time=5217, @type="HTTP", @anonymity="High">, ... ]
|
204
199
|
```
|
205
200
|
|
206
201
|
You can take a look at the [lib/proxy_fetcher/utils/http_client.rb](lib/proxy_fetcher/utils/http_client.rb) for an example.
|
207
202
|
|
203
|
+
Moreover, you can write your own proxy validator to check if proxy is valid or not:
|
204
|
+
|
205
|
+
```ruby
|
206
|
+
class MyProxyValidator
|
207
|
+
# [IMPORTANT]: the method below is required!
|
208
|
+
def self.connectable?(proxy_addr, proxy_port)
|
209
|
+
# ... some magic to check if proxy is valid ...
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
ProxyFetcher.config.proxy_validator = MyProxyValidator
|
214
|
+
|
215
|
+
manager = ProxyFetcher::Manager.new
|
216
|
+
manager.proxies
|
217
|
+
|
218
|
+
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
219
|
+
# @response_time=5217, @type="HTTP", @anonymity="High">, ... ]
|
220
|
+
|
221
|
+
manager.validate!
|
222
|
+
|
223
|
+
#=> [ ... ]
|
224
|
+
```
|
225
|
+
|
208
226
|
## Providers
|
209
227
|
|
210
228
|
Currently ProxyFetcher can deal with next proxy providers (services):
|
data/bin/proxy_fetcher
CHANGED
@@ -4,6 +4,7 @@ require 'optparse'
|
|
4
4
|
require 'proxy_fetcher'
|
5
5
|
|
6
6
|
options = {
|
7
|
+
filters: {},
|
7
8
|
validate: true,
|
8
9
|
json: false
|
9
10
|
}
|
@@ -33,6 +34,12 @@ OptionParser.new do |opts|
|
|
33
34
|
options[:validate] = false
|
34
35
|
end
|
35
36
|
|
37
|
+
opts.on('-f', '--filters={}', String, '# Filters for proxy provider in JSON format') do |filters|
|
38
|
+
require 'json'
|
39
|
+
|
40
|
+
options[:filters] = JSON.parse(filters)
|
41
|
+
end
|
42
|
+
|
36
43
|
opts.on('-t', '--timeout=SECONDS', Integer, '# Connection timeout in seconds') do |value|
|
37
44
|
options[:timeout] = value
|
38
45
|
end
|
@@ -45,7 +52,7 @@ end.parse!
|
|
45
52
|
ProxyFetcher.config.provider = options[:provider] if options[:provider]
|
46
53
|
ProxyFetcher.config.connection_timeout = options[:timeout] if options[:timeout]
|
47
54
|
|
48
|
-
manager = ProxyFetcher::Manager.new
|
55
|
+
manager = ProxyFetcher::Manager.new(filters: options[:filters])
|
49
56
|
manager.validate! if options[:validate]
|
50
57
|
|
51
58
|
if options[:json]
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -1,25 +1,27 @@
|
|
1
1
|
require 'uri'
|
2
|
-
require 'net/
|
3
|
-
require 'openssl'
|
2
|
+
require 'net/https'
|
4
3
|
require 'nokogiri'
|
5
|
-
require '
|
4
|
+
require 'thread'
|
6
5
|
|
7
|
-
require 'proxy_fetcher/configuration'
|
8
|
-
require 'proxy_fetcher/proxy'
|
9
|
-
require 'proxy_fetcher/manager'
|
6
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
|
7
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
|
8
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/manager'
|
10
9
|
|
11
|
-
require 'proxy_fetcher/utils/http_client'
|
12
|
-
require 'proxy_fetcher/utils/html'
|
13
|
-
|
14
|
-
require 'proxy_fetcher/providers/base'
|
15
|
-
require 'proxy_fetcher/providers/free_proxy_list'
|
16
|
-
require 'proxy_fetcher/providers/free_proxy_list_ssl'
|
17
|
-
require 'proxy_fetcher/providers/hide_my_name'
|
18
|
-
require 'proxy_fetcher/providers/proxy_docker'
|
19
|
-
require 'proxy_fetcher/providers/proxy_list'
|
20
|
-
require 'proxy_fetcher/providers/xroxy'
|
10
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
|
11
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/utils/html'
|
12
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
|
21
13
|
|
22
14
|
module ProxyFetcher
|
15
|
+
module Providers
|
16
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/base'
|
17
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list'
|
18
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list_ssl'
|
19
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/hide_my_name'
|
20
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_docker'
|
21
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_list'
|
22
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/providers/xroxy'
|
23
|
+
end
|
24
|
+
|
23
25
|
class << self
|
24
26
|
def config
|
25
27
|
@config ||= ProxyFetcher::Configuration.new
|
@@ -2,10 +2,10 @@ module ProxyFetcher
|
|
2
2
|
class Configuration
|
3
3
|
UnknownProvider = Class.new(StandardError)
|
4
4
|
RegisteredProvider = Class.new(StandardError)
|
5
|
-
|
5
|
+
WrongCustomClass = Class.new(StandardError)
|
6
6
|
|
7
|
-
attr_accessor :
|
8
|
-
attr_accessor :
|
7
|
+
attr_accessor :provider, :connection_timeout
|
8
|
+
attr_accessor :http_client, :proxy_validator, :logger
|
9
9
|
|
10
10
|
class << self
|
11
11
|
def providers
|
@@ -26,6 +26,7 @@ module ProxyFetcher
|
|
26
26
|
def reset!
|
27
27
|
@connection_timeout = 3
|
28
28
|
@http_client = HTTPClient
|
29
|
+
@proxy_validator = ProxyValidator
|
29
30
|
|
30
31
|
self.provider = :hide_my_name # currently default one
|
31
32
|
end
|
@@ -37,11 +38,21 @@ module ProxyFetcher
|
|
37
38
|
end
|
38
39
|
|
39
40
|
def http_client=(klass)
|
40
|
-
|
41
|
-
|
41
|
+
@http_client = setup_custom_class(klass, required_methods: :fetch)
|
42
|
+
end
|
43
|
+
|
44
|
+
def proxy_validator=(klass)
|
45
|
+
@proxy_validator = setup_custom_class(klass, required_methods: :connectable?)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def setup_custom_class(klass, required_methods: [])
|
51
|
+
unless klass.respond_to?(*required_methods)
|
52
|
+
raise WrongCustomClass, "#{klass} must respond to [#{Array(required_methods).join(', ')}] class methods!"
|
42
53
|
end
|
43
54
|
|
44
|
-
|
55
|
+
klass
|
45
56
|
end
|
46
57
|
end
|
47
58
|
end
|
@@ -49,9 +49,21 @@ module ProxyFetcher
|
|
49
49
|
|
50
50
|
alias pop! get!
|
51
51
|
|
52
|
-
# Clean current proxy list from dead proxies (doesn't respond by timeout)
|
53
|
-
def cleanup!
|
54
|
-
|
52
|
+
# Clean current proxy list from dead proxies (those that don't respond within the timeout)
|
53
|
+
def cleanup!(pool_size = 10)
|
54
|
+
lock = Mutex.new
|
55
|
+
|
56
|
+
proxies.dup.each_slice(pool_size) do |proxy_group|
|
57
|
+
threads = proxy_group.map do |group_proxy|
|
58
|
+
Thread.new(group_proxy, proxies) do |proxy, proxies|
|
59
|
+
lock.synchronize { proxies.delete(proxy) } unless proxy.connectable?
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
threads.each(&:join)
|
64
|
+
end
|
65
|
+
|
66
|
+
@proxies
|
55
67
|
end
|
56
68
|
|
57
69
|
alias validate! cleanup!
|
@@ -7,11 +7,6 @@ module ProxyFetcher
|
|
7
7
|
|
8
8
|
def_delegators ProxyFetcher::HTML, :clear, :convert_to_int
|
9
9
|
|
10
|
-
PROXY_TYPES = [
|
11
|
-
HTTP = 'HTTP'.freeze,
|
12
|
-
HTTPS = 'HTTPS'.freeze
|
13
|
-
].freeze
|
14
|
-
|
15
10
|
attr_reader :proxy
|
16
11
|
|
17
12
|
def fetch_proxies!(filters = {})
|
@@ -45,8 +40,8 @@ module ProxyFetcher
|
|
45
40
|
end
|
46
41
|
|
47
42
|
# Return normalized HTML element content by selector
|
48
|
-
def parse_element(
|
49
|
-
clear(
|
43
|
+
def parse_element(parent, selector, method = :at_xpath)
|
44
|
+
clear(parent.public_send(method, selector).content)
|
50
45
|
end
|
51
46
|
end
|
52
47
|
end
|
@@ -22,8 +22,8 @@ module ProxyFetcher
|
|
22
22
|
private
|
23
23
|
|
24
24
|
def parse_type(element)
|
25
|
-
|
26
|
-
|
25
|
+
https = parse_element(element, 'td[6]')
|
26
|
+
https && https.casecmp('yes').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
@@ -15,7 +15,7 @@ module ProxyFetcher
|
|
15
15
|
proxy.port = convert_to_int(parse_element(html_element, 'td[2]'))
|
16
16
|
proxy.country = parse_element(html_element, 'td[4]')
|
17
17
|
proxy.anonymity = parse_element(html_element, 'td[5]')
|
18
|
-
proxy.type = HTTPS
|
18
|
+
proxy.type = ProxyFetcher::Proxy::HTTPS
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
@@ -13,14 +13,9 @@ module ProxyFetcher
|
|
13
13
|
proxy.addr = parse_element(html_element, 'td[1]')
|
14
14
|
proxy.port = convert_to_int(parse_element(html_element, 'td[2]'))
|
15
15
|
proxy.anonymity = parse_element(html_element, 'td[6]')
|
16
|
-
|
17
16
|
proxy.country = parse_country(html_element)
|
18
|
-
proxy.type =
|
19
|
-
|
20
|
-
response_time = parse_response_time(html_element)
|
21
|
-
|
22
|
-
proxy.response_time = response_time
|
23
|
-
proxy.speed = speed_from_response_time(response_time)
|
17
|
+
proxy.type = parse_element(html_element, 'td[5]')
|
18
|
+
proxy.response_time = parse_response_time(html_element)
|
24
19
|
end
|
25
20
|
end
|
26
21
|
|
@@ -30,29 +25,9 @@ module ProxyFetcher
|
|
30
25
|
clear(element.at_xpath('*//span[1]/following-sibling::text()[1]').content)
|
31
26
|
end
|
32
27
|
|
33
|
-
def parse_type(element)
|
34
|
-
schemas = parse_element(element, 'td[5]')
|
35
|
-
|
36
|
-
if schemas && schemas.downcase.include?('https')
|
37
|
-
HTTPS
|
38
|
-
else
|
39
|
-
HTTP
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
28
|
def parse_response_time(element)
|
44
29
|
convert_to_int(element.at_xpath('td[4]').content.strip[/\d+/])
|
45
30
|
end
|
46
|
-
|
47
|
-
def speed_from_response_time(response_time)
|
48
|
-
if response_time < 1500
|
49
|
-
:fast
|
50
|
-
elsif response_time < 3000
|
51
|
-
:medium
|
52
|
-
else
|
53
|
-
:slow
|
54
|
-
end
|
55
|
-
end
|
56
31
|
end
|
57
32
|
|
58
33
|
ProxyFetcher::Configuration.register_provider(:hide_my_name, HideMyName)
|
@@ -13,11 +13,18 @@ module ProxyFetcher
|
|
13
13
|
proxy.addr = parse_element(html_element, 'td[2]')
|
14
14
|
proxy.port = convert_to_int(parse_element(html_element, 'td[3]'))
|
15
15
|
proxy.anonymity = parse_element(html_element, 'td[4]')
|
16
|
-
proxy.type = parse_element(html_element, 'td[5]').casecmp('true').zero? ? HTTPS : HTTP
|
17
16
|
proxy.country = parse_element(html_element, 'td[6]')
|
18
17
|
proxy.response_time = convert_to_int(parse_element(html_element, 'td[7]'))
|
18
|
+
proxy.type = parse_type(html_element)
|
19
19
|
end
|
20
20
|
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
def parse_type(element)
|
25
|
+
https = parse_element(element, 'td[5]')
|
26
|
+
https.casecmp('true').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
27
|
+
end
|
21
28
|
end
|
22
29
|
|
23
30
|
ProxyFetcher::Configuration.register_provider(:xroxy, XRoxy)
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -1,31 +1,34 @@
|
|
1
1
|
module ProxyFetcher
|
2
|
-
class Proxy
|
3
|
-
|
4
|
-
ProxyFetcher.config.http_client.connectable?(url)
|
5
|
-
end
|
2
|
+
class Proxy
|
3
|
+
attr_accessor :addr, :port, :type, :country, :response_time, :anonymity
|
6
4
|
|
7
|
-
|
5
|
+
TYPES = [
|
6
|
+
HTTP = 'HTTP'.freeze,
|
7
|
+
HTTPS = 'HTTPS'.freeze,
|
8
|
+
SOCKS4 = 'SOCKS4'.freeze,
|
9
|
+
SOCKS5 = 'SOCKS5'.freeze
|
10
|
+
].freeze
|
8
11
|
|
9
|
-
|
10
|
-
define_method "#{
|
11
|
-
|
12
|
+
TYPES.each do |proxy_type|
|
13
|
+
define_method "#{proxy_type.downcase}?" do
|
14
|
+
type && type.upcase.include?(proxy_type)
|
12
15
|
end
|
13
16
|
end
|
14
17
|
|
15
|
-
|
16
|
-
type.casecmp('http').zero?
|
17
|
-
end
|
18
|
+
alias ssl? https?
|
18
19
|
|
19
|
-
def
|
20
|
-
|
20
|
+
def connectable?
|
21
|
+
ProxyFetcher.config.proxy_validator.connectable?(addr, port)
|
21
22
|
end
|
22
23
|
|
24
|
+
alias valid? connectable?
|
25
|
+
|
23
26
|
def uri
|
24
|
-
URI::Generic.build(host: addr, port: port
|
27
|
+
URI::Generic.build(host: addr, port: port)
|
25
28
|
end
|
26
29
|
|
27
30
|
def url
|
28
|
-
|
31
|
+
"#{addr}:#{port}"
|
29
32
|
end
|
30
33
|
end
|
31
34
|
end
|
@@ -18,17 +18,6 @@ module ProxyFetcher
|
|
18
18
|
response.body
|
19
19
|
end
|
20
20
|
|
21
|
-
def connectable?
|
22
|
-
@http.open_timeout = ProxyFetcher.config.connection_timeout
|
23
|
-
@http.read_timeout = ProxyFetcher.config.connection_timeout
|
24
|
-
|
25
|
-
@http.start { |connection| return true if connection.request_head('/') }
|
26
|
-
|
27
|
-
false
|
28
|
-
rescue StandardError
|
29
|
-
false
|
30
|
-
end
|
31
|
-
|
32
21
|
def https?
|
33
22
|
@uri.scheme.casecmp('https').zero?
|
34
23
|
end
|
@@ -37,10 +26,6 @@ module ProxyFetcher
|
|
37
26
|
def fetch(url)
|
38
27
|
new(url).fetch
|
39
28
|
end
|
40
|
-
|
41
|
-
def connectable?(url)
|
42
|
-
new(url).connectable?
|
43
|
-
end
|
44
29
|
end
|
45
30
|
end
|
46
31
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module ProxyFetcher
|
2
|
+
class ProxyValidator
|
3
|
+
URL_TO_CHECK = 'https://google.com'.freeze
|
4
|
+
|
5
|
+
def initialize(proxy_addr, proxy_port)
|
6
|
+
uri = URI.parse(URL_TO_CHECK)
|
7
|
+
@http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
|
8
|
+
|
9
|
+
return unless uri.scheme.casecmp('https').zero?
|
10
|
+
|
11
|
+
@http.use_ssl = true
|
12
|
+
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
13
|
+
end
|
14
|
+
|
15
|
+
def connectable?
|
16
|
+
@http.open_timeout = ProxyFetcher.config.connection_timeout
|
17
|
+
@http.read_timeout = ProxyFetcher.config.connection_timeout
|
18
|
+
|
19
|
+
@http.start { |connection| return true if connection.request_head('/') }
|
20
|
+
|
21
|
+
false
|
22
|
+
rescue StandardError
|
23
|
+
false
|
24
|
+
end
|
25
|
+
|
26
|
+
class << self
|
27
|
+
def connectable?(proxy_addr, proxy_port)
|
28
|
+
new(proxy_addr, proxy_port).connectable?
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -10,20 +10,35 @@ describe ProxyFetcher::Configuration do
|
|
10
10
|
def self.fetch(url)
|
11
11
|
url
|
12
12
|
end
|
13
|
+
end
|
14
|
+
|
15
|
+
expect { ProxyFetcher.config.http_client = MyHTTPClient }.not_to raise_error
|
16
|
+
end
|
13
17
|
|
18
|
+
it 'failed on setup if required methods are missing' do
|
19
|
+
MyWrongHTTPClient = Class.new
|
20
|
+
|
21
|
+
expect { ProxyFetcher.config.http_client = MyWrongHTTPClient }
|
22
|
+
.to raise_error(ProxyFetcher::Configuration::WrongCustomClass)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'custom proxy validator' do
|
27
|
+
it 'successfully setups if class has all the required methods' do
|
28
|
+
class MyProxyValidator
|
14
29
|
def self.connectable?(*)
|
15
30
|
true
|
16
31
|
end
|
17
32
|
end
|
18
33
|
|
19
|
-
expect { ProxyFetcher.config.
|
34
|
+
expect { ProxyFetcher.config.proxy_validator = MyProxyValidator }.not_to raise_error
|
20
35
|
end
|
21
36
|
|
22
37
|
it 'failed on setup if required methods are missing' do
|
23
|
-
|
38
|
+
MyWrongProxyValidator = Class.new
|
24
39
|
|
25
|
-
expect { ProxyFetcher.config.
|
26
|
-
.to raise_error(ProxyFetcher::Configuration::
|
40
|
+
expect { ProxyFetcher.config.proxy_validator = MyWrongProxyValidator }
|
41
|
+
.to raise_error(ProxyFetcher::Configuration::WrongCustomClass)
|
27
42
|
end
|
28
43
|
end
|
29
44
|
|
@@ -12,13 +12,16 @@ describe ProxyFetcher::Proxy do
|
|
12
12
|
let(:proxy) { @manager.proxies.first.dup }
|
13
13
|
|
14
14
|
it 'checks schema' do
|
15
|
-
proxy.type = ProxyFetcher::
|
15
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
16
16
|
expect(proxy.http?).to be_truthy
|
17
17
|
expect(proxy.https?).to be_falsey
|
18
18
|
|
19
|
-
proxy.type = ProxyFetcher::
|
19
|
+
proxy.type = ProxyFetcher::Proxy::HTTPS
|
20
20
|
expect(proxy.https?).to be_truthy
|
21
|
-
expect(proxy.http?).to
|
21
|
+
expect(proxy.http?).to be_truthy
|
22
|
+
|
23
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
24
|
+
expect(proxy.socks5?).to be_truthy
|
22
25
|
end
|
23
26
|
|
24
27
|
it 'not connectable if IP addr is wrong' do
|
@@ -44,15 +47,4 @@ describe ProxyFetcher::Proxy do
|
|
44
47
|
it 'returns URL' do
|
45
48
|
expect(proxy.url).to be_a(String)
|
46
49
|
end
|
47
|
-
|
48
|
-
it 'checks speed' do
|
49
|
-
proxy.speed = :fast
|
50
|
-
expect(proxy.fast?).to be_truthy
|
51
|
-
|
52
|
-
proxy.speed = :slow
|
53
|
-
expect(proxy.slow?).to be_truthy
|
54
|
-
|
55
|
-
proxy.speed = :medium
|
56
|
-
expect(proxy.medium?).to be_truthy
|
57
|
-
end
|
58
50
|
end
|
@@ -9,12 +9,12 @@ RSpec.shared_examples 'a manager' do
|
|
9
9
|
expect(manager.proxies).to be_empty
|
10
10
|
end
|
11
11
|
|
12
|
-
it '
|
12
|
+
it 'returns Proxy objects' do
|
13
13
|
manager = ProxyFetcher::Manager.new
|
14
14
|
expect(manager.proxies).to all(be_a(ProxyFetcher::Proxy))
|
15
15
|
end
|
16
16
|
|
17
|
-
it '
|
17
|
+
it 'returns raw proxies (HOST:PORT)' do
|
18
18
|
manager = ProxyFetcher::Manager.new
|
19
19
|
expect(manager.raw_proxies).to all(be_a(String))
|
20
20
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
@@ -73,6 +73,7 @@ files:
|
|
73
73
|
- lib/proxy_fetcher/proxy.rb
|
74
74
|
- lib/proxy_fetcher/utils/html.rb
|
75
75
|
- lib/proxy_fetcher/utils/http_client.rb
|
76
|
+
- lib/proxy_fetcher/utils/proxy_validator.rb
|
76
77
|
- lib/proxy_fetcher/version.rb
|
77
78
|
- proxy_fetcher.gemspec
|
78
79
|
- spec/proxy_fetcher/configuration_spec.rb
|