proxy_fetcher 0.10.2 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +8 -5
- data/Rakefile +4 -2
- data/gemfiles/nokogiri.gemfile +8 -6
- data/gemfiles/oga.gemfile +8 -6
- data/lib/proxy_fetcher.rb +30 -30
- data/lib/proxy_fetcher/client/client.rb +10 -3
- data/lib/proxy_fetcher/client/request.rb +4 -4
- data/lib/proxy_fetcher/configuration.rb +12 -11
- data/lib/proxy_fetcher/document.rb +0 -9
- data/lib/proxy_fetcher/document/adapters.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +2 -2
- data/lib/proxy_fetcher/exceptions.rb +6 -6
- data/lib/proxy_fetcher/manager.rb +2 -2
- data/lib/proxy_fetcher/providers/base.rb +42 -22
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
- data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
- data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
- data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
- data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
- data/lib/proxy_fetcher/proxy.rb +4 -4
- data/lib/proxy_fetcher/utils/http_client.rb +10 -8
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
- data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
- data/lib/proxy_fetcher/version.rb +3 -3
- data/proxy_fetcher.gemspec +19 -16
- data/spec/proxy_fetcher/client/client_spec.rb +72 -57
- data/spec/proxy_fetcher/configuration_spec.rb +11 -11
- data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
- data/spec/proxy_fetcher/document/node_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
- data/spec/proxy_fetcher/proxy_spec.rb +14 -14
- data/spec/proxy_fetcher/version_spec.rb +2 -0
- data/spec/spec_helper.rb +10 -10
- data/spec/support/manager_examples.rb +21 -21
- metadata +14 -8
@@ -6,13 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyList < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://free-proxy-list.net/"
|
10
10
|
end
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
|
-
def
|
14
|
-
|
15
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
16
15
|
end
|
17
16
|
|
18
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -26,10 +25,10 @@ module ProxyFetcher
|
|
26
25
|
#
|
27
26
|
def to_proxy(html_node)
|
28
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
29
|
-
proxy.addr = html_node.content_at(
|
30
|
-
proxy.port = Integer(html_node.content_at(
|
31
|
-
proxy.country = html_node.content_at(
|
32
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
33
32
|
proxy.type = parse_type(html_node)
|
34
33
|
end
|
35
34
|
end
|
@@ -45,8 +44,29 @@ module ProxyFetcher
|
|
45
44
|
# Proxy type
|
46
45
|
#
|
47
46
|
def parse_type(html_node)
|
48
|
-
https = html_node.content_at(
|
49
|
-
|
47
|
+
https = html_node.content_at("td[6]")
|
48
|
+
# frozen_string_literal: true
|
49
|
+
# FreeProxyList provider class.
|
50
|
+
# Provider URL to fetch proxy list
|
51
|
+
# [NOTE] Doesn't support filtering
|
52
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
53
|
+
# object.
|
54
|
+
#
|
55
|
+
# @param html_node [Object]
|
56
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
57
|
+
#
|
58
|
+
# @return [ProxyFetcher::Proxy]
|
59
|
+
# Proxy object
|
60
|
+
#
|
61
|
+
# Parses HTML node to extract proxy type.
|
62
|
+
#
|
63
|
+
# @param html_node [Object]
|
64
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
65
|
+
#
|
66
|
+
# @return [String]
|
67
|
+
# Proxy type
|
68
|
+
#
|
69
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
50
70
|
end
|
51
71
|
end
|
52
72
|
|
@@ -6,20 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyListSSL < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
# [NOTE] Doesn't support filtering
|
20
|
-
def load_proxy_list(_filters = {})
|
21
|
-
doc = load_document(provider_url, {})
|
22
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
12
|
+
def xpath
|
13
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
23
14
|
end
|
24
15
|
|
25
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -33,10 +24,10 @@ module ProxyFetcher
|
|
33
24
|
#
|
34
25
|
def to_proxy(html_node)
|
35
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
36
|
-
proxy.addr = html_node.content_at(
|
37
|
-
proxy.port = Integer(html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.anonymity = html_node.content_at(
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.country = html_node.content_at("td[4]")
|
30
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
40
31
|
proxy.type = ProxyFetcher::Proxy::HTTPS
|
41
32
|
end
|
42
33
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "json"
|
4
4
|
|
5
5
|
module ProxyFetcher
|
6
6
|
module Providers
|
@@ -8,19 +8,11 @@ module ProxyFetcher
|
|
8
8
|
class GatherProxy < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
10
|
def provider_url
|
11
|
-
|
11
|
+
"http://www.gatherproxy.com/"
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
# to return all the proxy entries (HTML nodes).
|
17
|
-
#
|
18
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
19
|
-
# Collection of extracted HTML nodes with full proxy info
|
20
|
-
#
|
21
|
-
def load_proxy_list(*)
|
22
|
-
doc = load_document(provider_url)
|
23
|
-
doc.xpath('//div[@class="proxy-list"]/table/script')
|
14
|
+
def xpath
|
15
|
+
'//div[@class="proxy-list"]/table/script'
|
24
16
|
end
|
25
17
|
|
26
18
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -36,11 +28,11 @@ module ProxyFetcher
|
|
36
28
|
json = parse_json(html_node)
|
37
29
|
|
38
30
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
39
|
-
proxy.addr = json[
|
40
|
-
proxy.port = json[
|
41
|
-
proxy.anonymity = json[
|
42
|
-
proxy.country = json[
|
43
|
-
proxy.response_time = json[
|
31
|
+
proxy.addr = json["PROXY_IP"]
|
32
|
+
proxy.port = json["PROXY_PORT"].to_i(16)
|
33
|
+
proxy.anonymity = json["PROXY_TYPE"]
|
34
|
+
proxy.country = json["PROXY_COUNTRY"]
|
35
|
+
proxy.response_time = json["PROXY_TIME"].to_i
|
44
36
|
proxy.type = ProxyFetcher::Proxy::HTTP
|
45
37
|
end
|
46
38
|
end
|
@@ -6,19 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class HTTPTunnel < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"http://www.httptunnel.ge/ProxyListForFree.aspx"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
def load_proxy_list(_filters = {})
|
20
|
-
doc = load_document(provider_url)
|
21
|
-
doc.xpath('//table[contains(@id, "GridView")]/tr[(count(td)>2)]')
|
12
|
+
def xpath
|
13
|
+
'//table[contains(@id, "GridView")]/tr[(count(td)>2)]'
|
22
14
|
end
|
23
15
|
|
24
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -53,7 +45,7 @@ module ProxyFetcher
|
|
53
45
|
# URI object
|
54
46
|
#
|
55
47
|
def parse_proxy_uri(html_node)
|
56
|
-
full_addr = html_node.content_at(
|
48
|
+
full_addr = html_node.content_at("td[1]")
|
57
49
|
URI.parse("http://#{full_addr}")
|
58
50
|
end
|
59
51
|
|
@@ -66,7 +58,7 @@ module ProxyFetcher
|
|
66
58
|
# Country code
|
67
59
|
#
|
68
60
|
def parse_country(html_node)
|
69
|
-
html_node.find(
|
61
|
+
html_node.find(".//img").attr("title")
|
70
62
|
end
|
71
63
|
|
72
64
|
# Parses HTML node to extract proxy anonymity level.
|
@@ -78,14 +70,14 @@ module ProxyFetcher
|
|
78
70
|
# Anonymity level
|
79
71
|
#
|
80
72
|
def parse_anonymity(html_node)
|
81
|
-
transparency = html_node.content_at(
|
73
|
+
transparency = html_node.content_at("td[5]").to_sym
|
82
74
|
|
83
75
|
{
|
84
|
-
A:
|
85
|
-
E:
|
86
|
-
T:
|
87
|
-
U:
|
88
|
-
}.fetch(transparency,
|
76
|
+
A: "Anonymous",
|
77
|
+
E: "Elite",
|
78
|
+
T: "Transparent",
|
79
|
+
U: "Unknown"
|
80
|
+
}.fetch(transparency, "Unknown")
|
89
81
|
end
|
90
82
|
end
|
91
83
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "base64"
|
4
4
|
|
5
5
|
module ProxyFetcher
|
6
6
|
module Providers
|
@@ -8,19 +8,11 @@ module ProxyFetcher
|
|
8
8
|
class ProxyList < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
10
|
def provider_url
|
11
|
-
|
11
|
+
"https://proxy-list.org/english/index.php"
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
# to return all the proxy entries (HTML nodes).
|
17
|
-
#
|
18
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
19
|
-
# Collection of extracted HTML nodes with full proxy info
|
20
|
-
#
|
21
|
-
def load_proxy_list(filters = {})
|
22
|
-
doc = load_document(provider_url, filters)
|
23
|
-
doc.css('.table-wrap .table ul')
|
14
|
+
def xpath
|
15
|
+
'//div[@class="table-wrap"]/div[@class="table"]/ul'
|
24
16
|
end
|
25
17
|
|
26
18
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -38,9 +30,9 @@ module ProxyFetcher
|
|
38
30
|
proxy.addr = uri.host
|
39
31
|
proxy.port = uri.port
|
40
32
|
|
41
|
-
proxy.type = html_node.content_at(
|
42
|
-
proxy.anonymity = html_node.content_at(
|
43
|
-
proxy.country = html_node.find("li[5]//span[@class='country']").attr(
|
33
|
+
proxy.type = html_node.content_at("li[2]")
|
34
|
+
proxy.anonymity = html_node.content_at("li[4]")
|
35
|
+
proxy.country = html_node.find("li[5]//span[@class='country']").attr("title")
|
44
36
|
end
|
45
37
|
end
|
46
38
|
|
@@ -55,7 +47,7 @@ module ProxyFetcher
|
|
55
47
|
# URI object
|
56
48
|
#
|
57
49
|
def parse_proxy_uri(html_node)
|
58
|
-
full_addr = ::Base64.decode64(html_node.at_css(
|
50
|
+
full_addr = ::Base64.decode64(html_node.at_css("li script").html.match(/'(.+)'/)[1])
|
59
51
|
URI.parse("http://#{full_addr}")
|
60
52
|
end
|
61
53
|
end
|
@@ -6,19 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class XRoxy < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.xroxy.com/free-proxy-lists/"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
def load_proxy_list(filters = { type: 'All_http' })
|
20
|
-
doc = load_document(provider_url, filters)
|
21
|
-
doc.xpath('//div/table/tbody/tr')
|
12
|
+
def xpath
|
13
|
+
"//div/table/tbody/tr"
|
22
14
|
end
|
23
15
|
|
24
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -32,12 +24,12 @@ module ProxyFetcher
|
|
32
24
|
#
|
33
25
|
def to_proxy(html_node)
|
34
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
35
|
-
proxy.addr = html_node.content_at(
|
36
|
-
proxy.port = Integer(html_node.content_at(
|
37
|
-
proxy.anonymity = html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.response_time = Integer(html_node.content_at(
|
40
|
-
proxy.type = html_node.content_at(
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.anonymity = html_node.content_at("td[3]")
|
30
|
+
proxy.country = html_node.content_at("td[5]")
|
31
|
+
proxy.response_time = Integer(html_node.content_at("td[6]"))
|
32
|
+
proxy.type = html_node.content_at("td[3]")
|
41
33
|
end
|
42
34
|
end
|
43
35
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -29,10 +29,10 @@ module ProxyFetcher
|
|
29
29
|
|
30
30
|
# Proxy types
|
31
31
|
TYPES = [
|
32
|
-
HTTP =
|
33
|
-
HTTPS =
|
34
|
-
SOCKS4 =
|
35
|
-
SOCKS5 =
|
32
|
+
HTTP = "HTTP",
|
33
|
+
HTTPS = "HTTPS",
|
34
|
+
SOCKS4 = "SOCKS4",
|
35
|
+
SOCKS5 = "SOCKS5"
|
36
36
|
].freeze
|
37
37
|
|
38
38
|
# Proxy type predicates (#socks4?, #https?)
|
@@ -70,22 +70,24 @@ module ProxyFetcher
|
|
70
70
|
def fetch
|
71
71
|
response = process_http_request
|
72
72
|
response.body.to_s
|
73
|
-
rescue StandardError =>
|
74
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{
|
75
|
-
|
73
|
+
rescue StandardError => e
|
74
|
+
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
75
|
+
""
|
76
76
|
end
|
77
77
|
|
78
78
|
def fetch_with_headers
|
79
79
|
process_http_request
|
80
|
-
rescue StandardError =>
|
81
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{
|
82
|
-
|
80
|
+
rescue StandardError => e
|
81
|
+
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
82
|
+
HTTP::Response.new(version: "1.1", status: 500, body: "")
|
83
83
|
end
|
84
84
|
|
85
85
|
protected
|
86
86
|
|
87
87
|
def process_http_request(http_method: method, http_params: params)
|
88
|
-
|
88
|
+
unless HTTP::Request::METHODS.include?(http_method)
|
89
|
+
raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
|
90
|
+
end
|
89
91
|
|
90
92
|
http.public_send(
|
91
93
|
http_method.to_sym, url,
|
@@ -101,7 +103,7 @@ module ProxyFetcher
|
|
101
103
|
#
|
102
104
|
def default_headers
|
103
105
|
{
|
104
|
-
|
106
|
+
"User-Agent" => ProxyFetcher.config.user_agent
|
105
107
|
}
|
106
108
|
end
|
107
109
|
end
|
@@ -34,7 +34,9 @@ module ProxyFetcher
|
|
34
34
|
proxy = target_proxies_lock.synchronize { target_proxies.shift }
|
35
35
|
break unless proxy
|
36
36
|
|
37
|
-
|
37
|
+
if proxy.connectable?
|
38
|
+
connectable_proxies_lock.synchronize { connectable_proxies << proxy }
|
39
|
+
end
|
38
40
|
end
|
39
41
|
end
|
40
42
|
end
|
@@ -6,7 +6,7 @@ module ProxyFetcher
|
|
6
6
|
# URL to check if proxy can be used (aka connectable?).
|
7
7
|
class ProxyValidator
|
8
8
|
# Default URL that will be used to check if proxy can be used.
|
9
|
-
URL_TO_CHECK =
|
9
|
+
URL_TO_CHECK = "https://google.com"
|
10
10
|
|
11
11
|
# Short variant to validate proxy.
|
12
12
|
#
|
@@ -13,11 +13,11 @@ module ProxyFetcher
|
|
13
13
|
# Major version number
|
14
14
|
MAJOR = 0
|
15
15
|
# Minor version number
|
16
|
-
MINOR =
|
16
|
+
MINOR = 11
|
17
17
|
# Smallest version number
|
18
|
-
TINY =
|
18
|
+
TINY = 0
|
19
19
|
|
20
20
|
# Full version number
|
21
|
-
STRING = [MAJOR, MINOR, TINY].compact.join(
|
21
|
+
STRING = [MAJOR, MINOR, TINY].compact.join(".")
|
22
22
|
end
|
23
23
|
end
|
data/proxy_fetcher.gemspec
CHANGED
@@ -1,25 +1,28 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
4
|
+
|
5
|
+
require "proxy_fetcher/version"
|
4
6
|
|
5
7
|
Gem::Specification.new do |gem|
|
6
|
-
gem.name =
|
8
|
+
gem.name = "proxy_fetcher"
|
7
9
|
gem.version = ProxyFetcher.gem_version
|
8
|
-
gem.summary =
|
9
|
-
gem.description =
|
10
|
-
|
11
|
-
|
12
|
-
gem.
|
13
|
-
gem.
|
14
|
-
gem.
|
10
|
+
gem.summary = "Ruby gem for dealing with proxy lists from different providers"
|
11
|
+
gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \
|
12
|
+
"using proxies by fetching and validating proxy lists from " \
|
13
|
+
"the different providers."
|
14
|
+
gem.authors = ["Nikita Bulai"]
|
15
|
+
gem.email = "bulajnikita@gmail.com"
|
16
|
+
gem.require_paths = ["lib"]
|
17
|
+
gem.bindir = "bin"
|
15
18
|
gem.files = `git ls-files`.split($RS) - %w[README.md .travis.yml .rubocop.yml]
|
16
19
|
gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
17
|
-
gem.homepage =
|
18
|
-
gem.license =
|
19
|
-
gem.required_ruby_version =
|
20
|
+
gem.homepage = "http://github.com/nbulaj/proxy_fetcher"
|
21
|
+
gem.license = "MIT"
|
22
|
+
gem.required_ruby_version = ">= 2.3.0"
|
20
23
|
|
21
|
-
gem.add_runtime_dependency
|
24
|
+
gem.add_runtime_dependency "http", ">= 3", "< 5"
|
22
25
|
|
23
|
-
gem.add_development_dependency
|
24
|
-
gem.add_development_dependency
|
26
|
+
gem.add_development_dependency "rake", ">= 12.0"
|
27
|
+
gem.add_development_dependency "rspec", "~> 3.5"
|
25
28
|
end
|
@@ -1,19 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "spec_helper"
|
4
|
+
require "json"
|
5
5
|
|
6
|
-
require
|
7
|
-
require
|
6
|
+
require "evil-proxy"
|
7
|
+
require "evil-proxy/async"
|
8
8
|
|
9
|
-
|
9
|
+
describe ProxyFetcher::Client do
|
10
10
|
before :all do
|
11
11
|
ProxyFetcher.configure do |config|
|
12
12
|
config.provider = :xroxy
|
13
13
|
config.client_timeout = 5
|
14
|
+
config.logger = ProxyFetcher::NullLogger.new
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
+
quiet = ENV.key?("LOG_MITM") ? ENV["LOG_MITM"] == "false" : true
|
18
|
+
|
19
|
+
@server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: quiet
|
17
20
|
@server.start
|
18
21
|
end
|
19
22
|
|
@@ -21,121 +24,133 @@ xdescribe ProxyFetcher::Client do
|
|
21
24
|
@server.shutdown
|
22
25
|
end
|
23
26
|
|
27
|
+
let(:local_proxy) { ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") }
|
28
|
+
|
24
29
|
# Use local proxy server in order to avoid side effects, non-working proxies, etc
|
25
30
|
before :each do
|
26
|
-
|
27
|
-
ProxyFetcher::
|
28
|
-
allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies!).and_return([proxy])
|
31
|
+
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [local_proxy])
|
32
|
+
allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies).and_return([local_proxy])
|
29
33
|
end
|
30
34
|
|
31
|
-
context
|
32
|
-
it
|
33
|
-
content = ProxyFetcher::Client.get(
|
35
|
+
context "GET request with the valid proxy" do
|
36
|
+
it "successfully returns page content for HTTP" do
|
37
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/get")
|
34
38
|
|
35
39
|
expect(content).not_to be_empty
|
36
40
|
end
|
37
41
|
|
38
|
-
|
39
|
-
|
42
|
+
# TODO: oh this SSL / MITM proxies ....
|
43
|
+
xit "successfully returns page content for HTTPS" do
|
44
|
+
content = ProxyFetcher::Client.get("https://httpbin.org/get")
|
40
45
|
|
41
46
|
expect(content).not_to be_empty
|
42
47
|
end
|
43
48
|
|
44
|
-
it
|
45
|
-
|
46
|
-
|
47
|
-
proxy = manager.get! until proxy
|
48
|
-
content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy })
|
49
|
+
it "successfully returns page content using custom proxy" do
|
50
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/get", options: { proxy: local_proxy })
|
49
51
|
|
50
52
|
expect(content).not_to be_empty
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
54
|
-
context
|
55
|
-
it
|
56
|
+
context "POST request with the valid proxy" do
|
57
|
+
it "successfully returns page content for HTTP" do
|
56
58
|
headers = {
|
57
|
-
|
59
|
+
"X-Proxy-Fetcher-Version" => ProxyFetcher::VERSION::STRING
|
58
60
|
}
|
59
|
-
|
61
|
+
|
62
|
+
content = ProxyFetcher::Client.post(
|
63
|
+
"http://httpbin.org/post",
|
64
|
+
{ param: "value" },
|
65
|
+
headers: headers
|
66
|
+
)
|
60
67
|
|
61
68
|
expect(content).not_to be_empty
|
62
69
|
|
63
70
|
json = JSON.parse(content)
|
64
71
|
|
65
|
-
expect(json[
|
66
|
-
expect(json[
|
72
|
+
expect(json["headers"]["X-Proxy-Fetcher-Version"]).to eq(ProxyFetcher::VERSION::STRING)
|
73
|
+
expect(json["headers"]["User-Agent"]).to eq(ProxyFetcher.config.user_agent)
|
67
74
|
end
|
68
75
|
end
|
69
76
|
|
70
|
-
|
71
|
-
|
72
|
-
|
77
|
+
# TODO: EvilProxy incompatible with latest Ruby/Webrick
|
78
|
+
# @see https://github.com/bbtfr/evil-proxy/issues/10
|
79
|
+
if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.6")
|
80
|
+
context "PUT request with the valid proxy" do
|
81
|
+
it "successfully returns page content for HTTP" do
|
82
|
+
content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue")
|
73
83
|
|
74
|
-
|
84
|
+
expect(content).not_to be_empty
|
75
85
|
|
76
|
-
|
86
|
+
json = JSON.parse(content)
|
77
87
|
|
78
|
-
|
88
|
+
expect(json["form"]["param"]).to eq("PutValue")
|
89
|
+
end
|
79
90
|
end
|
80
|
-
end
|
81
91
|
|
82
|
-
|
83
|
-
|
84
|
-
|
92
|
+
context "PATCH request with the valid proxy" do
|
93
|
+
it "successfully returns page content for HTTP" do
|
94
|
+
content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value")
|
85
95
|
|
86
|
-
|
96
|
+
expect(content).not_to be_empty
|
87
97
|
|
88
|
-
|
98
|
+
json = JSON.parse(content)
|
89
99
|
|
90
|
-
|
100
|
+
expect(json["form"]["param"]).to eq("value")
|
101
|
+
end
|
91
102
|
end
|
92
103
|
end
|
93
104
|
|
94
|
-
context
|
95
|
-
it
|
96
|
-
content = ProxyFetcher::Client.delete(
|
105
|
+
context "DELETE request with the valid proxy" do
|
106
|
+
it "successfully returns page content for HTTP" do
|
107
|
+
content = ProxyFetcher::Client.delete("http://httpbin.org/delete")
|
97
108
|
|
98
109
|
expect(content).not_to be_empty
|
99
110
|
end
|
100
111
|
end
|
101
112
|
|
102
|
-
context
|
103
|
-
it
|
104
|
-
content = ProxyFetcher::Client.head(
|
113
|
+
context "HEAD request with the valid proxy" do
|
114
|
+
it "successfully works" do
|
115
|
+
content = ProxyFetcher::Client.head("http://httpbin.org")
|
105
116
|
|
106
117
|
expect(content).to be_empty
|
107
118
|
end
|
108
119
|
end
|
109
120
|
|
110
|
-
|
111
|
-
it
|
121
|
+
xcontext "retries" do
|
122
|
+
it "raises an error when reaches max retries limit" do
|
112
123
|
allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
|
113
124
|
|
114
|
-
expect { ProxyFetcher::Client.get(
|
125
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
126
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
115
127
|
end
|
116
128
|
|
117
|
-
it
|
129
|
+
it "raises an error when http request returns an error" do
|
118
130
|
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
119
131
|
|
120
|
-
expect { ProxyFetcher::Client.get(
|
132
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
133
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
121
134
|
end
|
122
135
|
|
123
|
-
it
|
124
|
-
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:
|
136
|
+
it "refreshes proxy lists if no proxy found" do
|
137
|
+
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", [])
|
125
138
|
|
126
|
-
expect { ProxyFetcher::Client.get(
|
139
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
140
|
+
.not_to raise_error
|
127
141
|
end
|
128
142
|
end
|
129
143
|
|
130
|
-
context
|
131
|
-
it
|
132
|
-
content = ProxyFetcher::Client.get(
|
144
|
+
context "redirects" do
|
145
|
+
it "follows redirect when present" do
|
146
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
|
133
147
|
|
134
148
|
expect(content).not_to be_empty
|
135
149
|
end
|
136
150
|
|
137
|
-
it
|
138
|
-
expect { ProxyFetcher::Client.get(
|
151
|
+
it "raises an error when reaches max redirects limit" do
|
152
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/11") }
|
153
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached)
|
139
154
|
end
|
140
155
|
end
|
141
156
|
end
|