proxy_fetcher 0.10.2 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -5
- data/Rakefile +4 -2
- data/gemfiles/nokogiri.gemfile +8 -6
- data/gemfiles/oga.gemfile +8 -6
- data/lib/proxy_fetcher.rb +30 -30
- data/lib/proxy_fetcher/client/client.rb +10 -3
- data/lib/proxy_fetcher/client/request.rb +4 -4
- data/lib/proxy_fetcher/configuration.rb +12 -11
- data/lib/proxy_fetcher/document.rb +0 -9
- data/lib/proxy_fetcher/document/adapters.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +2 -2
- data/lib/proxy_fetcher/exceptions.rb +6 -6
- data/lib/proxy_fetcher/manager.rb +2 -2
- data/lib/proxy_fetcher/providers/base.rb +42 -22
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
- data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
- data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
- data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
- data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
- data/lib/proxy_fetcher/proxy.rb +4 -4
- data/lib/proxy_fetcher/utils/http_client.rb +10 -8
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
- data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
- data/lib/proxy_fetcher/version.rb +3 -3
- data/proxy_fetcher.gemspec +19 -16
- data/spec/proxy_fetcher/client/client_spec.rb +72 -57
- data/spec/proxy_fetcher/configuration_spec.rb +11 -11
- data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
- data/spec/proxy_fetcher/document/node_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
- data/spec/proxy_fetcher/proxy_spec.rb +14 -14
- data/spec/proxy_fetcher/version_spec.rb +2 -0
- data/spec/spec_helper.rb +10 -10
- data/spec/support/manager_examples.rb +21 -21
- metadata +14 -8
@@ -6,13 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyList < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://free-proxy-list.net/"
|
10
10
|
end
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
|
-
def
|
14
|
-
|
15
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
16
15
|
end
|
17
16
|
|
18
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -26,10 +25,10 @@ module ProxyFetcher
|
|
26
25
|
#
|
27
26
|
def to_proxy(html_node)
|
28
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
29
|
-
proxy.addr = html_node.content_at(
|
30
|
-
proxy.port = Integer(html_node.content_at(
|
31
|
-
proxy.country = html_node.content_at(
|
32
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
33
32
|
proxy.type = parse_type(html_node)
|
34
33
|
end
|
35
34
|
end
|
@@ -45,8 +44,29 @@ module ProxyFetcher
|
|
45
44
|
# Proxy type
|
46
45
|
#
|
47
46
|
def parse_type(html_node)
|
48
|
-
https = html_node.content_at(
|
49
|
-
|
47
|
+
https = html_node.content_at("td[6]")
|
48
|
+
# frozen_string_literal: true
|
49
|
+
# FreeProxyList provider class.
|
50
|
+
# Provider URL to fetch proxy list
|
51
|
+
# [NOTE] Doesn't support filtering
|
52
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
53
|
+
# object.
|
54
|
+
#
|
55
|
+
# @param html_node [Object]
|
56
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
57
|
+
#
|
58
|
+
# @return [ProxyFetcher::Proxy]
|
59
|
+
# Proxy object
|
60
|
+
#
|
61
|
+
# Parses HTML node to extract proxy type.
|
62
|
+
#
|
63
|
+
# @param html_node [Object]
|
64
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
65
|
+
#
|
66
|
+
# @return [String]
|
67
|
+
# Proxy type
|
68
|
+
#
|
69
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
50
70
|
end
|
51
71
|
end
|
52
72
|
|
@@ -6,20 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyListSSL < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
# [NOTE] Doesn't support filtering
|
20
|
-
def load_proxy_list(_filters = {})
|
21
|
-
doc = load_document(provider_url, {})
|
22
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
12
|
+
def xpath
|
13
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
23
14
|
end
|
24
15
|
|
25
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -33,10 +24,10 @@ module ProxyFetcher
|
|
33
24
|
#
|
34
25
|
def to_proxy(html_node)
|
35
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
36
|
-
proxy.addr = html_node.content_at(
|
37
|
-
proxy.port = Integer(html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.anonymity = html_node.content_at(
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.country = html_node.content_at("td[4]")
|
30
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
40
31
|
proxy.type = ProxyFetcher::Proxy::HTTPS
|
41
32
|
end
|
42
33
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "json"
|
4
4
|
|
5
5
|
module ProxyFetcher
|
6
6
|
module Providers
|
@@ -8,19 +8,11 @@ module ProxyFetcher
|
|
8
8
|
class GatherProxy < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
10
|
def provider_url
|
11
|
-
|
11
|
+
"http://www.gatherproxy.com/"
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
# to return all the proxy entries (HTML nodes).
|
17
|
-
#
|
18
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
19
|
-
# Collection of extracted HTML nodes with full proxy info
|
20
|
-
#
|
21
|
-
def load_proxy_list(*)
|
22
|
-
doc = load_document(provider_url)
|
23
|
-
doc.xpath('//div[@class="proxy-list"]/table/script')
|
14
|
+
def xpath
|
15
|
+
'//div[@class="proxy-list"]/table/script'
|
24
16
|
end
|
25
17
|
|
26
18
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -36,11 +28,11 @@ module ProxyFetcher
|
|
36
28
|
json = parse_json(html_node)
|
37
29
|
|
38
30
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
39
|
-
proxy.addr = json[
|
40
|
-
proxy.port = json[
|
41
|
-
proxy.anonymity = json[
|
42
|
-
proxy.country = json[
|
43
|
-
proxy.response_time = json[
|
31
|
+
proxy.addr = json["PROXY_IP"]
|
32
|
+
proxy.port = json["PROXY_PORT"].to_i(16)
|
33
|
+
proxy.anonymity = json["PROXY_TYPE"]
|
34
|
+
proxy.country = json["PROXY_COUNTRY"]
|
35
|
+
proxy.response_time = json["PROXY_TIME"].to_i
|
44
36
|
proxy.type = ProxyFetcher::Proxy::HTTP
|
45
37
|
end
|
46
38
|
end
|
@@ -6,19 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class HTTPTunnel < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"http://www.httptunnel.ge/ProxyListForFree.aspx"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
def load_proxy_list(_filters = {})
|
20
|
-
doc = load_document(provider_url)
|
21
|
-
doc.xpath('//table[contains(@id, "GridView")]/tr[(count(td)>2)]')
|
12
|
+
def xpath
|
13
|
+
'//table[contains(@id, "GridView")]/tr[(count(td)>2)]'
|
22
14
|
end
|
23
15
|
|
24
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -53,7 +45,7 @@ module ProxyFetcher
|
|
53
45
|
# URI object
|
54
46
|
#
|
55
47
|
def parse_proxy_uri(html_node)
|
56
|
-
full_addr = html_node.content_at(
|
48
|
+
full_addr = html_node.content_at("td[1]")
|
57
49
|
URI.parse("http://#{full_addr}")
|
58
50
|
end
|
59
51
|
|
@@ -66,7 +58,7 @@ module ProxyFetcher
|
|
66
58
|
# Country code
|
67
59
|
#
|
68
60
|
def parse_country(html_node)
|
69
|
-
html_node.find(
|
61
|
+
html_node.find(".//img").attr("title")
|
70
62
|
end
|
71
63
|
|
72
64
|
# Parses HTML node to extract proxy anonymity level.
|
@@ -78,14 +70,14 @@ module ProxyFetcher
|
|
78
70
|
# Anonymity level
|
79
71
|
#
|
80
72
|
def parse_anonymity(html_node)
|
81
|
-
transparency = html_node.content_at(
|
73
|
+
transparency = html_node.content_at("td[5]").to_sym
|
82
74
|
|
83
75
|
{
|
84
|
-
A:
|
85
|
-
E:
|
86
|
-
T:
|
87
|
-
U:
|
88
|
-
}.fetch(transparency,
|
76
|
+
A: "Anonymous",
|
77
|
+
E: "Elite",
|
78
|
+
T: "Transparent",
|
79
|
+
U: "Unknown"
|
80
|
+
}.fetch(transparency, "Unknown")
|
89
81
|
end
|
90
82
|
end
|
91
83
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "base64"
|
4
4
|
|
5
5
|
module ProxyFetcher
|
6
6
|
module Providers
|
@@ -8,19 +8,11 @@ module ProxyFetcher
|
|
8
8
|
class ProxyList < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
10
|
def provider_url
|
11
|
-
|
11
|
+
"https://proxy-list.org/english/index.php"
|
12
12
|
end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
# to return all the proxy entries (HTML nodes).
|
17
|
-
#
|
18
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
19
|
-
# Collection of extracted HTML nodes with full proxy info
|
20
|
-
#
|
21
|
-
def load_proxy_list(filters = {})
|
22
|
-
doc = load_document(provider_url, filters)
|
23
|
-
doc.css('.table-wrap .table ul')
|
14
|
+
def xpath
|
15
|
+
'//div[@class="table-wrap"]/div[@class="table"]/ul'
|
24
16
|
end
|
25
17
|
|
26
18
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -38,9 +30,9 @@ module ProxyFetcher
|
|
38
30
|
proxy.addr = uri.host
|
39
31
|
proxy.port = uri.port
|
40
32
|
|
41
|
-
proxy.type = html_node.content_at(
|
42
|
-
proxy.anonymity = html_node.content_at(
|
43
|
-
proxy.country = html_node.find("li[5]//span[@class='country']").attr(
|
33
|
+
proxy.type = html_node.content_at("li[2]")
|
34
|
+
proxy.anonymity = html_node.content_at("li[4]")
|
35
|
+
proxy.country = html_node.find("li[5]//span[@class='country']").attr("title")
|
44
36
|
end
|
45
37
|
end
|
46
38
|
|
@@ -55,7 +47,7 @@ module ProxyFetcher
|
|
55
47
|
# URI object
|
56
48
|
#
|
57
49
|
def parse_proxy_uri(html_node)
|
58
|
-
full_addr = ::Base64.decode64(html_node.at_css(
|
50
|
+
full_addr = ::Base64.decode64(html_node.at_css("li script").html.match(/'(.+)'/)[1])
|
59
51
|
URI.parse("http://#{full_addr}")
|
60
52
|
end
|
61
53
|
end
|
@@ -6,19 +6,11 @@ module ProxyFetcher
|
|
6
6
|
class XRoxy < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.xroxy.com/free-proxy-lists/"
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
|
-
def load_proxy_list(filters = { type: 'All_http' })
|
20
|
-
doc = load_document(provider_url, filters)
|
21
|
-
doc.xpath('//div/table/tbody/tr')
|
12
|
+
def xpath
|
13
|
+
"//div/table/tbody/tr"
|
22
14
|
end
|
23
15
|
|
24
16
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -32,12 +24,12 @@ module ProxyFetcher
|
|
32
24
|
#
|
33
25
|
def to_proxy(html_node)
|
34
26
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
35
|
-
proxy.addr = html_node.content_at(
|
36
|
-
proxy.port = Integer(html_node.content_at(
|
37
|
-
proxy.anonymity = html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.response_time = Integer(html_node.content_at(
|
40
|
-
proxy.type = html_node.content_at(
|
27
|
+
proxy.addr = html_node.content_at("td[1]")
|
28
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
29
|
+
proxy.anonymity = html_node.content_at("td[3]")
|
30
|
+
proxy.country = html_node.content_at("td[5]")
|
31
|
+
proxy.response_time = Integer(html_node.content_at("td[6]"))
|
32
|
+
proxy.type = html_node.content_at("td[3]")
|
41
33
|
end
|
42
34
|
end
|
43
35
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -29,10 +29,10 @@ module ProxyFetcher
|
|
29
29
|
|
30
30
|
# Proxy types
|
31
31
|
TYPES = [
|
32
|
-
HTTP =
|
33
|
-
HTTPS =
|
34
|
-
SOCKS4 =
|
35
|
-
SOCKS5 =
|
32
|
+
HTTP = "HTTP",
|
33
|
+
HTTPS = "HTTPS",
|
34
|
+
SOCKS4 = "SOCKS4",
|
35
|
+
SOCKS5 = "SOCKS5"
|
36
36
|
].freeze
|
37
37
|
|
38
38
|
# Proxy type predicates (#socks4?, #https?)
|
@@ -70,22 +70,24 @@ module ProxyFetcher
|
|
70
70
|
def fetch
|
71
71
|
response = process_http_request
|
72
72
|
response.body.to_s
|
73
|
-
rescue StandardError =>
|
74
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{
|
75
|
-
|
73
|
+
rescue StandardError => e
|
74
|
+
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
75
|
+
""
|
76
76
|
end
|
77
77
|
|
78
78
|
def fetch_with_headers
|
79
79
|
process_http_request
|
80
|
-
rescue StandardError =>
|
81
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{
|
82
|
-
|
80
|
+
rescue StandardError => e
|
81
|
+
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
82
|
+
HTTP::Response.new(version: "1.1", status: 500, body: "")
|
83
83
|
end
|
84
84
|
|
85
85
|
protected
|
86
86
|
|
87
87
|
def process_http_request(http_method: method, http_params: params)
|
88
|
-
|
88
|
+
unless HTTP::Request::METHODS.include?(http_method)
|
89
|
+
raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
|
90
|
+
end
|
89
91
|
|
90
92
|
http.public_send(
|
91
93
|
http_method.to_sym, url,
|
@@ -101,7 +103,7 @@ module ProxyFetcher
|
|
101
103
|
#
|
102
104
|
def default_headers
|
103
105
|
{
|
104
|
-
|
106
|
+
"User-Agent" => ProxyFetcher.config.user_agent
|
105
107
|
}
|
106
108
|
end
|
107
109
|
end
|
@@ -34,7 +34,9 @@ module ProxyFetcher
|
|
34
34
|
proxy = target_proxies_lock.synchronize { target_proxies.shift }
|
35
35
|
break unless proxy
|
36
36
|
|
37
|
-
|
37
|
+
if proxy.connectable?
|
38
|
+
connectable_proxies_lock.synchronize { connectable_proxies << proxy }
|
39
|
+
end
|
38
40
|
end
|
39
41
|
end
|
40
42
|
end
|
@@ -6,7 +6,7 @@ module ProxyFetcher
|
|
6
6
|
# URL to check if proxy can be used (aka connectable?).
|
7
7
|
class ProxyValidator
|
8
8
|
# Default URL that will be used to check if proxy can be used.
|
9
|
-
URL_TO_CHECK =
|
9
|
+
URL_TO_CHECK = "https://google.com"
|
10
10
|
|
11
11
|
# Short variant to validate proxy.
|
12
12
|
#
|
@@ -13,11 +13,11 @@ module ProxyFetcher
|
|
13
13
|
# Major version number
|
14
14
|
MAJOR = 0
|
15
15
|
# Minor version number
|
16
|
-
MINOR =
|
16
|
+
MINOR = 11
|
17
17
|
# Smallest version number
|
18
|
-
TINY =
|
18
|
+
TINY = 0
|
19
19
|
|
20
20
|
# Full version number
|
21
|
-
STRING = [MAJOR, MINOR, TINY].compact.join(
|
21
|
+
STRING = [MAJOR, MINOR, TINY].compact.join(".")
|
22
22
|
end
|
23
23
|
end
|
data/proxy_fetcher.gemspec
CHANGED
@@ -1,25 +1,28 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
|
4
|
+
|
5
|
+
require "proxy_fetcher/version"
|
4
6
|
|
5
7
|
Gem::Specification.new do |gem|
|
6
|
-
gem.name =
|
8
|
+
gem.name = "proxy_fetcher"
|
7
9
|
gem.version = ProxyFetcher.gem_version
|
8
|
-
gem.summary =
|
9
|
-
gem.description =
|
10
|
-
|
11
|
-
|
12
|
-
gem.
|
13
|
-
gem.
|
14
|
-
gem.
|
10
|
+
gem.summary = "Ruby gem for dealing with proxy lists from different providers"
|
11
|
+
gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \
|
12
|
+
"using proxies by fetching and validating proxy lists from " \
|
13
|
+
"the different providers."
|
14
|
+
gem.authors = ["Nikita Bulai"]
|
15
|
+
gem.email = "bulajnikita@gmail.com"
|
16
|
+
gem.require_paths = ["lib"]
|
17
|
+
gem.bindir = "bin"
|
15
18
|
gem.files = `git ls-files`.split($RS) - %w[README.md .travis.yml .rubocop.yml]
|
16
19
|
gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
17
|
-
gem.homepage =
|
18
|
-
gem.license =
|
19
|
-
gem.required_ruby_version =
|
20
|
+
gem.homepage = "http://github.com/nbulaj/proxy_fetcher"
|
21
|
+
gem.license = "MIT"
|
22
|
+
gem.required_ruby_version = ">= 2.3.0"
|
20
23
|
|
21
|
-
gem.add_runtime_dependency
|
24
|
+
gem.add_runtime_dependency "http", ">= 3", "< 5"
|
22
25
|
|
23
|
-
gem.add_development_dependency
|
24
|
-
gem.add_development_dependency
|
26
|
+
gem.add_development_dependency "rake", ">= 12.0"
|
27
|
+
gem.add_development_dependency "rspec", "~> 3.5"
|
25
28
|
end
|
@@ -1,19 +1,22 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require "spec_helper"
|
4
|
+
require "json"
|
5
5
|
|
6
|
-
require
|
7
|
-
require
|
6
|
+
require "evil-proxy"
|
7
|
+
require "evil-proxy/async"
|
8
8
|
|
9
|
-
|
9
|
+
describe ProxyFetcher::Client do
|
10
10
|
before :all do
|
11
11
|
ProxyFetcher.configure do |config|
|
12
12
|
config.provider = :xroxy
|
13
13
|
config.client_timeout = 5
|
14
|
+
config.logger = ProxyFetcher::NullLogger.new
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
+
quiet = ENV.key?("LOG_MITM") ? ENV["LOG_MITM"] == "false" : true
|
18
|
+
|
19
|
+
@server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: quiet
|
17
20
|
@server.start
|
18
21
|
end
|
19
22
|
|
@@ -21,121 +24,133 @@ xdescribe ProxyFetcher::Client do
|
|
21
24
|
@server.shutdown
|
22
25
|
end
|
23
26
|
|
27
|
+
let(:local_proxy) { ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") }
|
28
|
+
|
24
29
|
# Use local proxy server in order to avoid side effects, non-working proxies, etc
|
25
30
|
before :each do
|
26
|
-
|
27
|
-
ProxyFetcher::
|
28
|
-
allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies!).and_return([proxy])
|
31
|
+
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [local_proxy])
|
32
|
+
allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies).and_return([local_proxy])
|
29
33
|
end
|
30
34
|
|
31
|
-
context
|
32
|
-
it
|
33
|
-
content = ProxyFetcher::Client.get(
|
35
|
+
context "GET request with the valid proxy" do
|
36
|
+
it "successfully returns page content for HTTP" do
|
37
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/get")
|
34
38
|
|
35
39
|
expect(content).not_to be_empty
|
36
40
|
end
|
37
41
|
|
38
|
-
|
39
|
-
|
42
|
+
# TODO: oh this SSL / MITM proxies ....
|
43
|
+
xit "successfully returns page content for HTTPS" do
|
44
|
+
content = ProxyFetcher::Client.get("https://httpbin.org/get")
|
40
45
|
|
41
46
|
expect(content).not_to be_empty
|
42
47
|
end
|
43
48
|
|
44
|
-
it
|
45
|
-
|
46
|
-
|
47
|
-
proxy = manager.get! until proxy
|
48
|
-
content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy })
|
49
|
+
it "successfully returns page content using custom proxy" do
|
50
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/get", options: { proxy: local_proxy })
|
49
51
|
|
50
52
|
expect(content).not_to be_empty
|
51
53
|
end
|
52
54
|
end
|
53
55
|
|
54
|
-
context
|
55
|
-
it
|
56
|
+
context "POST request with the valid proxy" do
|
57
|
+
it "successfully returns page content for HTTP" do
|
56
58
|
headers = {
|
57
|
-
|
59
|
+
"X-Proxy-Fetcher-Version" => ProxyFetcher::VERSION::STRING
|
58
60
|
}
|
59
|
-
|
61
|
+
|
62
|
+
content = ProxyFetcher::Client.post(
|
63
|
+
"http://httpbin.org/post",
|
64
|
+
{ param: "value" },
|
65
|
+
headers: headers
|
66
|
+
)
|
60
67
|
|
61
68
|
expect(content).not_to be_empty
|
62
69
|
|
63
70
|
json = JSON.parse(content)
|
64
71
|
|
65
|
-
expect(json[
|
66
|
-
expect(json[
|
72
|
+
expect(json["headers"]["X-Proxy-Fetcher-Version"]).to eq(ProxyFetcher::VERSION::STRING)
|
73
|
+
expect(json["headers"]["User-Agent"]).to eq(ProxyFetcher.config.user_agent)
|
67
74
|
end
|
68
75
|
end
|
69
76
|
|
70
|
-
|
71
|
-
|
72
|
-
|
77
|
+
# TODO: EvilProxy incompatible with latest Ruby/Webrick
|
78
|
+
# @see https://github.com/bbtfr/evil-proxy/issues/10
|
79
|
+
if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.6")
|
80
|
+
context "PUT request with the valid proxy" do
|
81
|
+
it "successfully returns page content for HTTP" do
|
82
|
+
content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue")
|
73
83
|
|
74
|
-
|
84
|
+
expect(content).not_to be_empty
|
75
85
|
|
76
|
-
|
86
|
+
json = JSON.parse(content)
|
77
87
|
|
78
|
-
|
88
|
+
expect(json["form"]["param"]).to eq("PutValue")
|
89
|
+
end
|
79
90
|
end
|
80
|
-
end
|
81
91
|
|
82
|
-
|
83
|
-
|
84
|
-
|
92
|
+
context "PATCH request with the valid proxy" do
|
93
|
+
it "successfully returns page content for HTTP" do
|
94
|
+
content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value")
|
85
95
|
|
86
|
-
|
96
|
+
expect(content).not_to be_empty
|
87
97
|
|
88
|
-
|
98
|
+
json = JSON.parse(content)
|
89
99
|
|
90
|
-
|
100
|
+
expect(json["form"]["param"]).to eq("value")
|
101
|
+
end
|
91
102
|
end
|
92
103
|
end
|
93
104
|
|
94
|
-
context
|
95
|
-
it
|
96
|
-
content = ProxyFetcher::Client.delete(
|
105
|
+
context "DELETE request with the valid proxy" do
|
106
|
+
it "successfully returns page content for HTTP" do
|
107
|
+
content = ProxyFetcher::Client.delete("http://httpbin.org/delete")
|
97
108
|
|
98
109
|
expect(content).not_to be_empty
|
99
110
|
end
|
100
111
|
end
|
101
112
|
|
102
|
-
context
|
103
|
-
it
|
104
|
-
content = ProxyFetcher::Client.head(
|
113
|
+
context "HEAD request with the valid proxy" do
|
114
|
+
it "successfully works" do
|
115
|
+
content = ProxyFetcher::Client.head("http://httpbin.org")
|
105
116
|
|
106
117
|
expect(content).to be_empty
|
107
118
|
end
|
108
119
|
end
|
109
120
|
|
110
|
-
|
111
|
-
it
|
121
|
+
xcontext "retries" do
|
122
|
+
it "raises an error when reaches max retries limit" do
|
112
123
|
allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
|
113
124
|
|
114
|
-
expect { ProxyFetcher::Client.get(
|
125
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
126
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
115
127
|
end
|
116
128
|
|
117
|
-
it
|
129
|
+
it "raises an error when http request returns an error" do
|
118
130
|
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
119
131
|
|
120
|
-
expect { ProxyFetcher::Client.get(
|
132
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
133
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
121
134
|
end
|
122
135
|
|
123
|
-
it
|
124
|
-
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:
|
136
|
+
it "refreshes proxy lists if no proxy found" do
|
137
|
+
ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", [])
|
125
138
|
|
126
|
-
expect { ProxyFetcher::Client.get(
|
139
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
140
|
+
.not_to raise_error
|
127
141
|
end
|
128
142
|
end
|
129
143
|
|
130
|
-
context
|
131
|
-
it
|
132
|
-
content = ProxyFetcher::Client.get(
|
144
|
+
context "redirects" do
|
145
|
+
it "follows redirect when present" do
|
146
|
+
content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
|
133
147
|
|
134
148
|
expect(content).not_to be_empty
|
135
149
|
end
|
136
150
|
|
137
|
-
it
|
138
|
-
expect { ProxyFetcher::Client.get(
|
151
|
+
it "raises an error when reaches max redirects limit" do
|
152
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/11") }
|
153
|
+
.to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached)
|
139
154
|
end
|
140
155
|
end
|
141
156
|
end
|