proxy_fetcher 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +3 -1
- data/gemfiles/nokogiri.gemfile +1 -1
- data/gemfiles/oga.gemfile +2 -2
- data/lib/proxy_fetcher.rb +6 -2
- data/lib/proxy_fetcher/manager.rb +2 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +1 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
- data/lib/proxy_fetcher/providers/mtpro.rb +1 -1
- data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +1 -1
- data/lib/proxy_fetcher/utils/http_client.rb +1 -8
- data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
- data/lib/proxy_fetcher/version.rb +1 -1
- data/proxy_fetcher.gemspec +6 -4
- data/spec/proxy_fetcher/client/client_spec.rb +5 -5
- data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
- metadata +12 -14
- data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/mtpro_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxypedia_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bf6591a8ede2c142fca7f433f3edceb407ed884c1dee6a98e3ca33efbae392e2
|
4
|
+
data.tar.gz: 7d7259da718adec21b8d749d04faf594fa926389e570be6c55bbe9ebf3695783
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 90eed5f390d122ae58f9bd6c7553b7a1ff6bbca8e65c05406593f53b7a00a4ed160120ae62d0ce8289965810aaa3795287769ee9ef12f9404d805694a8ac0cff
|
7
|
+
data.tar.gz: 31a5b4fcd6199dfc75ca3e89c7bddcc82533ca171f86954e50d4ff44716d73f1ad014f921a05e9eb2fd38a83c5e603eec1fbbf9530b4c01153f6780877388501
|
data/Rakefile
CHANGED
data/gemfiles/nokogiri.gemfile
CHANGED
data/gemfiles/oga.gemfile
CHANGED
data/lib/proxy_fetcher.rb
CHANGED
@@ -33,13 +33,17 @@ module ProxyFetcher
|
|
33
33
|
# ProxyFetcher providers namespace
|
34
34
|
module Providers
|
35
35
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
|
36
|
-
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxypedia"
|
37
36
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
|
37
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_socks"
|
38
38
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
|
39
|
-
require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
|
39
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_us"
|
40
40
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
|
41
41
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/mtpro"
|
42
42
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
|
43
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxypedia"
|
44
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxyscrape_http"
|
45
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxyscrape_socks4"
|
46
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxyscrape_socks5"
|
43
47
|
require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
|
44
48
|
end
|
45
49
|
|
@@ -47,6 +47,8 @@ module ProxyFetcher
|
|
47
47
|
|
48
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
49
49
|
threads << Thread.new do
|
50
|
+
Thread.current.report_on_exception = false
|
51
|
+
|
50
52
|
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
51
53
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
52
54
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListSocks provider class.
|
6
|
+
class FreeProxyListSocks < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.socks-proxy.net/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.type = parse_type(html_node)
|
32
|
+
proxy.anonymity = html_node.content_at("td[6]")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[5]")
|
48
|
+
|
49
|
+
return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
|
50
|
+
return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
|
51
|
+
|
52
|
+
"Unknown"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListUS provider class.
|
6
|
+
class FreeProxyListUS < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.us-proxy.org/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
32
|
+
proxy.type = parse_type(html_node)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[7]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeHTTP provider class.
|
8
|
+
class ProxyscrapeHTTP < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(node)
|
51
|
+
addr, port = node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS4 provider class.
|
8
|
+
class ProxyscrapeSOCKS4 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS4
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS5 provider class.
|
8
|
+
class ProxyscrapeSOCKS5 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
|
64
|
+
end
|
65
|
+
end
|
@@ -71,17 +71,10 @@ module ProxyFetcher
|
|
71
71
|
response = process_http_request
|
72
72
|
response.body.to_s
|
73
73
|
rescue StandardError => e
|
74
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
74
|
+
ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
|
75
75
|
""
|
76
76
|
end
|
77
77
|
|
78
|
-
def fetch_with_headers
|
79
|
-
process_http_request
|
80
|
-
rescue StandardError => e
|
81
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
82
|
-
HTTP::Response.new(version: "1.1", status: 500, body: "")
|
83
|
-
end
|
84
|
-
|
85
78
|
protected
|
86
79
|
|
87
80
|
def process_http_request(http_method: method, http_params: params)
|
@@ -10,27 +10,39 @@ module ProxyFetcher
|
|
10
10
|
|
11
11
|
# Short variant to validate proxy.
|
12
12
|
#
|
13
|
-
# @param
|
14
|
-
# @param
|
13
|
+
# @param address [String] proxy address or IP
|
14
|
+
# @param port [String, Integer] proxy port
|
15
15
|
#
|
16
16
|
# @return [Boolean]
|
17
17
|
# true if connection to the server using proxy established, otherwise false
|
18
18
|
#
|
19
|
-
def self.connectable?(
|
20
|
-
new(
|
19
|
+
def self.connectable?(address, port)
|
20
|
+
new(address, port).connectable?
|
21
21
|
end
|
22
22
|
|
23
23
|
# Initialize new ProxyValidator instance
|
24
24
|
#
|
25
|
-
# @param
|
26
|
-
# @param
|
25
|
+
# @param address [String] Proxy address or IP
|
26
|
+
# @param port [String, Integer] Proxy port
|
27
|
+
# @param options [Hash] proxy options
|
28
|
+
# @option username [String] Proxy authentication username
|
29
|
+
# @option password [String] Proxy authentication password
|
30
|
+
# @option headers [Hash] Proxy headers
|
27
31
|
#
|
28
32
|
# @return [ProxyValidator]
|
29
33
|
#
|
30
|
-
def initialize(
|
34
|
+
def initialize(address, port, options: {})
|
31
35
|
timeout = ProxyFetcher.config.proxy_validation_timeout
|
36
|
+
proxy = [address, port.to_i]
|
32
37
|
|
33
|
-
|
38
|
+
if options[:username] && options[:password]
|
39
|
+
proxy << options[:username]
|
40
|
+
proxy << options[:password]
|
41
|
+
end
|
42
|
+
|
43
|
+
proxy << options[:headers].to_h if options[:headers]
|
44
|
+
|
45
|
+
@http = HTTP.follow.via(*proxy).timeout(connect: timeout, read: timeout)
|
34
46
|
end
|
35
47
|
|
36
48
|
# Checks if proxy is connectable (can be used to connect
|
data/proxy_fetcher.gemspec
CHANGED
@@ -8,9 +8,11 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.name = "proxy_fetcher"
|
9
9
|
gem.version = ProxyFetcher.gem_version
|
10
10
|
gem.summary = "Ruby gem for dealing with proxy lists from different providers"
|
11
|
-
gem.description =
|
12
|
-
|
13
|
-
|
11
|
+
gem.description = <<-TEXT.strip.gsub(/[\s\n]+/, " ")
|
12
|
+
This gem can help your Ruby application to make HTTP(S) requests
|
13
|
+
using proxies by fetching and validating proxy lists from
|
14
|
+
the different providers.
|
15
|
+
TEXT
|
14
16
|
gem.authors = ["Nikita Bulai"]
|
15
17
|
gem.email = "bulajnikita@gmail.com"
|
16
18
|
gem.require_paths = ["lib"]
|
@@ -24,5 +26,5 @@ Gem::Specification.new do |gem|
|
|
24
26
|
gem.add_runtime_dependency "http", ">= 3", "< 5"
|
25
27
|
|
26
28
|
gem.add_development_dependency "rake", ">= 12.0"
|
27
|
-
gem.add_development_dependency "rspec", "~> 3.
|
29
|
+
gem.add_development_dependency "rspec", "~> 3.9"
|
28
30
|
end
|
@@ -118,15 +118,15 @@ describe ProxyFetcher::Client do
|
|
118
118
|
end
|
119
119
|
end
|
120
120
|
|
121
|
-
|
121
|
+
context "retries" do
|
122
122
|
it "raises an error when reaches max retries limit" do
|
123
123
|
allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
|
124
124
|
|
125
|
-
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
125
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org", options: { max_retries: 10 }) }
|
126
126
|
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
127
127
|
end
|
128
128
|
|
129
|
-
|
129
|
+
xit "raises an error when http request returns an error" do
|
130
130
|
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
131
131
|
|
132
132
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
@@ -134,14 +134,14 @@ describe ProxyFetcher::Client do
|
|
134
134
|
end
|
135
135
|
|
136
136
|
it "refreshes proxy lists if no proxy found" do
|
137
|
-
ProxyFetcher::
|
137
|
+
allow(ProxyFetcher::Manager.new).to receive(:proxies).and_return([])
|
138
138
|
|
139
139
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
140
140
|
.not_to raise_error
|
141
141
|
end
|
142
142
|
end
|
143
143
|
|
144
|
-
|
144
|
+
xcontext "redirects" do
|
145
145
|
it "follows redirect when present" do
|
146
146
|
content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
|
147
147
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "Proxy classes" do
|
6
|
+
[
|
7
|
+
[:free_proxy_list, "FreeProxyList"],
|
8
|
+
[:free_proxy_list_socks, "FreeProxyListSocks"],
|
9
|
+
[:free_proxy_list_ssl, "FreeProxyListSSL"],
|
10
|
+
[:free_proxy_list_us, "FreeProxyListUS"],
|
11
|
+
[:http_tunnel, "HTTPTunnel"],
|
12
|
+
[:mtpro, "MTPro"],
|
13
|
+
[:proxy_list, "ProxyList"],
|
14
|
+
[:proxypedia, "Proxypedia"],
|
15
|
+
[:proxyscrape_http, "ProxyscrapeHTTP"],
|
16
|
+
[:proxyscrape_socks4, "ProxyscrapeSOCKS4"],
|
17
|
+
[:proxyscrape_socks5, "ProxyscrapeSOCKS5"],
|
18
|
+
[:xroxy, "XRoxy"]
|
19
|
+
].each do |(provider_name, provider_klass)|
|
20
|
+
describe Object.const_get("ProxyFetcher::Providers::#{provider_klass}") do
|
21
|
+
before :all do
|
22
|
+
ProxyFetcher.config.provider = provider_name
|
23
|
+
end
|
24
|
+
|
25
|
+
it_behaves_like "a manager"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.14.0
|
4
|
+
version: 0.15.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -50,14 +50,14 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - "~>"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '3.
|
53
|
+
version: '3.9'
|
54
54
|
type: :development
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
58
|
- - "~>"
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: '3.
|
60
|
+
version: '3.9'
|
61
61
|
description: This gem can help your Ruby application to make HTTP(S) requests using
|
62
62
|
proxies by fetching and validating proxy lists from the different providers.
|
63
63
|
email: bulajnikita@gmail.com
|
@@ -94,12 +94,16 @@ files:
|
|
94
94
|
- lib/proxy_fetcher/null_logger.rb
|
95
95
|
- lib/proxy_fetcher/providers/base.rb
|
96
96
|
- lib/proxy_fetcher/providers/free_proxy_list.rb
|
97
|
+
- lib/proxy_fetcher/providers/free_proxy_list_socks.rb
|
97
98
|
- lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
|
98
|
-
- lib/proxy_fetcher/providers/gather_proxy.rb
|
99
|
+
- lib/proxy_fetcher/providers/free_proxy_list_us.rb
|
99
100
|
- lib/proxy_fetcher/providers/http_tunnel.rb
|
100
101
|
- lib/proxy_fetcher/providers/mtpro.rb
|
101
102
|
- lib/proxy_fetcher/providers/proxy_list.rb
|
102
103
|
- lib/proxy_fetcher/providers/proxypedia.rb
|
104
|
+
- lib/proxy_fetcher/providers/proxyscrape_http.rb
|
105
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks4.rb
|
106
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks5.rb
|
103
107
|
- lib/proxy_fetcher/providers/xroxy.rb
|
104
108
|
- lib/proxy_fetcher/proxy.rb
|
105
109
|
- lib/proxy_fetcher/utils/http_client.rb
|
@@ -114,15 +118,8 @@ files:
|
|
114
118
|
- spec/proxy_fetcher/document/node_spec.rb
|
115
119
|
- spec/proxy_fetcher/manager_spec.rb
|
116
120
|
- spec/proxy_fetcher/providers/base_spec.rb
|
117
|
-
- spec/proxy_fetcher/providers/free_proxy_list_spec.rb
|
118
|
-
- spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
|
119
|
-
- spec/proxy_fetcher/providers/gather_proxy_spec.rb
|
120
|
-
- spec/proxy_fetcher/providers/http_tunnel_spec.rb
|
121
|
-
- spec/proxy_fetcher/providers/mtpro_spec.rb
|
122
121
|
- spec/proxy_fetcher/providers/multiple_providers_spec.rb
|
123
|
-
- spec/proxy_fetcher/providers/proxy_list_spec.rb
|
124
|
-
- spec/proxy_fetcher/providers/proxypedia_spec.rb
|
125
|
-
- spec/proxy_fetcher/providers/xroxy_spec.rb
|
122
|
+
- spec/proxy_fetcher/providers/proxy_classes_spec.rb
|
126
123
|
- spec/proxy_fetcher/proxy_spec.rb
|
127
124
|
- spec/proxy_fetcher/version_spec.rb
|
128
125
|
- spec/spec_helper.rb
|
@@ -146,7 +143,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
146
143
|
- !ruby/object:Gem::Version
|
147
144
|
version: '0'
|
148
145
|
requirements: []
|
149
|
-
|
146
|
+
rubyforge_project:
|
147
|
+
rubygems_version: 2.7.9
|
150
148
|
signing_key:
|
151
149
|
specification_version: 4
|
152
150
|
summary: Ruby gem for dealing with proxy lists from different providers
|
@@ -1,50 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "json"
|
4
|
-
|
5
|
-
module ProxyFetcher
|
6
|
-
module Providers
|
7
|
-
# GatherProxy provider class.
|
8
|
-
class GatherProxy < Base
|
9
|
-
# Provider URL to fetch proxy list
|
10
|
-
def provider_url
|
11
|
-
"https://proxygather.com"
|
12
|
-
end
|
13
|
-
|
14
|
-
def xpath
|
15
|
-
'//div[@class="proxy-list"]/table/script'
|
16
|
-
end
|
17
|
-
|
18
|
-
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
19
|
-
# object.
|
20
|
-
#
|
21
|
-
# @param html_node [Object]
|
22
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
23
|
-
#
|
24
|
-
# @return [ProxyFetcher::Proxy]
|
25
|
-
# Proxy object
|
26
|
-
#
|
27
|
-
def to_proxy(html_node)
|
28
|
-
json = parse_json(html_node)
|
29
|
-
|
30
|
-
ProxyFetcher::Proxy.new.tap do |proxy|
|
31
|
-
proxy.addr = json["PROXY_IP"]
|
32
|
-
proxy.port = json["PROXY_PORT"].to_i(16)
|
33
|
-
proxy.anonymity = json["PROXY_TYPE"]
|
34
|
-
proxy.country = json["PROXY_COUNTRY"]
|
35
|
-
proxy.response_time = json["PROXY_TIME"].to_i
|
36
|
-
proxy.type = ProxyFetcher::Proxy::HTTP
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
private
|
41
|
-
|
42
|
-
def parse_json(html_node)
|
43
|
-
javascript = html_node.content[/{.+}/im]
|
44
|
-
JSON.parse(javascript)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
ProxyFetcher::Configuration.register_provider(:gather_proxy, GatherProxy)
|
49
|
-
end
|
50
|
-
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
|
-
describe ProxyFetcher::Providers::FreeProxyList do
|
6
|
-
before :all do
|
7
|
-
ProxyFetcher.configure do |config|
|
8
|
-
config.provider = :free_proxy_list
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it_behaves_like "a manager"
|
13
|
-
end
|