proxy_fetcher 0.10.2 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/Gemfile +8 -5
- data/Rakefile +7 -3
- data/gemfiles/nokogiri.gemfile +8 -6
- data/gemfiles/oga.gemfile +8 -6
- data/lib/proxy_fetcher.rb +46 -35
- data/lib/proxy_fetcher/client/client.rb +10 -3
- data/lib/proxy_fetcher/client/request.rb +4 -4
- data/lib/proxy_fetcher/configuration.rb +24 -19
- data/lib/proxy_fetcher/document.rb +0 -9
- data/lib/proxy_fetcher/document/adapters.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +2 -2
- data/lib/proxy_fetcher/exceptions.rb +6 -6
- data/lib/proxy_fetcher/manager.rb +42 -9
- data/lib/proxy_fetcher/providers/base.rb +43 -22
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +9 -10
- data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -15
- data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
- data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
- data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
- data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
- data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
- data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
- data/lib/proxy_fetcher/proxy.rb +16 -4
- data/lib/proxy_fetcher/utils/http_client.rb +7 -12
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
- data/lib/proxy_fetcher/utils/proxy_validator.rb +21 -9
- data/lib/proxy_fetcher/version.rb +3 -3
- data/proxy_fetcher.gemspec +21 -16
- data/spec/fixtures/proxies.txt +14 -0
- data/spec/proxy_fetcher/client/client_spec.rb +72 -57
- data/spec/proxy_fetcher/configuration_spec.rb +11 -11
- data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
- data/spec/proxy_fetcher/document/node_spec.rb +4 -4
- data/spec/proxy_fetcher/manager_spec.rb +18 -0
- data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
- data/spec/proxy_fetcher/proxy_spec.rb +14 -14
- data/spec/proxy_fetcher/version_spec.rb +2 -0
- data/spec/spec_helper.rb +10 -10
- data/spec/support/manager_examples.rb +21 -21
- metadata +27 -17
- data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -58
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
@@ -13,7 +13,7 @@ module ProxyFetcher
|
|
13
13
|
# @return [WrongCustomClass]
|
14
14
|
#
|
15
15
|
def initialize(klass, methods)
|
16
|
-
required_methods = Array(methods).join(
|
16
|
+
required_methods = Array(methods).join(", ")
|
17
17
|
super("#{klass} must respond to [#{required_methods}] class methods!")
|
18
18
|
end
|
19
19
|
end
|
@@ -53,7 +53,7 @@ module ProxyFetcher
|
|
53
53
|
# @return [MaximumRedirectsReached]
|
54
54
|
#
|
55
55
|
def initialize(*)
|
56
|
-
super(
|
56
|
+
super("maximum redirects reached")
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
@@ -66,7 +66,7 @@ module ProxyFetcher
|
|
66
66
|
# @return [MaximumRetriesReached]
|
67
67
|
#
|
68
68
|
def initialize(*)
|
69
|
-
super(
|
69
|
+
super("reached the maximum number of retries")
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -95,7 +95,7 @@ module ProxyFetcher
|
|
95
95
|
super(<<-MSG.strip.squeeze
|
96
96
|
you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
|
97
97
|
You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
|
98
|
-
|
98
|
+
MSG
|
99
99
|
)
|
100
100
|
end
|
101
101
|
end
|
@@ -111,7 +111,7 @@ module ProxyFetcher
|
|
111
111
|
# @return [AdapterSetupError]
|
112
112
|
#
|
113
113
|
def initialize(adapter_name, error)
|
114
|
-
adapter = demodulize(adapter_name.gsub(
|
114
|
+
adapter = demodulize(adapter_name.gsub("Adapter", ""))
|
115
115
|
|
116
116
|
super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
|
117
117
|
end
|
@@ -127,7 +127,7 @@ module ProxyFetcher
|
|
127
127
|
#
|
128
128
|
def demodulize(path)
|
129
129
|
path = path.to_s
|
130
|
-
index = path.rindex(
|
130
|
+
index = path.rindex("::")
|
131
131
|
|
132
132
|
index ? path[(index + 2)..-1] : path
|
133
133
|
end
|
@@ -3,6 +3,16 @@
|
|
3
3
|
module ProxyFetcher
|
4
4
|
# ProxyFetcher Manager class for interacting with proxy lists from various providers.
|
5
5
|
class Manager
|
6
|
+
REFRESHER_LOCK = Mutex.new
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def from_files(files, **options)
|
10
|
+
new(**options.merge(files: Array(files)))
|
11
|
+
end
|
12
|
+
|
13
|
+
alias from_file from_files
|
14
|
+
end
|
15
|
+
|
6
16
|
# @!attribute [r] proxies
|
7
17
|
# @return [Array<ProxyFetcher::Proxy>] An array of proxies
|
8
18
|
attr_reader :proxies
|
@@ -14,14 +24,17 @@ module ProxyFetcher
|
|
14
24
|
#
|
15
25
|
# @return [Manager]
|
16
26
|
#
|
17
|
-
def initialize(
|
18
|
-
if refresh
|
19
|
-
refresh_list!(filters)
|
27
|
+
def initialize(**options)
|
28
|
+
if options.fetch(:refresh, true)
|
29
|
+
refresh_list!(options.fetch(:filters, {}))
|
20
30
|
else
|
21
31
|
@proxies = []
|
22
32
|
end
|
23
33
|
|
24
|
-
|
34
|
+
files = Array(options.fetch(:file, options.fetch(:files, [])))
|
35
|
+
load_proxies_from_files!(files) if files&.any?
|
36
|
+
|
37
|
+
cleanup! if options.fetch(:validate, false)
|
25
38
|
end
|
26
39
|
|
27
40
|
# Update current proxy list using configured providers.
|
@@ -30,17 +43,17 @@ module ProxyFetcher
|
|
30
43
|
#
|
31
44
|
def refresh_list!(filters = nil)
|
32
45
|
@proxies = []
|
33
|
-
|
34
46
|
threads = []
|
35
|
-
lock = Mutex.new
|
36
47
|
|
37
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
38
49
|
threads << Thread.new do
|
50
|
+
Thread.current.report_on_exception = false
|
51
|
+
|
39
52
|
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
40
53
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
41
54
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
42
55
|
|
43
|
-
|
56
|
+
REFRESHER_LOCK.synchronize do
|
44
57
|
@proxies.concat(provider_proxies)
|
45
58
|
end
|
46
59
|
end
|
@@ -55,7 +68,7 @@ module ProxyFetcher
|
|
55
68
|
|
56
69
|
# Pop just the first proxy (and put it back at the end of the proxy list).
|
57
70
|
#
|
58
|
-
# @return [Proxy]
|
71
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
59
72
|
# proxy object from the list
|
60
73
|
#
|
61
74
|
def get
|
@@ -72,7 +85,7 @@ module ProxyFetcher
|
|
72
85
|
# Pop the first valid proxy (and put it back at the end of the proxy list).
|
73
86
|
# Invalid proxies will be removed from the list
|
74
87
|
#
|
75
|
-
# @return [Proxy]
|
88
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
76
89
|
# proxy object from the list
|
77
90
|
#
|
78
91
|
def get!
|
@@ -89,6 +102,26 @@ module ProxyFetcher
|
|
89
102
|
|
90
103
|
alias pop! get!
|
91
104
|
|
105
|
+
# Loads proxies from files.
|
106
|
+
#
|
107
|
+
# @param proxy_files [String, Array<String,Pathname>]
|
108
|
+
# file path or list of file paths to load
|
109
|
+
#
|
110
|
+
def load_proxies_from_files!(proxy_files)
|
111
|
+
proxy_files = Array(proxy_files)
|
112
|
+
return if proxy_files.empty?
|
113
|
+
|
114
|
+
proxy_files.each do |proxy_file|
|
115
|
+
File.foreach(proxy_file, chomp: true) do |proxy_string|
|
116
|
+
addr, port = proxy_string.split(":", 2)
|
117
|
+
port = Integer(port) if port
|
118
|
+
@proxies << Proxy.new(addr: addr, port: port)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
@proxies.uniq!
|
123
|
+
end
|
124
|
+
|
92
125
|
# Clean the current proxy list of dead proxies (those that don't respond before the timeout)
|
93
126
|
#
|
94
127
|
# @return [Array<ProxyFetcher::Proxy>]
|
@@ -6,12 +6,15 @@ module ProxyFetcher
|
|
6
6
|
class Base
|
7
7
|
# Loads proxy provider page content, extracts the proxy list from it
|
8
8
|
# and convert every entry to proxy object.
|
9
|
-
def fetch_proxies
|
9
|
+
def fetch_proxies(filters = {})
|
10
10
|
raw_proxies = load_proxy_list(filters)
|
11
11
|
proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
|
12
12
|
proxies.reject { |proxy| proxy.addr.nil? }
|
13
13
|
end
|
14
14
|
|
15
|
+
# For retro-compatibility
|
16
|
+
alias fetch_proxies! fetch_proxies
|
17
|
+
|
15
18
|
def provider_url
|
16
19
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
17
20
|
end
|
@@ -24,10 +27,17 @@ module ProxyFetcher
|
|
24
27
|
{}
|
25
28
|
end
|
26
29
|
|
30
|
+
# @return [Hash]
|
31
|
+
# Provider headers required to fetch the proxy list
|
32
|
+
#
|
27
33
|
def provider_headers
|
28
34
|
{}
|
29
35
|
end
|
30
36
|
|
37
|
+
def xpath
|
38
|
+
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
39
|
+
end
|
40
|
+
|
31
41
|
# Just syntactic sugar to make it easier to call the #fetch_proxies! method.
|
32
42
|
def self.fetch_proxies!(*args)
|
33
43
|
new.fetch_proxies!(*args)
|
@@ -37,18 +47,29 @@ module ProxyFetcher
|
|
37
47
|
|
38
48
|
# Loads raw provider HTML with proxies.
|
39
49
|
#
|
50
|
+
# @param url [String]
|
51
|
+
# Provider URL
|
52
|
+
#
|
53
|
+
# @param filters [#to_h]
|
54
|
+
# Provider filters (Hash-like object)
|
55
|
+
#
|
40
56
|
# @return [String]
|
41
|
-
# HTML body
|
57
|
+
# HTML body from the response
|
42
58
|
#
|
43
59
|
def load_html(url, filters = {})
|
44
|
-
|
60
|
+
unless filters.respond_to?(:to_h)
|
61
|
+
raise ArgumentError, "filters must be a Hash or respond to #to_h"
|
62
|
+
end
|
45
63
|
|
46
|
-
|
47
|
-
|
48
|
-
|
64
|
+
if filters&.any?
|
65
|
+
# TODO: query for post request?
|
66
|
+
uri = URI.parse(url)
|
67
|
+
uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
|
68
|
+
url = uri.to_s
|
69
|
+
end
|
49
70
|
|
50
71
|
ProxyFetcher.config.http_client.fetch(
|
51
|
-
|
72
|
+
url,
|
52
73
|
method: provider_method,
|
53
74
|
headers: provider_headers,
|
54
75
|
params: provider_params
|
@@ -71,29 +92,29 @@ module ProxyFetcher
|
|
71
92
|
ProxyFetcher::Document.parse(html)
|
72
93
|
end
|
73
94
|
|
95
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
96
|
+
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
97
|
+
# to return all the proxy entries (HTML nodes).
|
98
|
+
#
|
99
|
+
# @return [Array<ProxyFetcher::Document::Node>]
|
100
|
+
# Collection of extracted HTML nodes with full proxy info
|
101
|
+
#
|
102
|
+
def load_proxy_list(filters = {})
|
103
|
+
doc = load_document(provider_url, filters)
|
104
|
+
doc.xpath(xpath)
|
105
|
+
end
|
106
|
+
|
74
107
|
def build_proxy(*args)
|
75
108
|
to_proxy(*args)
|
76
|
-
rescue StandardError =>
|
109
|
+
rescue StandardError => e
|
77
110
|
ProxyFetcher.logger.warn(
|
78
|
-
"Failed to build Proxy
|
111
|
+
"Failed to build Proxy for #{self.class.name.split("::").last} " \
|
112
|
+
"due to error: #{e.message}"
|
79
113
|
)
|
80
114
|
|
81
115
|
nil
|
82
116
|
end
|
83
117
|
|
84
|
-
# Fetches HTML content by sending HTTP request to the provider URL and
|
85
|
-
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
86
|
-
# to return all the proxy entries (HTML nodes).
|
87
|
-
#
|
88
|
-
# Abstract method. Must be implemented in a descendant class
|
89
|
-
#
|
90
|
-
# @return [Array<Document::Node>]
|
91
|
-
# list of proxy elements from the providers HTML content
|
92
|
-
#
|
93
|
-
def load_proxy_list(*)
|
94
|
-
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
95
|
-
end
|
96
|
-
|
97
118
|
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
|
98
119
|
#
|
99
120
|
# Abstract method. Must be implemented in a descendant class
|
@@ -6,13 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyList < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://free-proxy-list.net/"
|
10
10
|
end
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
|
-
def
|
14
|
-
|
15
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
16
15
|
end
|
17
16
|
|
18
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -26,10 +25,10 @@ module ProxyFetcher
|
|
26
25
|
#
|
27
26
|
def to_proxy(html_node)
|
28
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
29
|
-
proxy.addr = html_node.content_at(
|
30
|
-
proxy.port = Integer(html_node.content_at(
|
31
|
-
proxy.country = html_node.content_at(
|
32
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
33
32
|
proxy.type = parse_type(html_node)
|
34
33
|
end
|
35
34
|
end
|
@@ -45,8 +44,8 @@ module ProxyFetcher
|
|
45
44
|
# Proxy type
|
46
45
|
#
|
47
46
|
def parse_type(html_node)
|
48
|
-
https = html_node.content_at(
|
49
|
-
https
|
47
|
+
https = html_node.content_at("td[6]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
50
49
|
end
|
51
50
|
end
|
52
51
|
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListSocks provider class.
|
6
|
+
class FreeProxyListSocks < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.socks-proxy.net/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.type = parse_type(html_node)
|
32
|
+
proxy.anonymity = html_node.content_at("td[6]")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[5]")
|
48
|
+
|
49
|
+
return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
|
50
|
+
return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
|
51
|
+
|
52
|
+
"Unknown"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
|
57
|
+
end
|
58
|
+
end
|
@@ -6,20 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyListSSL < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
-
# Fetches HTML content by sending HTTP request to the provider URL and
|
13
|
-
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
12
|
# [NOTE] Doesn't support filtering
|
20
|
-
def
|
21
|
-
|
22
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
23
15
|
end
|
24
16
|
|
25
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -33,10 +25,10 @@ module ProxyFetcher
|
|
33
25
|
#
|
34
26
|
def to_proxy(html_node)
|
35
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
36
|
-
proxy.addr = html_node.content_at(
|
37
|
-
proxy.port = Integer(html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
40
32
|
proxy.type = ProxyFetcher::Proxy::HTTPS
|
41
33
|
end
|
42
34
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListUS provider class.
|
6
|
+
class FreeProxyListUS < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.us-proxy.org/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
32
|
+
proxy.type = parse_type(html_node)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[7]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
|
53
|
+
end
|
54
|
+
end
|