proxy_fetcher 0.10.2 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -1
- data/Gemfile +8 -5
- data/Rakefile +7 -3
- data/gemfiles/nokogiri.gemfile +8 -6
- data/gemfiles/oga.gemfile +8 -6
- data/lib/proxy_fetcher.rb +46 -35
- data/lib/proxy_fetcher/client/client.rb +10 -3
- data/lib/proxy_fetcher/client/request.rb +4 -4
- data/lib/proxy_fetcher/configuration.rb +24 -19
- data/lib/proxy_fetcher/document.rb +0 -9
- data/lib/proxy_fetcher/document/adapters.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +2 -2
- data/lib/proxy_fetcher/exceptions.rb +6 -6
- data/lib/proxy_fetcher/manager.rb +42 -9
- data/lib/proxy_fetcher/providers/base.rb +43 -22
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +9 -10
- data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -15
- data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
- data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
- data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
- data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
- data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
- data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
- data/lib/proxy_fetcher/proxy.rb +16 -4
- data/lib/proxy_fetcher/utils/http_client.rb +7 -12
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
- data/lib/proxy_fetcher/utils/proxy_validator.rb +21 -9
- data/lib/proxy_fetcher/version.rb +3 -3
- data/proxy_fetcher.gemspec +21 -16
- data/spec/fixtures/proxies.txt +14 -0
- data/spec/proxy_fetcher/client/client_spec.rb +72 -57
- data/spec/proxy_fetcher/configuration_spec.rb +11 -11
- data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
- data/spec/proxy_fetcher/document/node_spec.rb +4 -4
- data/spec/proxy_fetcher/manager_spec.rb +18 -0
- data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
- data/spec/proxy_fetcher/proxy_spec.rb +14 -14
- data/spec/proxy_fetcher/version_spec.rb +2 -0
- data/spec/spec_helper.rb +10 -10
- data/spec/support/manager_examples.rb +21 -21
- metadata +27 -17
- data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -58
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
@@ -13,7 +13,7 @@ module ProxyFetcher
|
|
13
13
|
# @return [WrongCustomClass]
|
14
14
|
#
|
15
15
|
def initialize(klass, methods)
|
16
|
-
required_methods = Array(methods).join(
|
16
|
+
required_methods = Array(methods).join(", ")
|
17
17
|
super("#{klass} must respond to [#{required_methods}] class methods!")
|
18
18
|
end
|
19
19
|
end
|
@@ -53,7 +53,7 @@ module ProxyFetcher
|
|
53
53
|
# @return [MaximumRedirectsReached]
|
54
54
|
#
|
55
55
|
def initialize(*)
|
56
|
-
super(
|
56
|
+
super("maximum redirects reached")
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
@@ -66,7 +66,7 @@ module ProxyFetcher
|
|
66
66
|
# @return [MaximumRetriesReached]
|
67
67
|
#
|
68
68
|
def initialize(*)
|
69
|
-
super(
|
69
|
+
super("reached the maximum number of retries")
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -95,7 +95,7 @@ module ProxyFetcher
|
|
95
95
|
super(<<-MSG.strip.squeeze
|
96
96
|
you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
|
97
97
|
You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
|
98
|
-
|
98
|
+
MSG
|
99
99
|
)
|
100
100
|
end
|
101
101
|
end
|
@@ -111,7 +111,7 @@ module ProxyFetcher
|
|
111
111
|
# @return [AdapterSetupError]
|
112
112
|
#
|
113
113
|
def initialize(adapter_name, error)
|
114
|
-
adapter = demodulize(adapter_name.gsub(
|
114
|
+
adapter = demodulize(adapter_name.gsub("Adapter", ""))
|
115
115
|
|
116
116
|
super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
|
117
117
|
end
|
@@ -127,7 +127,7 @@ module ProxyFetcher
|
|
127
127
|
#
|
128
128
|
def demodulize(path)
|
129
129
|
path = path.to_s
|
130
|
-
index = path.rindex(
|
130
|
+
index = path.rindex("::")
|
131
131
|
|
132
132
|
index ? path[(index + 2)..-1] : path
|
133
133
|
end
|
@@ -3,6 +3,16 @@
|
|
3
3
|
module ProxyFetcher
|
4
4
|
# ProxyFetcher Manager class for interacting with proxy lists from various providers.
|
5
5
|
class Manager
|
6
|
+
REFRESHER_LOCK = Mutex.new
|
7
|
+
|
8
|
+
class << self
|
9
|
+
def from_files(files, **options)
|
10
|
+
new(**options.merge(files: Array(files)))
|
11
|
+
end
|
12
|
+
|
13
|
+
alias from_file from_files
|
14
|
+
end
|
15
|
+
|
6
16
|
# @!attribute [r] proxies
|
7
17
|
# @return [Array<ProxyFetcher::Proxy>] An array of proxies
|
8
18
|
attr_reader :proxies
|
@@ -14,14 +24,17 @@ module ProxyFetcher
|
|
14
24
|
#
|
15
25
|
# @return [Manager]
|
16
26
|
#
|
17
|
-
def initialize(
|
18
|
-
if refresh
|
19
|
-
refresh_list!(filters)
|
27
|
+
def initialize(**options)
|
28
|
+
if options.fetch(:refresh, true)
|
29
|
+
refresh_list!(options.fetch(:filters, {}))
|
20
30
|
else
|
21
31
|
@proxies = []
|
22
32
|
end
|
23
33
|
|
24
|
-
|
34
|
+
files = Array(options.fetch(:file, options.fetch(:files, [])))
|
35
|
+
load_proxies_from_files!(files) if files&.any?
|
36
|
+
|
37
|
+
cleanup! if options.fetch(:validate, false)
|
25
38
|
end
|
26
39
|
|
27
40
|
# Update current proxy list using configured providers.
|
@@ -30,17 +43,17 @@ module ProxyFetcher
|
|
30
43
|
#
|
31
44
|
def refresh_list!(filters = nil)
|
32
45
|
@proxies = []
|
33
|
-
|
34
46
|
threads = []
|
35
|
-
lock = Mutex.new
|
36
47
|
|
37
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
38
49
|
threads << Thread.new do
|
50
|
+
Thread.current.report_on_exception = false
|
51
|
+
|
39
52
|
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
40
53
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
41
54
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
42
55
|
|
43
|
-
|
56
|
+
REFRESHER_LOCK.synchronize do
|
44
57
|
@proxies.concat(provider_proxies)
|
45
58
|
end
|
46
59
|
end
|
@@ -55,7 +68,7 @@ module ProxyFetcher
|
|
55
68
|
|
56
69
|
# Pop just the first proxy (and move it to the end of the proxy list).
|
57
70
|
#
|
58
|
-
# @return [Proxy]
|
71
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
59
72
|
# proxy object from the list
|
60
73
|
#
|
61
74
|
def get
|
@@ -72,7 +85,7 @@ module ProxyFetcher
|
|
72
85
|
# Pop the first valid proxy (and move it to the end of the proxy list)
|
73
86
|
# Invalid proxies will be removed from the list
|
74
87
|
#
|
75
|
-
# @return [Proxy]
|
88
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
76
89
|
# proxy object from the list
|
77
90
|
#
|
78
91
|
def get!
|
@@ -89,6 +102,26 @@ module ProxyFetcher
|
|
89
102
|
|
90
103
|
alias pop! get!
|
91
104
|
|
105
|
+
# Loads proxies from files.
|
106
|
+
#
|
107
|
+
# @param proxy_files [String, Array<String,Pathname>]
|
108
|
+
# file path or list of file paths to load
|
109
|
+
#
|
110
|
+
def load_proxies_from_files!(proxy_files)
|
111
|
+
proxy_files = Array(proxy_files)
|
112
|
+
return if proxy_files.empty?
|
113
|
+
|
114
|
+
proxy_files.each do |proxy_file|
|
115
|
+
File.foreach(proxy_file, chomp: true) do |proxy_string|
|
116
|
+
addr, port = proxy_string.split(":", 2)
|
117
|
+
port = Integer(port) if port
|
118
|
+
@proxies << Proxy.new(addr: addr, port: port)
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
@proxies.uniq!
|
123
|
+
end
|
124
|
+
|
92
125
|
# Clean the current proxy list of dead proxies (those that don't respond within the timeout)
|
93
126
|
#
|
94
127
|
# @return [Array<ProxyFetcher::Proxy>]
|
@@ -6,12 +6,15 @@ module ProxyFetcher
|
|
6
6
|
class Base
|
7
7
|
# Loads proxy provider page content, extract proxy list from it
|
8
8
|
# and convert every entry to proxy object.
|
9
|
-
def fetch_proxies
|
9
|
+
def fetch_proxies(filters = {})
|
10
10
|
raw_proxies = load_proxy_list(filters)
|
11
11
|
proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
|
12
12
|
proxies.reject { |proxy| proxy.addr.nil? }
|
13
13
|
end
|
14
14
|
|
15
|
+
# For retro-compatibility
|
16
|
+
alias fetch_proxies! fetch_proxies
|
17
|
+
|
15
18
|
def provider_url
|
16
19
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
17
20
|
end
|
@@ -24,10 +27,17 @@ module ProxyFetcher
|
|
24
27
|
{}
|
25
28
|
end
|
26
29
|
|
30
|
+
# @return [Hash]
|
31
|
+
# Provider headers required to fetch the proxy list
|
32
|
+
#
|
27
33
|
def provider_headers
|
28
34
|
{}
|
29
35
|
end
|
30
36
|
|
37
|
+
def xpath
|
38
|
+
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
39
|
+
end
|
40
|
+
|
31
41
|
# Just syntactic sugar to make it easier to call the #fetch_proxies! method.
|
32
42
|
def self.fetch_proxies!(*args)
|
33
43
|
new.fetch_proxies!(*args)
|
@@ -37,18 +47,29 @@ module ProxyFetcher
|
|
37
47
|
|
38
48
|
# Loads raw provider HTML with proxies.
|
39
49
|
#
|
50
|
+
# @param url [String]
|
51
|
+
# Provider URL
|
52
|
+
#
|
53
|
+
# @param filters [#to_h]
|
54
|
+
# Provider filters (Hash-like object)
|
55
|
+
#
|
40
56
|
# @return [String]
|
41
|
-
# HTML body
|
57
|
+
# HTML body from the response
|
42
58
|
#
|
43
59
|
def load_html(url, filters = {})
|
44
|
-
|
60
|
+
unless filters.respond_to?(:to_h)
|
61
|
+
raise ArgumentError, "filters must be a Hash or respond to #to_h"
|
62
|
+
end
|
45
63
|
|
46
|
-
|
47
|
-
|
48
|
-
|
64
|
+
if filters&.any?
|
65
|
+
# TODO: query for post request?
|
66
|
+
uri = URI.parse(url)
|
67
|
+
uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
|
68
|
+
url = uri.to_s
|
69
|
+
end
|
49
70
|
|
50
71
|
ProxyFetcher.config.http_client.fetch(
|
51
|
-
|
72
|
+
url,
|
52
73
|
method: provider_method,
|
53
74
|
headers: provider_headers,
|
54
75
|
params: provider_params
|
@@ -71,29 +92,29 @@ module ProxyFetcher
|
|
71
92
|
ProxyFetcher::Document.parse(html)
|
72
93
|
end
|
73
94
|
|
95
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
96
|
+
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
97
|
+
# to return all the proxy entries (HTML nodes).
|
98
|
+
#
|
99
|
+
# @return [Array<ProxyFetcher::Document::Node>]
|
100
|
+
# Collection of extracted HTML nodes with full proxy info
|
101
|
+
#
|
102
|
+
def load_proxy_list(filters = {})
|
103
|
+
doc = load_document(provider_url, filters)
|
104
|
+
doc.xpath(xpath)
|
105
|
+
end
|
106
|
+
|
74
107
|
def build_proxy(*args)
|
75
108
|
to_proxy(*args)
|
76
|
-
rescue StandardError =>
|
109
|
+
rescue StandardError => e
|
77
110
|
ProxyFetcher.logger.warn(
|
78
|
-
"Failed to build Proxy
|
111
|
+
"Failed to build Proxy for #{self.class.name.split("::").last} " \
|
112
|
+
"due to error: #{e.message}"
|
79
113
|
)
|
80
114
|
|
81
115
|
nil
|
82
116
|
end
|
83
117
|
|
84
|
-
# Fetches HTML content by sending HTTP request to the provider URL and
|
85
|
-
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
86
|
-
# to return all the proxy entries (HTML nodes).
|
87
|
-
#
|
88
|
-
# Abstract method. Must be implemented in a descendant class
|
89
|
-
#
|
90
|
-
# @return [Array<Document::Node>]
|
91
|
-
# list of proxy elements from the providers HTML content
|
92
|
-
#
|
93
|
-
def load_proxy_list(*)
|
94
|
-
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
95
|
-
end
|
96
|
-
|
97
118
|
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
|
98
119
|
#
|
99
120
|
# Abstract method. Must be implemented in a descendant class
|
@@ -6,13 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyList < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://free-proxy-list.net/"
|
10
10
|
end
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
|
-
def
|
14
|
-
|
15
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
16
15
|
end
|
17
16
|
|
18
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -26,10 +25,10 @@ module ProxyFetcher
|
|
26
25
|
#
|
27
26
|
def to_proxy(html_node)
|
28
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
29
|
-
proxy.addr = html_node.content_at(
|
30
|
-
proxy.port = Integer(html_node.content_at(
|
31
|
-
proxy.country = html_node.content_at(
|
32
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
33
32
|
proxy.type = parse_type(html_node)
|
34
33
|
end
|
35
34
|
end
|
@@ -45,8 +44,8 @@ module ProxyFetcher
|
|
45
44
|
# Proxy type
|
46
45
|
#
|
47
46
|
def parse_type(html_node)
|
48
|
-
https = html_node.content_at(
|
49
|
-
https
|
47
|
+
https = html_node.content_at("td[6]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
50
49
|
end
|
51
50
|
end
|
52
51
|
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListSocks provider class.
|
6
|
+
class FreeProxyListSocks < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.socks-proxy.net/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.type = parse_type(html_node)
|
32
|
+
proxy.anonymity = html_node.content_at("td[6]")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[5]")
|
48
|
+
|
49
|
+
return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
|
50
|
+
return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
|
51
|
+
|
52
|
+
"Unknown"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
|
57
|
+
end
|
58
|
+
end
|
@@ -6,20 +6,12 @@ module ProxyFetcher
|
|
6
6
|
class FreeProxyListSSL < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
8
|
def provider_url
|
9
|
-
|
9
|
+
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
-
# Fetches HTML content by sending HTTP request to the provider URL and
|
13
|
-
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
14
|
-
# to return all the proxy entries (HTML nodes).
|
15
|
-
#
|
16
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
17
|
-
# Collection of extracted HTML nodes with full proxy info
|
18
|
-
#
|
19
12
|
# [NOTE] Doesn't support filtering
|
20
|
-
def
|
21
|
-
|
22
|
-
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
23
15
|
end
|
24
16
|
|
25
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -33,10 +25,10 @@ module ProxyFetcher
|
|
33
25
|
#
|
34
26
|
def to_proxy(html_node)
|
35
27
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
36
|
-
proxy.addr = html_node.content_at(
|
37
|
-
proxy.port = Integer(html_node.content_at(
|
38
|
-
proxy.country = html_node.content_at(
|
39
|
-
proxy.anonymity = html_node.content_at(
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
40
32
|
proxy.type = ProxyFetcher::Proxy::HTTPS
|
41
33
|
end
|
42
34
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListUS provider class.
|
6
|
+
class FreeProxyListUS < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.us-proxy.org/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
'//table[@id="proxylisttable"]/tbody/tr'
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
32
|
+
proxy.type = parse_type(html_node)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[7]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
|
53
|
+
end
|
54
|
+
end
|