proxy_fetcher 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gemfiles/nokogiri.gemfile +1 -1
- data/gemfiles/oga.gemfile +1 -1
- data/lib/proxy_fetcher.rb +1 -0
- data/lib/proxy_fetcher/providers/base.rb +48 -8
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +5 -3
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +5 -3
- data/lib/proxy_fetcher/providers/gather_proxy.rb +4 -2
- data/lib/proxy_fetcher/providers/http_tunnel.rb +5 -3
- data/lib/proxy_fetcher/providers/proxy_docker.rb +53 -13
- data/lib/proxy_fetcher/providers/proxy_list.rb +4 -2
- data/lib/proxy_fetcher/providers/xroxy.rb +4 -2
- data/lib/proxy_fetcher/utils/http_client.rb +30 -9
- data/lib/proxy_fetcher/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '01894f07ae28eafbb09934aa7a4e52188fb0bf02db7ab458df5c91a93b3e32af'
|
4
|
+
data.tar.gz: b487f3aeb6b833ab6b6395d0d4c9311294f7894e68f6a51c2e9943ccf2904d09
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aef23de20b41467dc2e1cadf65e6e728555f62e3cba0006df55ec62c8b877f8ca924dff9a81441897dc8e4f3efbf346ecefad62cf0b1e5a3bbcbd78272e1ea34
|
7
|
+
data.tar.gz: de41ae50f5bff9c8b8ae309057695661e76c47d2df703d369df6a2b3b40bffcfc881edc21b9833ca0f303392a480b7562b53b39d3f8c79e8e47cd0595a690366
|
data/gemfiles/nokogiri.gemfile
CHANGED
data/gemfiles/oga.gemfile
CHANGED
data/lib/proxy_fetcher.rb
CHANGED
@@ -7,7 +7,25 @@ module ProxyFetcher
|
|
7
7
|
# Loads proxy provider page content, extract proxy list from it
|
8
8
|
# and convert every entry to proxy object.
|
9
9
|
def fetch_proxies!(filters = {})
|
10
|
-
load_proxy_list(filters)
|
10
|
+
raw_proxies = load_proxy_list(filters)
|
11
|
+
proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
|
12
|
+
proxies.reject { |proxy| proxy.addr.nil? }
|
13
|
+
end
|
14
|
+
|
15
|
+
def provider_url
|
16
|
+
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
17
|
+
end
|
18
|
+
|
19
|
+
def provider_method
|
20
|
+
:get
|
21
|
+
end
|
22
|
+
|
23
|
+
def provider_params
|
24
|
+
{}
|
25
|
+
end
|
26
|
+
|
27
|
+
def provider_headers
|
28
|
+
{}
|
11
29
|
end
|
12
30
|
|
13
31
|
# Just syntactic sugar to make it easier to call #fetch_proxies! method.
|
@@ -17,7 +35,27 @@ module ProxyFetcher
|
|
17
35
|
|
18
36
|
protected
|
19
37
|
|
20
|
-
# Loads
|
38
|
+
# Loads raw provider HTML with proxies.
|
39
|
+
#
|
40
|
+
# @return [String]
|
41
|
+
# HTML body
|
42
|
+
#
|
43
|
+
def load_html(url, filters = {})
|
44
|
+
raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
|
45
|
+
|
46
|
+
uri = URI.parse(url)
|
47
|
+
# TODO: query for post request?
|
48
|
+
uri.query = URI.encode_www_form(provider_params.merge(filters)) if filters && filters.any?
|
49
|
+
|
50
|
+
ProxyFetcher.config.http_client.fetch(
|
51
|
+
uri.to_s,
|
52
|
+
method: provider_method,
|
53
|
+
headers: provider_headers,
|
54
|
+
params: provider_params
|
55
|
+
)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Loads provider HTML and parses it with internal document object.
|
21
59
|
#
|
22
60
|
# @param url [String]
|
23
61
|
# URL to fetch
|
@@ -29,15 +67,17 @@ module ProxyFetcher
|
|
29
67
|
# ProxyFetcher document object
|
30
68
|
#
|
31
69
|
def load_document(url, filters = {})
|
32
|
-
|
33
|
-
|
34
|
-
uri = URI.parse(url)
|
35
|
-
uri.query = URI.encode_www_form(filters) if filters && filters.any?
|
36
|
-
|
37
|
-
html = ProxyFetcher.config.http_client.fetch(uri.to_s)
|
70
|
+
html = load_html(url, filters)
|
38
71
|
ProxyFetcher::Document.parse(html)
|
39
72
|
end
|
40
73
|
|
74
|
+
def build_proxy(*args)
|
75
|
+
to_proxy(*args)
|
76
|
+
rescue StandardError => error
|
77
|
+
ProxyFetcher.logger.warn("Failed to build Proxy object due to error: #{error.message}")
|
78
|
+
nil
|
79
|
+
end
|
80
|
+
|
41
81
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
42
82
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
43
83
|
# to return all the proxy entries (HTML nodes).
|
@@ -5,11 +5,13 @@ module ProxyFetcher
|
|
5
5
|
# FreeProxyList provider class.
|
6
6
|
class FreeProxyList < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
|
-
|
8
|
+
def provider_url
|
9
|
+
'https://free-proxy-list.net/'
|
10
|
+
end
|
9
11
|
|
10
12
|
# [NOTE] Doesn't support filtering
|
11
|
-
def load_proxy_list(
|
12
|
-
doc = load_document(
|
13
|
+
def load_proxy_list(_filters = {})
|
14
|
+
doc = load_document(provider_url, {})
|
13
15
|
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
14
16
|
end
|
15
17
|
|
@@ -5,7 +5,9 @@ module ProxyFetcher
|
|
5
5
|
# FreeProxyListSSL provider class.
|
6
6
|
class FreeProxyListSSL < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
|
-
|
8
|
+
def provider_url
|
9
|
+
'https://www.sslproxies.org/'
|
10
|
+
end
|
9
11
|
|
10
12
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
11
13
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -15,8 +17,8 @@ module ProxyFetcher
|
|
15
17
|
# Collection of extracted HTML nodes with full proxy info
|
16
18
|
#
|
17
19
|
# [NOTE] Doesn't support filtering
|
18
|
-
def load_proxy_list(
|
19
|
-
doc = load_document(
|
20
|
+
def load_proxy_list(_filters = {})
|
21
|
+
doc = load_document(provider_url, {})
|
20
22
|
doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
|
21
23
|
end
|
22
24
|
|
@@ -7,7 +7,9 @@ module ProxyFetcher
|
|
7
7
|
# GatherProxy provider class.
|
8
8
|
class GatherProxy < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
|
-
|
10
|
+
def provider_url
|
11
|
+
'http://www.gatherproxy.com/'
|
12
|
+
end
|
11
13
|
|
12
14
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
13
15
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -17,7 +19,7 @@ module ProxyFetcher
|
|
17
19
|
# Collection of extracted HTML nodes with full proxy info
|
18
20
|
#
|
19
21
|
def load_proxy_list(*)
|
20
|
-
doc = load_document(
|
22
|
+
doc = load_document(provider_url)
|
21
23
|
doc.xpath('//div[@class="proxy-list"]/table/script')
|
22
24
|
end
|
23
25
|
|
@@ -5,7 +5,9 @@ module ProxyFetcher
|
|
5
5
|
# HTTPTunnel provider class.
|
6
6
|
class HTTPTunnel < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
|
-
|
8
|
+
def provider_url
|
9
|
+
'http://www.httptunnel.ge/ProxyListForFree.aspx'
|
10
|
+
end
|
9
11
|
|
10
12
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
11
13
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -14,8 +16,8 @@ module ProxyFetcher
|
|
14
16
|
# @return [Array<ProxyFetcher::Document::Node>]
|
15
17
|
# Collection of extracted HTML nodes with full proxy info
|
16
18
|
#
|
17
|
-
def load_proxy_list(
|
18
|
-
doc = load_document(
|
19
|
+
def load_proxy_list(_filters = {})
|
20
|
+
doc = load_document(provider_url)
|
19
21
|
doc.xpath('//table[contains(@id, "GridView")]/tr[(count(td)>2)]')
|
20
22
|
end
|
21
23
|
|
@@ -1,11 +1,39 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'json'
|
4
|
+
|
3
5
|
module ProxyFetcher
|
4
6
|
module Providers
|
5
7
|
# ProxyDocker provider class.
|
6
8
|
class ProxyDocker < Base
|
7
9
|
# Provider URL to fetch proxy list
|
8
|
-
|
10
|
+
def provider_url
|
11
|
+
'https://www.proxydocker.com/en/api/proxylist/'
|
12
|
+
end
|
13
|
+
|
14
|
+
def provider_method
|
15
|
+
:post
|
16
|
+
end
|
17
|
+
|
18
|
+
def provider_params
|
19
|
+
{
|
20
|
+
token: 'GmZyl0OJmmgrWakdzO7AFf6AWfkdledR6xmKvGmwmJg',
|
21
|
+
country: 'all',
|
22
|
+
city: 'all',
|
23
|
+
state: 'all',
|
24
|
+
port: 'all',
|
25
|
+
type: 'all',
|
26
|
+
anonymity: 'all',
|
27
|
+
need: 'all',
|
28
|
+
page: '1'
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def provider_headers
|
33
|
+
{
|
34
|
+
cookie: 'PHPSESSID=7f59558ee58b1e4352c4ab4c2f1a3c11'
|
35
|
+
}
|
36
|
+
end
|
9
37
|
|
10
38
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
11
39
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -16,30 +44,42 @@ module ProxyFetcher
|
|
16
44
|
#
|
17
45
|
# [NOTE] Doesn't support direct filters
|
18
46
|
def load_proxy_list(*)
|
19
|
-
|
20
|
-
|
47
|
+
json = JSON.parse(load_html(provider_url, {}))
|
48
|
+
json.fetch('proxies', [])
|
49
|
+
rescue JSON::ParserError
|
50
|
+
[]
|
21
51
|
end
|
22
52
|
|
23
|
-
# Converts
|
53
|
+
# Converts JSON node to <code>ProxyFetcher::Proxy</code>
|
24
54
|
# object.
|
25
55
|
#
|
26
|
-
# @param
|
27
|
-
#
|
56
|
+
# @param node [Hash]
|
57
|
+
# JSON entry from the API response
|
28
58
|
#
|
29
59
|
# @return [ProxyFetcher::Proxy]
|
30
60
|
# Proxy object
|
31
61
|
#
|
32
|
-
def to_proxy(
|
62
|
+
def to_proxy(node)
|
33
63
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
34
|
-
|
35
|
-
proxy.
|
36
|
-
proxy.port = uri.port
|
64
|
+
proxy.addr = node['ip']
|
65
|
+
proxy.port = node['port']
|
37
66
|
|
38
|
-
proxy.type =
|
39
|
-
proxy.anonymity =
|
40
|
-
proxy.country =
|
67
|
+
proxy.type = types_mapping.fetch(node['type'], ProxyFetcher::Proxy::HTTP)
|
68
|
+
proxy.anonymity = "Lvl#{node['anonymity']}"
|
69
|
+
proxy.country = node['country']
|
41
70
|
end
|
42
71
|
end
|
72
|
+
|
73
|
+
def types_mapping
|
74
|
+
{
|
75
|
+
'16' => ProxyFetcher::Proxy::HTTP,
|
76
|
+
'26' => ProxyFetcher::Proxy::HTTPS,
|
77
|
+
'3' => ProxyFetcher::Proxy::SOCKS4,
|
78
|
+
'4' => ProxyFetcher::Proxy::SOCKS5,
|
79
|
+
'56' => ProxyFetcher::Proxy::HTTP, # CON25
|
80
|
+
'6' => ProxyFetcher::Proxy::HTTP # CON80
|
81
|
+
}
|
82
|
+
end
|
43
83
|
end
|
44
84
|
|
45
85
|
ProxyFetcher::Configuration.register_provider(:proxy_docker, ProxyDocker)
|
@@ -7,7 +7,9 @@ module ProxyFetcher
|
|
7
7
|
# ProxyList provider class.
|
8
8
|
class ProxyList < Base
|
9
9
|
# Provider URL to fetch proxy list
|
10
|
-
|
10
|
+
def provider_url
|
11
|
+
'https://proxy-list.org/english/index.php'
|
12
|
+
end
|
11
13
|
|
12
14
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
13
15
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -17,7 +19,7 @@ module ProxyFetcher
|
|
17
19
|
# Collection of extracted HTML nodes with full proxy info
|
18
20
|
#
|
19
21
|
def load_proxy_list(filters = {})
|
20
|
-
doc = load_document(
|
22
|
+
doc = load_document(provider_url, filters)
|
21
23
|
doc.css('.table-wrap .table ul')
|
22
24
|
end
|
23
25
|
|
@@ -5,7 +5,9 @@ module ProxyFetcher
|
|
5
5
|
# XRoxy provider class.
|
6
6
|
class XRoxy < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
|
-
|
8
|
+
def provider_url
|
9
|
+
'https://www.xroxy.com/free-proxy-lists/'
|
10
|
+
end
|
9
11
|
|
10
12
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
11
13
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -15,7 +17,7 @@ module ProxyFetcher
|
|
15
17
|
# Collection of extracted HTML nodes with full proxy info
|
16
18
|
#
|
17
19
|
def load_proxy_list(filters = { type: 'All_http' })
|
18
|
-
doc = load_document(
|
20
|
+
doc = load_document(provider_url, filters)
|
19
21
|
doc.xpath('//div/table/tbody/tr')
|
20
22
|
end
|
21
23
|
|
@@ -9,6 +9,18 @@ module ProxyFetcher
|
|
9
9
|
# @return [String] URL
|
10
10
|
attr_reader :url
|
11
11
|
|
12
|
+
# @!attribute [r] HTTP method
|
13
|
+
# @return [Symbol] HTTP method verb (e.g. :get or :post)
|
14
|
+
attr_reader :method
|
15
|
+
|
16
|
+
# @!attribute [r] HTTP params
|
17
|
+
# @return [Hash] params
|
18
|
+
attr_reader :params
|
19
|
+
|
20
|
+
# @!attribute [r] HTTP headers
|
21
|
+
# @return [Hash] headers
|
22
|
+
attr_reader :headers
|
23
|
+
|
12
24
|
# @!attribute [r] http
|
13
25
|
# @return [Net::HTTP] HTTP client
|
14
26
|
attr_reader :http
|
@@ -29,17 +41,21 @@ module ProxyFetcher
|
|
29
41
|
# @return [String]
|
30
42
|
# resource content
|
31
43
|
#
|
32
|
-
def self.fetch(
|
33
|
-
new(
|
44
|
+
def self.fetch(*args)
|
45
|
+
new(*args).fetch
|
34
46
|
end
|
35
47
|
|
36
48
|
# Initialize HTTP client instance
|
37
49
|
#
|
38
50
|
# @return [HTTPClient]
|
39
51
|
#
|
40
|
-
def initialize(url)
|
52
|
+
def initialize(url, method: :get, params: {}, headers: {})
|
41
53
|
@url = url.to_s
|
42
|
-
@
|
54
|
+
@method = method
|
55
|
+
@params = params
|
56
|
+
@headers = headers
|
57
|
+
|
58
|
+
@http = HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
|
43
59
|
@timeout = ProxyFetcher.config.provider_proxies_load_timeout
|
44
60
|
|
45
61
|
@ssl_ctx = OpenSSL::SSL::SSLContext.new
|
@@ -52,11 +68,16 @@ module ProxyFetcher
|
|
52
68
|
# response body
|
53
69
|
#
|
54
70
|
def fetch
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
71
|
+
# TODO: must be more generic
|
72
|
+
response = if method == :post
|
73
|
+
http.post(url, form: params, ssl_context: ssl_ctx)
|
74
|
+
else
|
75
|
+
http.get(url, ssl_context: ssl_ctx)
|
76
|
+
end
|
77
|
+
|
78
|
+
response.body.to_s
|
79
|
+
rescue StandardError => error
|
80
|
+
ProxyFetcher.logger.warn("Failed to load proxy list for #{url} (#{error.message})")
|
60
81
|
''
|
61
82
|
end
|
62
83
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|