proxy_fetcher 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/LICENSE +1 -1
- data/README.md +12 -1
- data/lib/proxy_fetcher.rb +5 -0
- data/lib/proxy_fetcher/client/client.rb +28 -7
- data/lib/proxy_fetcher/client/proxies_registry.rb +27 -0
- data/lib/proxy_fetcher/client/request.rb +73 -2
- data/lib/proxy_fetcher/configuration.rb +66 -2
- data/lib/proxy_fetcher/configuration/providers_registry.rb +31 -2
- data/lib/proxy_fetcher/document.rb +29 -6
- data/lib/proxy_fetcher/document/adapters.rb +20 -0
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +29 -0
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +26 -0
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +26 -0
- data/lib/proxy_fetcher/document/node.rb +47 -0
- data/lib/proxy_fetcher/exceptions.rb +52 -2
- data/lib/proxy_fetcher/manager.rb +32 -4
- data/lib/proxy_fetcher/providers/base.rb +27 -8
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +2 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -0
- data/lib/proxy_fetcher/providers/gather_proxy.rb +2 -0
- data/lib/proxy_fetcher/providers/http_tunnel.rb +2 -0
- data/lib/proxy_fetcher/providers/proxy_docker.rb +2 -0
- data/lib/proxy_fetcher/providers/proxy_list.rb +2 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +2 -0
- data/lib/proxy_fetcher/proxy.rb +36 -5
- data/lib/proxy_fetcher/utils/http_client.rb +35 -7
- data/lib/proxy_fetcher/utils/proxy_validator.rb +25 -4
- data/lib/proxy_fetcher/version.rb +3 -1
- data/proxy_fetcher.gemspec +1 -1
- data/spec/proxy_fetcher/{client_spec.rb → client/client_spec.rb} +10 -0
- data/spec/proxy_fetcher/configuration_spec.rb +2 -0
- data/spec/proxy_fetcher/document/adapters_spec.rb +2 -0
- data/spec/proxy_fetcher/document/node_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/base_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/proxy_docker_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -0
- data/spec/proxy_fetcher/proxy_spec.rb +2 -0
- data/spec/proxy_fetcher/version_spec.rb +3 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/support/manager_examples.rb +2 -0
- metadata +4 -3
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
module Providers
|
3
5
|
# Base class for all the ProxyFetcher providers.
|
@@ -8,18 +10,26 @@ module ProxyFetcher
|
|
8
10
|
load_proxy_list(filters).map { |html| to_proxy(html) }
|
9
11
|
end
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
new.fetch_proxies!(*args)
|
15
|
-
end
|
13
|
+
# Just syntactic sugar to make it easier to call #fetch_proxies! method.
|
14
|
+
def self.fetch_proxies!(*args)
|
15
|
+
new.fetch_proxies!(*args)
|
16
16
|
end
|
17
17
|
|
18
18
|
protected
|
19
19
|
|
20
20
|
# Loads HTML document with Nokogiri by the URL combined with custom filters
|
21
|
+
#
|
22
|
+
# @param url [String]
|
23
|
+
# URL to fetch
|
24
|
+
#
|
25
|
+
# @param filters [Hash]
|
26
|
+
# filters for proxy provider
|
27
|
+
#
|
28
|
+
# @return [ProxyFetcher::Document]
|
29
|
+
# ProxyFetcher document object
|
30
|
+
#
|
21
31
|
def load_document(url, filters = {})
|
22
|
-
raise ArgumentError, 'filters must be a Hash'
|
32
|
+
raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
|
23
33
|
|
24
34
|
uri = URI.parse(url)
|
25
35
|
uri.query = URI.encode_www_form(filters) if filters && filters.any?
|
@@ -32,13 +42,22 @@ module ProxyFetcher
|
|
32
42
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
33
43
|
# to return all the proxy entries (HTML nodes).
|
34
44
|
#
|
35
|
-
# Abstract method.
|
45
|
+
# Abstract method. Must be implemented in a descendant class
|
46
|
+
#
|
47
|
+
# @return [Array<Document::Node>]
|
48
|
+
# list of proxy elements from the provider's HTML content
|
36
49
|
#
|
37
50
|
def load_proxy_list(*)
|
38
51
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
39
52
|
end
|
40
53
|
|
41
|
-
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance
|
54
|
+
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
|
55
|
+
#
|
56
|
+
# Abstract method. Must be implemented in a descendant class
|
57
|
+
#
|
58
|
+
# @return [Proxy]
|
59
|
+
# new proxy object from the HTML node
|
60
|
+
#
|
42
61
|
def to_proxy(*)
|
43
62
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
44
63
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -1,9 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Proxy object
|
3
5
|
class Proxy
|
4
|
-
|
6
|
+
# @!attribute [rw] addr
|
7
|
+
# @return [String] address (IP or domain)
|
8
|
+
attr_accessor :addr
|
9
|
+
|
10
|
+
# @!attribute [rw] port
|
11
|
+
# @return [Integer] port
|
12
|
+
attr_accessor :port
|
13
|
+
|
14
|
+
# @!attribute [rw] type
|
15
|
+
# @return [String] type (SOCKS, HTTP(S))
|
16
|
+
attr_accessor :type
|
17
|
+
|
18
|
+
# @!attribute [rw] country
|
19
|
+
# @return [String] country or country code
|
20
|
+
attr_accessor :country
|
21
|
+
|
22
|
+
# @!attribute [rw] response_time
|
23
|
+
# @return [Integer] response time (value and measurements depend on the provider)
|
24
|
+
attr_accessor :response_time
|
5
25
|
|
6
|
-
#
|
26
|
+
# @!attribute [rw] anonymity
|
27
|
+
# @return [String] anonymity level (high, elite, transparent, etc)
|
28
|
+
attr_accessor :anonymity
|
29
|
+
|
30
|
+
# Proxy types
|
7
31
|
TYPES = [
|
8
32
|
HTTP = 'HTTP'.freeze,
|
9
33
|
HTTPS = 'HTTPS'.freeze,
|
@@ -31,14 +55,21 @@ module ProxyFetcher
|
|
31
55
|
https? || socks4? || socks5?
|
32
56
|
end
|
33
57
|
|
58
|
+
# Initialize new Proxy
|
59
|
+
#
|
60
|
+
# @param attributes [Hash]
|
61
|
+
# proxy attributes
|
62
|
+
#
|
63
|
+
# @return [Proxy]
|
64
|
+
#
|
34
65
|
def initialize(attributes = {})
|
35
66
|
attributes.each do |attr, value|
|
36
67
|
public_send("#{attr}=", value)
|
37
68
|
end
|
38
69
|
end
|
39
70
|
|
40
|
-
# Checks if proxy object is connectable
|
41
|
-
#
|
71
|
+
# Checks if proxy object is connectable (can be used as a proxy for
|
72
|
+
# HTTP requests).
|
42
73
|
#
|
43
74
|
# @return [Boolean]
|
44
75
|
# true if proxy connectable, otherwise false.
|
@@ -58,7 +89,7 @@ module ProxyFetcher
|
|
58
89
|
URI::Generic.build(host: addr, port: port)
|
59
90
|
end
|
60
91
|
|
61
|
-
# Returns <code>String</
|
92
|
+
# Returns <code>String</code> object with <i>addr:port</i> values of the proxy.
|
62
93
|
#
|
63
94
|
# @return [String]
|
64
95
|
# <i>addr:port</i> string of the proxy.
|
@@ -1,10 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Default ProxyFetcher HTTP client used to fetch proxy lists from
|
3
5
|
# the different providers. Uses ProxyFetcher configuration options
|
4
6
|
# for sending HTTP requests to providers URLs.
|
5
7
|
class HTTPClient
|
6
|
-
|
8
|
+
# @!attribute [r] uri
|
9
|
+
# @return [URI] URI
|
10
|
+
attr_reader :uri
|
11
|
+
|
12
|
+
# @!attribute [r] http
|
13
|
+
# @return [Net::HTTP] HTTP client
|
14
|
+
attr_reader :http
|
7
15
|
|
16
|
+
# Initialize HTTP client instance
|
17
|
+
#
|
18
|
+
# @return [HTTPClient]
|
19
|
+
#
|
8
20
|
def initialize(url)
|
9
21
|
@uri = URI.parse(url)
|
10
22
|
@http = Net::HTTP.new(@uri.host, @uri.port)
|
@@ -14,6 +26,11 @@ module ProxyFetcher
|
|
14
26
|
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
15
27
|
end
|
16
28
|
|
29
|
+
# Fetches resource content by sending HTTP request to it.
|
30
|
+
#
|
31
|
+
# @return [String]
|
32
|
+
# response body
|
33
|
+
#
|
17
34
|
def fetch
|
18
35
|
request = Net::HTTP::Get.new(@uri.to_s)
|
19
36
|
request['Connection'] = 'keep-alive'
|
@@ -22,14 +39,25 @@ module ProxyFetcher
|
|
22
39
|
response.body
|
23
40
|
end
|
24
41
|
|
25
|
-
|
26
|
-
|
42
|
+
# Fetches resource content by sending HTTP request to it.
|
43
|
+
# Syntactic sugar to simplify fetching of URIs.
|
44
|
+
#
|
45
|
+
# @param url [String] URL
|
46
|
+
#
|
47
|
+
# @return [String]
|
48
|
+
# resource content
|
49
|
+
#
|
50
|
+
def self.fetch(url)
|
51
|
+
new(url).fetch
|
27
52
|
end
|
28
53
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
54
|
+
# Checks if URI requires secure connection (HTTPS)
|
55
|
+
#
|
56
|
+
# @return [Boolean]
|
57
|
+
# true if URI is HTTPS, false otherwise
|
58
|
+
#
|
59
|
+
def https?
|
60
|
+
@uri.is_a?(URI::HTTPS)
|
33
61
|
end
|
34
62
|
end
|
35
63
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Default ProxyFetcher proxy validator that checks whether proxy is
|
3
5
|
# connectable or not. It tries to send HEAD request to default
|
@@ -6,6 +8,13 @@ module ProxyFetcher
|
|
6
8
|
# Default URL that will be used to check if proxy can be used.
|
7
9
|
URL_TO_CHECK = 'https://google.com'.freeze
|
8
10
|
|
11
|
+
# Initialize new ProxyValidator instance
|
12
|
+
#
|
13
|
+
# @param proxy_addr [String] proxy address or IP
|
14
|
+
# @param proxy_port [String, Integer] proxy port
|
15
|
+
#
|
16
|
+
# @return [ProxyValidator]
|
17
|
+
#
|
9
18
|
def initialize(proxy_addr, proxy_port)
|
10
19
|
uri = URI.parse(URL_TO_CHECK)
|
11
20
|
@http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
|
@@ -16,6 +25,12 @@ module ProxyFetcher
|
|
16
25
|
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
17
26
|
end
|
18
27
|
|
28
|
+
# Checks if proxy is connectable (can be used to connect
|
29
|
+
# resources via proxy server).
|
30
|
+
#
|
31
|
+
# @return [Boolean]
|
32
|
+
# true if connection to the server using proxy established, otherwise false
|
33
|
+
#
|
19
34
|
def connectable?
|
20
35
|
@http.open_timeout = ProxyFetcher.config.timeout
|
21
36
|
@http.read_timeout = ProxyFetcher.config.timeout
|
@@ -27,10 +42,16 @@ module ProxyFetcher
|
|
27
42
|
false
|
28
43
|
end
|
29
44
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
45
|
+
# Short variant to validate proxy.
|
46
|
+
#
|
47
|
+
# @param proxy_addr [String] proxy address or IP
|
48
|
+
# @param proxy_port [String, Integer] proxy port
|
49
|
+
#
|
50
|
+
# @return [Boolean]
|
51
|
+
# true if connection to the server using proxy established, otherwise false
|
52
|
+
#
|
53
|
+
def self.connectable?(proxy_addr, proxy_port)
|
54
|
+
new(proxy_addr, proxy_port).connectable?
|
34
55
|
end
|
35
56
|
end
|
36
57
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
##
|
3
5
|
# ProxyFetcher gem version.
|
@@ -13,7 +15,7 @@ module ProxyFetcher
|
|
13
15
|
# Minor version number
|
14
16
|
MINOR = 6
|
15
17
|
# Smallest version number
|
16
|
-
TINY = 2
|
18
|
+
TINY = 3
|
17
19
|
|
18
20
|
# Full version number
|
19
21
|
STRING = [MAJOR, MINOR, TINY].compact.join('.')
|
data/proxy_fetcher.gemspec
CHANGED
@@ -5,7 +5,7 @@ require 'proxy_fetcher/version'
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = 'proxy_fetcher'
|
7
7
|
gem.version = ProxyFetcher.gem_version
|
8
|
-
gem.date = '
|
8
|
+
gem.date = '2018-02-13'
|
9
9
|
gem.summary = 'Ruby gem for dealing with proxy lists from different providers'
|
10
10
|
gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \
|
11
11
|
'using proxies by fetching and validating proxy lists from the different providers.'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
require 'json'
|
3
5
|
|
@@ -40,6 +42,14 @@ describe ProxyFetcher::Client do
|
|
40
42
|
expect(content).not_to be_nil
|
41
43
|
expect(content).not_to be_empty
|
42
44
|
end
|
45
|
+
|
46
|
+
it 'successfully returns page content using custom proxy' do
|
47
|
+
manager = ProxyFetcher::Manager.new
|
48
|
+
content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: manager.get! })
|
49
|
+
|
50
|
+
expect(content).not_to be_nil
|
51
|
+
expect(content).not_to be_empty
|
52
|
+
end
|
43
53
|
end
|
44
54
|
|
45
55
|
context 'POST request with the valid proxy' do
|