proxy_fetcher 0.6.2 → 0.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/LICENSE +1 -1
- data/README.md +12 -1
- data/lib/proxy_fetcher.rb +5 -0
- data/lib/proxy_fetcher/client/client.rb +28 -7
- data/lib/proxy_fetcher/client/proxies_registry.rb +27 -0
- data/lib/proxy_fetcher/client/request.rb +73 -2
- data/lib/proxy_fetcher/configuration.rb +66 -2
- data/lib/proxy_fetcher/configuration/providers_registry.rb +31 -2
- data/lib/proxy_fetcher/document.rb +29 -6
- data/lib/proxy_fetcher/document/adapters.rb +20 -0
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +29 -0
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +26 -0
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +26 -0
- data/lib/proxy_fetcher/document/node.rb +47 -0
- data/lib/proxy_fetcher/exceptions.rb +52 -2
- data/lib/proxy_fetcher/manager.rb +32 -4
- data/lib/proxy_fetcher/providers/base.rb +27 -8
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +2 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -0
- data/lib/proxy_fetcher/providers/gather_proxy.rb +2 -0
- data/lib/proxy_fetcher/providers/http_tunnel.rb +2 -0
- data/lib/proxy_fetcher/providers/proxy_docker.rb +2 -0
- data/lib/proxy_fetcher/providers/proxy_list.rb +2 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +2 -0
- data/lib/proxy_fetcher/proxy.rb +36 -5
- data/lib/proxy_fetcher/utils/http_client.rb +35 -7
- data/lib/proxy_fetcher/utils/proxy_validator.rb +25 -4
- data/lib/proxy_fetcher/version.rb +3 -1
- data/proxy_fetcher.gemspec +1 -1
- data/spec/proxy_fetcher/{client_spec.rb → client/client_spec.rb} +10 -0
- data/spec/proxy_fetcher/configuration_spec.rb +2 -0
- data/spec/proxy_fetcher/document/adapters_spec.rb +2 -0
- data/spec/proxy_fetcher/document/node_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/base_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/proxy_docker_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -0
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -0
- data/spec/proxy_fetcher/proxy_spec.rb +2 -0
- data/spec/proxy_fetcher/version_spec.rb +3 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/support/manager_examples.rb +2 -0
- metadata +4 -3
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
module Providers
|
3
5
|
# Base class for all the ProxyFetcher providers.
|
@@ -8,18 +10,26 @@ module ProxyFetcher
|
|
8
10
|
load_proxy_list(filters).map { |html| to_proxy(html) }
|
9
11
|
end
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
new.fetch_proxies!(*args)
|
15
|
-
end
|
13
|
+
# Just syntactic sugar to make it easier to call #fetch_proxies! method.
|
14
|
+
def self.fetch_proxies!(*args)
|
15
|
+
new.fetch_proxies!(*args)
|
16
16
|
end
|
17
17
|
|
18
18
|
protected
|
19
19
|
|
20
20
|
# Loads HTML document with Nokogiri by the URL combined with custom filters
|
21
|
+
#
|
22
|
+
# @param url [String]
|
23
|
+
# URL to fetch
|
24
|
+
#
|
25
|
+
# @param filters [Hash]
|
26
|
+
# filters for proxy provider
|
27
|
+
#
|
28
|
+
# @return [ProxyFetcher::Document]
|
29
|
+
# ProxyFetcher document object
|
30
|
+
#
|
21
31
|
def load_document(url, filters = {})
|
22
|
-
raise ArgumentError, 'filters must be a Hash'
|
32
|
+
raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
|
23
33
|
|
24
34
|
uri = URI.parse(url)
|
25
35
|
uri.query = URI.encode_www_form(filters) if filters && filters.any?
|
@@ -32,13 +42,22 @@ module ProxyFetcher
|
|
32
42
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
33
43
|
# to return all the proxy entries (HTML nodes).
|
34
44
|
#
|
35
|
-
# Abstract method.
|
45
|
+
# Abstract method. Must be implemented in a descendant class
|
46
|
+
#
|
47
|
+
# @return [Array<Document::Node>]
|
48
|
+
# list of proxy elements from the provider's HTML content
|
36
49
|
#
|
37
50
|
def load_proxy_list(*)
|
38
51
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
39
52
|
end
|
40
53
|
|
41
|
-
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance
|
54
|
+
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
|
55
|
+
#
|
56
|
+
# Abstract method. Must be implemented in a descendant class
|
57
|
+
#
|
58
|
+
# @return [Proxy]
|
59
|
+
# new proxy object from the HTML node
|
60
|
+
#
|
42
61
|
def to_proxy(*)
|
43
62
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
44
63
|
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -1,9 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Proxy object
|
3
5
|
class Proxy
|
4
|
-
|
6
|
+
# @!attribute [rw] addr
|
7
|
+
# @return [String] address (IP or domain)
|
8
|
+
attr_accessor :addr
|
9
|
+
|
10
|
+
# @!attribute [rw] port
|
11
|
+
# @return [Integer] port
|
12
|
+
attr_accessor :port
|
13
|
+
|
14
|
+
# @!attribute [rw] type
|
15
|
+
# @return [String] type (SOCKS, HTTP(S))
|
16
|
+
attr_accessor :type
|
17
|
+
|
18
|
+
# @!attribute [rw] country
|
19
|
+
# @return [String] country or country code
|
20
|
+
attr_accessor :country
|
21
|
+
|
22
|
+
# @!attribute [rw] response_time
|
23
|
+
# @return [Integer] response time (value and units depend on the provider)
|
24
|
+
attr_accessor :response_time
|
5
25
|
|
6
|
-
#
|
26
|
+
# @!attribute [rw] anonymity
|
27
|
+
# @return [String] anonymity level (high, elite, transparent, etc)
|
28
|
+
attr_accessor :anonymity
|
29
|
+
|
30
|
+
# Proxy types
|
7
31
|
TYPES = [
|
8
32
|
HTTP = 'HTTP'.freeze,
|
9
33
|
HTTPS = 'HTTPS'.freeze,
|
@@ -31,14 +55,21 @@ module ProxyFetcher
|
|
31
55
|
https? || socks4? || socks5?
|
32
56
|
end
|
33
57
|
|
58
|
+
# Initialize new Proxy
|
59
|
+
#
|
60
|
+
# @param attributes [Hash]
|
61
|
+
# proxy attributes
|
62
|
+
#
|
63
|
+
# @return [Proxy]
|
64
|
+
#
|
34
65
|
def initialize(attributes = {})
|
35
66
|
attributes.each do |attr, value|
|
36
67
|
public_send("#{attr}=", value)
|
37
68
|
end
|
38
69
|
end
|
39
70
|
|
40
|
-
# Checks if proxy object is connectable
|
41
|
-
#
|
71
|
+
# Checks if proxy object is connectable (can be used as a proxy for
|
72
|
+
# HTTP requests).
|
42
73
|
#
|
43
74
|
# @return [Boolean]
|
44
75
|
# true if proxy connectable, otherwise false.
|
@@ -58,7 +89,7 @@ module ProxyFetcher
|
|
58
89
|
URI::Generic.build(host: addr, port: port)
|
59
90
|
end
|
60
91
|
|
61
|
-
# Returns <code>String</
|
92
|
+
# Returns <code>String</code> object with <i>addr:port</i> values of the proxy.
|
62
93
|
#
|
63
94
|
# @return [String]
|
64
95
|
# <i>addr:port</i> string of the proxy.
|
@@ -1,10 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Default ProxyFetcher HTTP client used to fetch proxy lists from
|
3
5
|
# the different providers. Uses ProxyFetcher configuration options
|
4
6
|
# for sending HTTP requests to providers URLs.
|
5
7
|
class HTTPClient
|
6
|
-
|
8
|
+
# @!attribute [r] uri
|
9
|
+
# @return [URI] URI
|
10
|
+
attr_reader :uri
|
11
|
+
|
12
|
+
# @!attribute [r] http
|
13
|
+
# @return [Net::HTTP] HTTP client
|
14
|
+
attr_reader :http
|
7
15
|
|
16
|
+
# Initialize HTTP client instance
|
17
|
+
#
|
18
|
+
# @return [HTTPClient]
|
19
|
+
#
|
8
20
|
def initialize(url)
|
9
21
|
@uri = URI.parse(url)
|
10
22
|
@http = Net::HTTP.new(@uri.host, @uri.port)
|
@@ -14,6 +26,11 @@ module ProxyFetcher
|
|
14
26
|
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
15
27
|
end
|
16
28
|
|
29
|
+
# Fetches resource content by sending HTTP request to it.
|
30
|
+
#
|
31
|
+
# @return [String]
|
32
|
+
# response body
|
33
|
+
#
|
17
34
|
def fetch
|
18
35
|
request = Net::HTTP::Get.new(@uri.to_s)
|
19
36
|
request['Connection'] = 'keep-alive'
|
@@ -22,14 +39,25 @@ module ProxyFetcher
|
|
22
39
|
response.body
|
23
40
|
end
|
24
41
|
|
25
|
-
|
26
|
-
|
42
|
+
# Fetches resource content by sending HTTP request to it.
|
43
|
+
# Syntactic sugar to simplify fetching of URIs.
|
44
|
+
#
|
45
|
+
# @param url [String] URL
|
46
|
+
#
|
47
|
+
# @return [String]
|
48
|
+
# resource content
|
49
|
+
#
|
50
|
+
def self.fetch(url)
|
51
|
+
new(url).fetch
|
27
52
|
end
|
28
53
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
54
|
+
# Checks if URI requires secure connection (HTTPS)
|
55
|
+
#
|
56
|
+
# @return [Boolean]
|
57
|
+
# true if URI is HTTPS, false otherwise
|
58
|
+
#
|
59
|
+
def https?
|
60
|
+
@uri.is_a?(URI::HTTPS)
|
33
61
|
end
|
34
62
|
end
|
35
63
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
# Default ProxyFetcher proxy validator that checks whether proxy is
|
3
5
|
# connectable or not. It tries to send HEAD request to default
|
@@ -6,6 +8,13 @@ module ProxyFetcher
|
|
6
8
|
# Default URL that will be used to check if proxy can be used.
|
7
9
|
URL_TO_CHECK = 'https://google.com'.freeze
|
8
10
|
|
11
|
+
# Initialize new ProxyValidator instance
|
12
|
+
#
|
13
|
+
# @param proxy_addr [String] proxy address or IP
|
14
|
+
# @param proxy_port [String, Integer] proxy port
|
15
|
+
#
|
16
|
+
# @return [ProxyValidator]
|
17
|
+
#
|
9
18
|
def initialize(proxy_addr, proxy_port)
|
10
19
|
uri = URI.parse(URL_TO_CHECK)
|
11
20
|
@http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
|
@@ -16,6 +25,12 @@ module ProxyFetcher
|
|
16
25
|
@http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
17
26
|
end
|
18
27
|
|
28
|
+
# Checks if proxy is connectable (can be used to connect
|
29
|
+
# to resources via proxy server).
|
30
|
+
#
|
31
|
+
# @return [Boolean]
|
32
|
+
# true if connection to the server using proxy established, otherwise false
|
33
|
+
#
|
19
34
|
def connectable?
|
20
35
|
@http.open_timeout = ProxyFetcher.config.timeout
|
21
36
|
@http.read_timeout = ProxyFetcher.config.timeout
|
@@ -27,10 +42,16 @@ module ProxyFetcher
|
|
27
42
|
false
|
28
43
|
end
|
29
44
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
45
|
+
# Short variant to validate proxy.
|
46
|
+
#
|
47
|
+
# @param proxy_addr [String] proxy address or IP
|
48
|
+
# @param proxy_port [String, Integer] proxy port
|
49
|
+
#
|
50
|
+
# @return [Boolean]
|
51
|
+
# true if connection to the server using proxy established, otherwise false
|
52
|
+
#
|
53
|
+
def self.connectable?(proxy_addr, proxy_port)
|
54
|
+
new(proxy_addr, proxy_port).connectable?
|
34
55
|
end
|
35
56
|
end
|
36
57
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module ProxyFetcher
|
2
4
|
##
|
3
5
|
# ProxyFetcher gem version.
|
@@ -13,7 +15,7 @@ module ProxyFetcher
|
|
13
15
|
# Minor version number
|
14
16
|
MINOR = 6
|
15
17
|
# Smallest version number
|
16
|
-
TINY = 2
|
18
|
+
TINY = 3
|
17
19
|
|
18
20
|
# Full version number
|
19
21
|
STRING = [MAJOR, MINOR, TINY].compact.join('.')
|
data/proxy_fetcher.gemspec
CHANGED
@@ -5,7 +5,7 @@ require 'proxy_fetcher/version'
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = 'proxy_fetcher'
|
7
7
|
gem.version = ProxyFetcher.gem_version
|
8
|
-
gem.date = '
|
8
|
+
gem.date = '2018-02-13'
|
9
9
|
gem.summary = 'Ruby gem for dealing with proxy lists from different providers'
|
10
10
|
gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \
|
11
11
|
'using proxies by fetching and validating proxy lists from the different providers.'
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
require 'json'
|
3
5
|
|
@@ -40,6 +42,14 @@ describe ProxyFetcher::Client do
|
|
40
42
|
expect(content).not_to be_nil
|
41
43
|
expect(content).not_to be_empty
|
42
44
|
end
|
45
|
+
|
46
|
+
it 'successfully returns page content using custom proxy' do
|
47
|
+
manager = ProxyFetcher::Manager.new
|
48
|
+
content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: manager.get! })
|
49
|
+
|
50
|
+
expect(content).not_to be_nil
|
51
|
+
expect(content).not_to be_empty
|
52
|
+
end
|
43
53
|
end
|
44
54
|
|
45
55
|
context 'POST request with the valid proxy' do
|