proxy_fetcher 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -1
  3. data/LICENSE +1 -1
  4. data/README.md +12 -1
  5. data/lib/proxy_fetcher.rb +5 -0
  6. data/lib/proxy_fetcher/client/client.rb +28 -7
  7. data/lib/proxy_fetcher/client/proxies_registry.rb +27 -0
  8. data/lib/proxy_fetcher/client/request.rb +73 -2
  9. data/lib/proxy_fetcher/configuration.rb +66 -2
  10. data/lib/proxy_fetcher/configuration/providers_registry.rb +31 -2
  11. data/lib/proxy_fetcher/document.rb +29 -6
  12. data/lib/proxy_fetcher/document/adapters.rb +20 -0
  13. data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +29 -0
  14. data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +26 -0
  15. data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +26 -0
  16. data/lib/proxy_fetcher/document/node.rb +47 -0
  17. data/lib/proxy_fetcher/exceptions.rb +52 -2
  18. data/lib/proxy_fetcher/manager.rb +32 -4
  19. data/lib/proxy_fetcher/providers/base.rb +27 -8
  20. data/lib/proxy_fetcher/providers/free_proxy_list.rb +2 -0
  21. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -0
  22. data/lib/proxy_fetcher/providers/gather_proxy.rb +2 -0
  23. data/lib/proxy_fetcher/providers/http_tunnel.rb +2 -0
  24. data/lib/proxy_fetcher/providers/proxy_docker.rb +2 -0
  25. data/lib/proxy_fetcher/providers/proxy_list.rb +2 -0
  26. data/lib/proxy_fetcher/providers/xroxy.rb +2 -0
  27. data/lib/proxy_fetcher/proxy.rb +36 -5
  28. data/lib/proxy_fetcher/utils/http_client.rb +35 -7
  29. data/lib/proxy_fetcher/utils/proxy_validator.rb +25 -4
  30. data/lib/proxy_fetcher/version.rb +3 -1
  31. data/proxy_fetcher.gemspec +1 -1
  32. data/spec/proxy_fetcher/{client_spec.rb → client/client_spec.rb} +10 -0
  33. data/spec/proxy_fetcher/configuration_spec.rb +2 -0
  34. data/spec/proxy_fetcher/document/adapters_spec.rb +2 -0
  35. data/spec/proxy_fetcher/document/node_spec.rb +2 -0
  36. data/spec/proxy_fetcher/providers/base_spec.rb +2 -0
  37. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -0
  38. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -0
  39. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -0
  40. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -0
  41. data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +2 -0
  42. data/spec/proxy_fetcher/providers/proxy_docker_spec.rb +2 -0
  43. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -0
  44. data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -0
  45. data/spec/proxy_fetcher/proxy_spec.rb +2 -0
  46. data/spec/proxy_fetcher/version_spec.rb +3 -0
  47. data/spec/spec_helper.rb +2 -0
  48. data/spec/support/manager_examples.rb +2 -0
  49. metadata +4 -3
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # Base class for all the ProxyFetcher providers.
@@ -8,18 +10,26 @@ module ProxyFetcher
8
10
  load_proxy_list(filters).map { |html| to_proxy(html) }
9
11
  end
10
12
 
11
- class << self
12
- # Just synthetic sugar to make it easier to call #fetch_proxies! method.
13
- def fetch_proxies!(*args)
14
- new.fetch_proxies!(*args)
15
- end
13
+ # Just syntactic sugar to make it easier to call #fetch_proxies! method.
14
+ def self.fetch_proxies!(*args)
15
+ new.fetch_proxies!(*args)
16
16
  end
17
17
 
18
18
  protected
19
19
 
20
20
  # Loads HTML document with Nokogiri by the URL combined with custom filters
21
+ #
22
+ # @param url [String]
23
+ # URL to fetch
24
+ #
25
+ # @param filters [Hash]
26
+ # filters for proxy provider
27
+ #
28
+ # @return [ProxyFetcher::Document]
29
+ # ProxyFetcher document object
30
+ #
21
31
  def load_document(url, filters = {})
22
- raise ArgumentError, 'filters must be a Hash' unless filters.is_a?(Hash)
32
+ raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
23
33
 
24
34
  uri = URI.parse(url)
25
35
  uri.query = URI.encode_www_form(filters) if filters && filters.any?
@@ -32,13 +42,22 @@ module ProxyFetcher
32
42
  # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
33
43
  # to return all the proxy entries (HTML nodes).
34
44
  #
35
- # Abstract method.
45
+ # Abstract method. Must be implemented in a descendant class
46
+ #
47
+ # @return [Array<Document::Node>]
48
+ # list of proxy elements from the provider's HTML content
36
49
  #
37
50
  def load_proxy_list(*)
38
51
  raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
39
52
  end
40
53
 
41
- # Convert HTML element with proxy info to ProxyFetcher::Proxy instance
54
+ # Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
55
+ #
56
+ # Abstract method. Must be implemented in a descendant class
57
+ #
58
+ # @return [Proxy]
59
+ # new proxy object from the HTML node
60
+ #
42
61
  def to_proxy(*)
43
62
  raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
44
63
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # FreeProxyList provider class.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # FreeProxyListSSL provider class.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'json'
2
4
 
3
5
  module ProxyFetcher
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # HTTPTunnel provider class.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # ProxyDocker provider class.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'base64'
2
4
 
3
5
  module ProxyFetcher
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  module Providers
3
5
  # XRoxy provider class.
@@ -1,9 +1,33 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  # Proxy object
3
5
  class Proxy
4
- attr_accessor :addr, :port, :type, :country, :response_time, :anonymity
6
+ # @!attribute [rw] addr
7
+ # @return [String] address (IP or domain)
8
+ attr_accessor :addr
9
+
10
+ # @!attribute [rw] port
11
+ # @return [Integer] port
12
+ attr_accessor :port
13
+
14
+ # @!attribute [rw] type
15
+ # @return [String] type (SOCKS, HTTP(S))
16
+ attr_accessor :type
17
+
18
+ # @!attribute [rw] country
19
+ # @return [String] country or country code
20
+ attr_accessor :country
21
+
22
+ # @!attribute [rw] response_time
23
+ # @return [Integer] response time (value and units of measurement depend on the provider)
24
+ attr_accessor :response_time
5
25
 
6
- # Proxy type
26
+ # @!attribute [rw] anonymity
27
+ # @return [String] anonymity level (high, elite, transparent, etc)
28
+ attr_accessor :anonymity
29
+
30
+ # Proxy types
7
31
  TYPES = [
8
32
  HTTP = 'HTTP'.freeze,
9
33
  HTTPS = 'HTTPS'.freeze,
@@ -31,14 +55,21 @@ module ProxyFetcher
31
55
  https? || socks4? || socks5?
32
56
  end
33
57
 
58
+ # Initialize new Proxy
59
+ #
60
+ # @param attributes [Hash]
61
+ # proxy attributes
62
+ #
63
+ # @return [Proxy]
64
+ #
34
65
  def initialize(attributes = {})
35
66
  attributes.each do |attr, value|
36
67
  public_send("#{attr}=", value)
37
68
  end
38
69
  end
39
70
 
40
- # Checks if proxy object is connectable? (can be used as a proxy for
41
- # network requests).
71
+ # Checks if proxy object is connectable (can be used as a proxy for
72
+ # HTTP requests).
42
73
  #
43
74
  # @return [Boolean]
44
75
  # true if proxy connectable, otherwise false.
@@ -58,7 +89,7 @@ module ProxyFetcher
58
89
  URI::Generic.build(host: addr, port: port)
59
90
  end
60
91
 
61
- # Returns <code>String</object> object with <i>addr:port<i> values of the proxy.
92
+ # Returns <code>String</code> object with <i>addr:port</i> values of the proxy.
62
93
  #
63
94
  # @return [String]
64
95
  # proxy address in the <i>addr:port</i> format.
@@ -1,10 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  # Default ProxyFetcher HTTP client used to fetch proxy lists from
3
5
  # the different providers. Uses ProxyFetcher configuration options
4
6
  # for sending HTTP requests to providers URLs.
5
7
  class HTTPClient
6
- attr_reader :uri, :http
8
+ # @!attribute [r] uri
9
+ # @return [URI] URI
10
+ attr_reader :uri
11
+
12
+ # @!attribute [r] http
13
+ # @return [Net::HTTP] HTTP client
14
+ attr_reader :http
7
15
 
16
+ # Initialize HTTP client instance
17
+ #
18
+ # @return [HTTPClient]
19
+ #
8
20
  def initialize(url)
9
21
  @uri = URI.parse(url)
10
22
  @http = Net::HTTP.new(@uri.host, @uri.port)
@@ -14,6 +26,11 @@ module ProxyFetcher
14
26
  @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
15
27
  end
16
28
 
29
+ # Fetches resource content by sending HTTP request to it.
30
+ #
31
+ # @return [String]
32
+ # response body
33
+ #
17
34
  def fetch
18
35
  request = Net::HTTP::Get.new(@uri.to_s)
19
36
  request['Connection'] = 'keep-alive'
@@ -22,14 +39,25 @@ module ProxyFetcher
22
39
  response.body
23
40
  end
24
41
 
25
- def https?
26
- @uri.is_a?(URI::HTTPS)
42
+ # Fetches resource content by sending HTTP request to it.
43
+ # Syntactic sugar to simplify fetching URIs.
44
+ #
45
+ # @param url [String] URL
46
+ #
47
+ # @return [String]
48
+ # resource content
49
+ #
50
+ def self.fetch(url)
51
+ new(url).fetch
27
52
  end
28
53
 
29
- class << self
30
- def fetch(url)
31
- new(url).fetch
32
- end
54
+ # Checks if URI requires secure connection (HTTPS)
55
+ #
56
+ # @return [Boolean]
57
+ # true if URI is HTTPS, false otherwise
58
+ #
59
+ def https?
60
+ @uri.is_a?(URI::HTTPS)
33
61
  end
34
62
  end
35
63
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  # Default ProxyFetcher proxy validator that checks either proxy
3
5
  # connectable or not. It tries to send HEAD request to default
@@ -6,6 +8,13 @@ module ProxyFetcher
6
8
  # Default URL that will be used to check if proxy can be used.
7
9
  URL_TO_CHECK = 'https://google.com'.freeze
8
10
 
11
+ # Initialize new ProxyValidator instance
12
+ #
13
+ # @param proxy_addr [String] proxy address or IP
14
+ # @param proxy_port [String, Integer] proxy port
15
+ #
16
+ # @return [ProxyValidator]
17
+ #
9
18
  def initialize(proxy_addr, proxy_port)
10
19
  uri = URI.parse(URL_TO_CHECK)
11
20
  @http = Net::HTTP.new(uri.host, uri.port, proxy_addr, proxy_port.to_i)
@@ -16,6 +25,12 @@ module ProxyFetcher
16
25
  @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
17
26
  end
18
27
 
28
+ # Checks if proxy is connectable (can be used to connect to
29
+ # resources via proxy server).
30
+ #
31
+ # @return [Boolean]
32
+ # true if connection to the server using proxy established, otherwise false
33
+ #
19
34
  def connectable?
20
35
  @http.open_timeout = ProxyFetcher.config.timeout
21
36
  @http.read_timeout = ProxyFetcher.config.timeout
@@ -27,10 +42,16 @@ module ProxyFetcher
27
42
  false
28
43
  end
29
44
 
30
- class << self
31
- def connectable?(proxy_addr, proxy_port)
32
- new(proxy_addr, proxy_port).connectable?
33
- end
45
+ # Short variant to validate proxy.
46
+ #
47
+ # @param proxy_addr [String] proxy address or IP
48
+ # @param proxy_port [String, Integer] proxy port
49
+ #
50
+ # @return [Boolean]
51
+ # true if connection to the server using proxy established, otherwise false
52
+ #
53
+ def self.connectable?(proxy_addr, proxy_port)
54
+ new(proxy_addr, proxy_port).connectable?
34
55
  end
35
56
  end
36
57
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module ProxyFetcher
2
4
  ##
3
5
  # ProxyFetcher gem version.
@@ -13,7 +15,7 @@ module ProxyFetcher
13
15
  # Minor version number
14
16
  MINOR = 6
15
17
  # Smallest version number
16
- TINY = 2
18
+ TINY = 3
17
19
 
18
20
  # Full version number
19
21
  STRING = [MAJOR, MINOR, TINY].compact.join('.')
@@ -5,7 +5,7 @@ require 'proxy_fetcher/version'
5
5
  Gem::Specification.new do |gem|
6
6
  gem.name = 'proxy_fetcher'
7
7
  gem.version = ProxyFetcher.gem_version
8
- gem.date = '2017-12-27'
8
+ gem.date = '2018-02-13'
9
9
  gem.summary = 'Ruby gem for dealing with proxy lists from different providers'
10
10
  gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \
11
11
  'using proxies by fetching and validating proxy lists from the different providers.'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
  require 'json'
3
5
 
@@ -40,6 +42,14 @@ describe ProxyFetcher::Client do
40
42
  expect(content).not_to be_nil
41
43
  expect(content).not_to be_empty
42
44
  end
45
+
46
+ it 'successfully returns page content using custom proxy' do
47
+ manager = ProxyFetcher::Manager.new
48
+ content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: manager.get! })
49
+
50
+ expect(content).not_to be_nil
51
+ expect(content).not_to be_empty
52
+ end
43
53
  end
44
54
 
45
55
  context 'POST request with the valid proxy' do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Configuration do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Document::Adapters do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Document::Node do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::Base do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::FreeProxyList do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::FreeProxyListSSL do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::GatherProxy do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::HTTPTunnel do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe 'Multiple proxy providers' do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::ProxyDocker do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::ProxyList do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Providers::XRoxy do
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe ProxyFetcher::Proxy do
@@ -0,0 +1,3 @@
1
+ RSpec.describe ProxyFetcher::VERSION do
2
+ it { expect(ProxyFetcher::VERSION::STRING).to eq '0.6.3' }
3
+ end