proxy_fetcher 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -5
  3. data/Rakefile +4 -2
  4. data/gemfiles/nokogiri.gemfile +8 -6
  5. data/gemfiles/oga.gemfile +8 -6
  6. data/lib/proxy_fetcher.rb +30 -30
  7. data/lib/proxy_fetcher/client/client.rb +10 -3
  8. data/lib/proxy_fetcher/client/request.rb +4 -4
  9. data/lib/proxy_fetcher/configuration.rb +12 -11
  10. data/lib/proxy_fetcher/document.rb +0 -9
  11. data/lib/proxy_fetcher/document/adapters.rb +1 -1
  12. data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
  13. data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
  14. data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
  15. data/lib/proxy_fetcher/document/node.rb +2 -2
  16. data/lib/proxy_fetcher/exceptions.rb +6 -6
  17. data/lib/proxy_fetcher/manager.rb +2 -2
  18. data/lib/proxy_fetcher/providers/base.rb +42 -22
  19. data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
  20. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
  21. data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
  22. data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
  23. data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
  24. data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
  25. data/lib/proxy_fetcher/proxy.rb +4 -4
  26. data/lib/proxy_fetcher/utils/http_client.rb +10 -8
  27. data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
  28. data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
  29. data/lib/proxy_fetcher/version.rb +3 -3
  30. data/proxy_fetcher.gemspec +19 -16
  31. data/spec/proxy_fetcher/client/client_spec.rb +72 -57
  32. data/spec/proxy_fetcher/configuration_spec.rb +11 -11
  33. data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
  34. data/spec/proxy_fetcher/document/node_spec.rb +4 -4
  35. data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
  36. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
  37. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
  38. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
  39. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
  40. data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
  41. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
  42. data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
  43. data/spec/proxy_fetcher/proxy_spec.rb +14 -14
  44. data/spec/proxy_fetcher/version_spec.rb +2 -0
  45. data/spec/spec_helper.rb +10 -10
  46. data/spec/support/manager_examples.rb +21 -21
  47. metadata +14 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: d256aaf841030823ef753d6c2b2726dd7e8e5306dbddae6b6079a26815df4224
- data.tar.gz: a747a8d9ed567ffa6373bd0d30dc9a99e5320cb03b75dd3feb5ef45465144375
+ metadata.gz: 5a0d7b377ed3c25e50552e89ba76c0e73fad3923bf171e1cee2f592d777787c3
+ data.tar.gz: 83b594e04e03c74a63146a6907c99025d50607c88f8cf94f6d5ce044795243ad
  SHA512:
- metadata.gz: '01997f86deda80383198be18ac9bca8309970fa38ea337a082093822838732038425b642a6806798fa35e2a891b63d679020fad33fd5f447efed00b426b85706'
- data.tar.gz: 2fd10653fdf594e8847d4ebe34cb66c30b3d79d2900bd741768a5d4742ffa50923092f2c8f2bd95e3215f505a073e040e27cad8e2d868ff5837e81078c7d6f95
+ metadata.gz: 1f096c2473035255eb9492297b5641ab5caee62566eb20eb40d3b2f02eea5d06fa1279a2cadeb3266c0f52ce98040185a69678faf116b6398c2c75f79d5c4ebd
+ data.tar.gz: a9372ef8bdbb3c51c5060308cbc46c905df3819e682bceb858ba494f5f94722095f2a2bee94575606f628d091b87325425c218f8c31a2b807bcb159c59ba6e65
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
- source 'https://rubygems.org'
+ # frozen_string_literal: true
+
+ source "https://rubygems.org"

  gemspec

- gem 'nokogiri', '~> 1.8'
- gem 'oga', '~> 2.0'
+ gem "nokogiri", "~> 1.8"
+ gem "oga", "~> 2.0"
+ gem "rubocop", "~> 0.74"

  group :test do
- gem 'coveralls', require: false
- gem 'evil-proxy', '~> 0.2'
+ gem "coveralls", require: false
+ gem "evil-proxy", "~> 0.2"
  end
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
- require 'bundler/gem_tasks'
+ # frozen_string_literal: true

- require 'rspec/core/rake_task'
+ require "bundler/gem_tasks"
+
+ require "rspec/core/rake_task"
  RSpec::Core::RakeTask.new(:spec)

  task default: :spec
data/gemfiles/nokogiri.gemfile CHANGED
@@ -1,11 +1,13 @@
- source 'https://rubygems.org'
+ # frozen_string_literal: true

- gemspec path: '../'
+ source "https://rubygems.org"

- gem 'nokogiri', '~> 1.8'
+ gemspec path: "../"
+
+ gem "nokogiri", "~> 1.8"

  group :test do
- gem 'coveralls', require: false
- gem 'evil-proxy', '~> 0.2'
- gem 'rspec', '~> 3.6'
+ gem "coveralls", require: false
+ gem "evil-proxy", "~> 0.2"
+ gem "rspec", "~> 3.6"
  end
data/gemfiles/oga.gemfile CHANGED
@@ -1,11 +1,13 @@
- source 'https://rubygems.org'
+ # frozen_string_literal: true

- gemspec path: '../'
+ source "https://rubygems.org"

- gem 'oga', '~> 2.0'
+ gemspec path: "../"
+
+ gem "oga", "~> 2.0"

  group :test do
- gem 'coveralls', require: false
- gem 'evil-proxy', '~> 0.2'
- gem 'rspec', '~> 3.6'
+ gem "coveralls", require: false
+ gem "evil-proxy", "~> 0.2"
+ gem "rspec", "~> 3.6"
  end
data/lib/proxy_fetcher.rb CHANGED
@@ -1,44 +1,44 @@
  # frozen_string_literal: true

- require 'uri'
- require 'http'
- require 'logger'
+ require "uri"
+ require "http"
+ require "logger"

- require File.dirname(__FILE__) + '/proxy_fetcher/version'
+ require File.dirname(__FILE__) + "/proxy_fetcher/version"

- require File.dirname(__FILE__) + '/proxy_fetcher/exceptions'
- require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
- require File.dirname(__FILE__) + '/proxy_fetcher/configuration/providers_registry'
- require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
- require File.dirname(__FILE__) + '/proxy_fetcher/manager'
- require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
+ require File.dirname(__FILE__) + "/proxy_fetcher/exceptions"
+ require File.dirname(__FILE__) + "/proxy_fetcher/configuration"
+ require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry"
+ require File.dirname(__FILE__) + "/proxy_fetcher/proxy"
+ require File.dirname(__FILE__) + "/proxy_fetcher/manager"
+ require File.dirname(__FILE__) + "/proxy_fetcher/null_logger"

- require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
- require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
- require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_list_validator'
- require File.dirname(__FILE__) + '/proxy_fetcher/client/client'
- require File.dirname(__FILE__) + '/proxy_fetcher/client/request'
- require File.dirname(__FILE__) + '/proxy_fetcher/client/proxies_registry'
+ require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client"
+ require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator"
+ require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator"
+ require File.dirname(__FILE__) + "/proxy_fetcher/client/client"
+ require File.dirname(__FILE__) + "/proxy_fetcher/client/request"
+ require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry"

- require File.dirname(__FILE__) + '/proxy_fetcher/document'
- require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters'
- require File.dirname(__FILE__) + '/proxy_fetcher/document/node'
- require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/abstract_adapter'
- require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/nokogiri_adapter'
- require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/oga_adapter'
+ require File.dirname(__FILE__) + "/proxy_fetcher/document"
+ require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters"
+ require File.dirname(__FILE__) + "/proxy_fetcher/document/node"
+ require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter"
+ require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter"
+ require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter"

  ##
  # Ruby / JRuby lib for managing proxies
  module ProxyFetcher
  # ProxyFetcher providers namespace
  module Providers
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/base'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list_ssl'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/gather_proxy'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/http_tunnel'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_list'
- require File.dirname(__FILE__) + '/proxy_fetcher/providers/xroxy'
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
+ require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
  end

  # Main ProxyFetcher module.
@@ -75,7 +75,7 @@ module ProxyFetcher

  # Returns ProxyFetcher logger instance.
  #
- # @return [Logger, NullLogger] logger object
+ # @return [Logger, ProxyFetcher::NullLogger] logger object
  #
  def logger
  return @logger if defined?(@logger)
data/lib/proxy_fetcher/client/client.rb CHANGED
@@ -128,7 +128,11 @@ module ProxyFetcher
  #
  def request_with_payload(method, url, payload, headers, options)
  with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
- opts = options.merge(payload: payload, proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers))
+ opts = options.merge(
+ payload: payload,
+ proxy: options.fetch(:proxy, proxy),
+ headers: default_headers.merge(headers)
+ )

  Request.execute(url: url, method: method, **opts)
  end
@@ -138,7 +142,10 @@ module ProxyFetcher
  #
  def request_without_payload(method, url, headers, options)
  with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
- opts = options.merge(proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers))
+ opts = options.merge(
+ proxy: options.fetch(:proxy, proxy),
+ headers: default_headers.merge(headers)
+ )

  Request.execute(url: url, method: method, **opts)
  end
@@ -152,7 +159,7 @@ module ProxyFetcher
  #
  def default_headers
  {
- 'User-Agent' => ProxyFetcher.config.user_agent
+ "User-Agent" => ProxyFetcher.config.user_agent
  }
  end

data/lib/proxy_fetcher/client/request.rb CHANGED
@@ -50,7 +50,7 @@ module ProxyFetcher
  # @return [Request]
  #
  def initialize(args)
- raise ArgumentError, 'args must be a Hash!' unless args.is_a?(Hash)
+ raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)

  @url = args.fetch(:url)
  @method = args.fetch(:method).to_s.downcase
@@ -86,9 +86,9 @@ module ProxyFetcher
  #
  def build_http_client
  HTTP.via(proxy.addr, proxy.port.to_i)
- .headers(headers)
- .timeout(connect: timeout, read: timeout)
- .follow(max_hops: max_redirects)
+ .headers(headers)
+ .timeout(connect: timeout, read: timeout)
+ .follow(max_hops: max_redirects)
  end

  # Default SSL options that will be used for connecting to resources
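
The Client changes above are mostly reformatting (the long options.merge calls are split across lines), but they show the knobs the client reads on every request: a proxy is picked per call via with_proxy_for, the default User-Agent header is merged in, and the retry budget comes from options.fetch(:max_retries, 1000). A minimal usage sketch follows; the public ProxyFetcher::Client.get/post helpers exist in the gem, but the keyword-argument form shown here is an assumption based on the internal signatures in this diff, and the URLs are only examples.

    require "proxy_fetcher"

    # Assumes an HTML adapter has already been configured (see the Configuration section below).
    html = ProxyFetcher::Client.get("http://httpbin.org/ip")

    # Assumed keyword form, mirroring request_with_payload(method, url, payload, headers, options);
    # the retry budget defaults to options.fetch(:max_retries, 1000) per the diff above.
    ProxyFetcher::Client.post(
      "http://httpbin.org/post",
      { name: "John" },
      headers: { "X-Key" => "value" },
      options: { max_retries: 5 }
    )
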
data/lib/proxy_fetcher/configuration.rb CHANGED
@@ -6,15 +6,20 @@ module ProxyFetcher
  #
  class Configuration
  # @!attribute client_timeout
- # @return [Integer] HTTP request timeout (connect / open) for [ProxyFetcher::Client]
+ # @return [Integer]
+ #   HTTP request timeout (connect / open) for [ProxyFetcher::Client]
  attr_accessor :client_timeout

  # @!attribute provider_proxies_load_timeout
- # @return [Integer] HTTP request timeout (connect / open) for loading of proxies list by provider
+ # @return [Integer]
+ #   HTTP request timeout (connect / open) for loading
+ #   of proxies list by provider
  attr_accessor :provider_proxies_load_timeout

  # @!attribute proxy_validation_timeout
- # @return [Integer] HTTP request timeout (connect / open) for proxy validation with [ProxyFetcher::ProxyValidator]
+ # @return [Integer]
+ #   HTTP request timeout (connect / open) for proxy
+ #   validation with [ProxyFetcher::ProxyValidator]
  attr_accessor :proxy_validation_timeout

  # to save compatibility
@@ -30,16 +30,12 @@ module ProxyFetcher
  attr_accessor :user_agent

  # @!attribute [r] logger
- # @return [Object] Logger object
+ # @return [Logger] Logger object
  attr_accessor :logger

  # @!attribute [r] adapter
  # @return [Object] HTML parser adapter
- attr_accessor :adapter
-
- # @!attribute [r] adapter_class
- # @return [Object] HTML adapter class
- attr_reader :adapter_class
+ attr_reader :adapter

  # @!attribute [r] http_client
  # @return [Object] HTTP client class
@@ -58,8 +59,8 @@ module ProxyFetcher
  #
  # Default is Google Chrome 60, but can be changed in <code>ProxyFetcher.config</code>.
  #
- DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 ' \
- '(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36'.freeze
+ DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 " \
+ "(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36"

  # HTML parser adapter name.
  #
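
Taken together, the Configuration changes reflow the YARD comments, tighten the logger return type, and expose adapter through attr_reader instead of attr_accessor (the separate adapter_class reader is gone). A minimal configuration sketch; the timeout values are arbitrary examples, and the adapter assignment follows the gem's own UnknownAdapter message quoted in the exceptions diff further below, which implies a dedicated writer behind the new attr_reader.

    require "proxy_fetcher"

    # HTML parser backend: :nokogiri or :oga, resolved through Document::Adapters.
    ProxyFetcher.config.adapter = :nokogiri

    # Timeouts (in seconds) map to the attr_accessors documented above.
    ProxyFetcher.config.client_timeout = 10
    ProxyFetcher.config.provider_proxies_load_timeout = 30
    ProxyFetcher.config.proxy_validation_timeout = 5
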
data/lib/proxy_fetcher/document.rb CHANGED
@@ -36,14 +36,5 @@ module ProxyFetcher
  def xpath(*args)
  backend.xpath(*args).map { |node| backend.proxy_node.new(node) }
  end
-
- # Searches elements by CSS selector.
- #
- # @return [Array<ProxyFetcher::Document::Node>]
- #   collection of nodes
- #
- def css(*args)
- backend.css(*args).map { |node| backend.proxy_node.new(node) }
- end
  end
  end
data/lib/proxy_fetcher/document/adapters.rb CHANGED
@@ -13,7 +13,7 @@ module ProxyFetcher
  # <code>ProxyFetcher::Document::AbstractAdapter</code>.
  class Adapters
  # Adapters class name suffix
- ADAPTER = 'Adapter'.freeze
+ ADAPTER = "Adapter"
  private_constant :ADAPTER

  class << self
data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb CHANGED
@@ -26,15 +26,6 @@ module ProxyFetcher
  document.xpath(selector)
  end

- # You can override this method in your own adapter class
- #
- # @param selector [String]
- #   CSS selector
- #
- def css(selector)
- document.css(selector)
- end
-
  # Returns <code>Node</code> class that will handle HTML
  # nodes for particular adapter.
  #
@@ -42,7 +33,7 @@ module ProxyFetcher
  # node
  #
  def proxy_node
- self.class.const_get('Node')
+ self.class.const_get("Node")
  end

  # Installs adapter requirements.
@@ -53,8 +44,8 @@ module ProxyFetcher
  def self.setup!(*args)
  install_requirements!(*args)
  self
- rescue LoadError, StandardError => error
- raise Exceptions::AdapterSetupError.new(name, error.message)
+ rescue LoadError, StandardError => e
+ raise Exceptions::AdapterSetupError.new(name, e.message)
  end
  end
  end
data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb CHANGED
@@ -6,7 +6,7 @@ module ProxyFetcher
  class NokogiriAdapter < AbstractAdapter
  # Requires Nokogiri gem to the application.
  def self.install_requirements!
- require 'nokogiri'
+ require "nokogiri"
  end

  # Parses raw HTML content with specific gem.
data/lib/proxy_fetcher/document/adapters/oga_adapter.rb CHANGED
@@ -6,7 +6,7 @@ module ProxyFetcher
  class OgaAdapter < AbstractAdapter
  # Requires Oga gem to the application.
  def self.install_requirements!
- require 'oga'
+ require "oga"
  end

  # Parses raw HTML content with specific gem.
data/lib/proxy_fetcher/document/node.rb CHANGED
@@ -81,9 +81,9 @@ module ProxyFetcher
  # clean text
  #
  def clear(text)
- return '' if text.nil? || text.empty?
+ return "" if text.nil? || text.empty?

- text.strip.gsub(/[ \t]/i, '')
+ text.strip.gsub(/[\t]/i, "")
  end
  end
  end
data/lib/proxy_fetcher/exceptions.rb CHANGED
@@ -13,7 +13,7 @@ module ProxyFetcher
  # @return [WrongCustomClass]
  #
  def initialize(klass, methods)
- required_methods = Array(methods).join(', ')
+ required_methods = Array(methods).join(", ")
  super("#{klass} must respond to [#{required_methods}] class methods!")
  end
  end
@@ -53,7 +53,7 @@ module ProxyFetcher
  # @return [MaximumRedirectsReached]
  #
  def initialize(*)
- super('maximum redirects reached')
+ super("maximum redirects reached")
  end
  end

@@ -66,7 +66,7 @@ module ProxyFetcher
  # @return [MaximumRetriesReached]
  #
  def initialize(*)
- super('reached the maximum number of retries')
+ super("reached the maximum number of retries")
  end
  end

@@ -95,7 +95,7 @@ module ProxyFetcher
  super(<<-MSG.strip.squeeze
  you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
  You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
- MSG
+ MSG
  )
  end
  end
@@ -111,7 +111,7 @@ module ProxyFetcher
  # @return [AdapterSetupError]
  #
  def initialize(adapter_name, error)
- adapter = demodulize(adapter_name.gsub('Adapter', ''))
+ adapter = demodulize(adapter_name.gsub("Adapter", ""))

  super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
  end
@@ -127,7 +127,7 @@ module ProxyFetcher
  #
  def demodulize(path)
  path = path.to_s
- index = path.rindex('::')
+ index = path.rindex("::")

  index ? path[(index + 2)..-1] : path
  end
data/lib/proxy_fetcher/manager.rb CHANGED
@@ -55,7 +55,7 @@ module ProxyFetcher

  # Pop just first proxy (and back it to the end of the proxy list).
  #
- # @return [Proxy]
+ # @return [ProxyFetcher::Proxy, NilClass]
  #   proxy object from the list
  #
  def get
@@ -72,7 +72,7 @@ module ProxyFetcher
  # Pop first valid proxy (and back it to the end of the proxy list)
  # Invalid proxies will be removed from the list
  #
- # @return [Proxy]
+ # @return [ProxyFetcher::Proxy, NilClass]
  #   proxy object from the list
  #
  def get!
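
The Manager change is documentation only: #get and #get! are now annotated as returning either a ProxyFetcher::Proxy or nil, so callers should guard against an empty or exhausted list. A short usage sketch; Manager.new loading proxies from the configured providers is assumed from the rest of the gem, and only the nilable return values come from this diff.

    require "proxy_fetcher"

    manager = ProxyFetcher::Manager.new  # assumed to load proxies from the configured providers

    proxy = manager.get    # ProxyFetcher::Proxy or nil; the entry is rotated back to the end of the list
    proxy = manager.get!   # validates first and drops dead entries; may also return nil

    puts "#{proxy.addr}:#{proxy.port}" if proxy
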
data/lib/proxy_fetcher/providers/base.rb CHANGED
@@ -6,12 +6,15 @@ module ProxyFetcher
  class Base
  # Loads proxy provider page content, extract proxy list from it
  # and convert every entry to proxy object.
- def fetch_proxies!(filters = {})
+ def fetch_proxies(filters = {})
  raw_proxies = load_proxy_list(filters)
  proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
  proxies.reject { |proxy| proxy.addr.nil? }
  end

+ # For retro-compatibility
+ alias fetch_proxies! fetch_proxies
+
  def provider_url
  raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
  end
@@ -24,10 +27,17 @@ module ProxyFetcher
  {}
  end

+ # @return [Hash]
+ #   Provider headers required to fetch the proxy list
+ #
  def provider_headers
  {}
  end

+ def xpath
+ raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
+ end
+
  # Just synthetic sugar to make it easier to call #fetch_proxies! method.
  def self.fetch_proxies!(*args)
  new.fetch_proxies!(*args)
@@ -37,18 +47,29 @@ module ProxyFetcher

  # Loads raw provider HTML with proxies.
  #
+ # @param url [String]
+ #   Provider URL
+ #
+ # @param filters [#to_h]
+ #   Provider filters (Hash-like object)
+ #
  # @return [String]
- #   HTML body
+ #   HTML body from the response
  #
  def load_html(url, filters = {})
- raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
+ unless filters.respond_to?(:to_h)
+ raise ArgumentError, "filters must be a Hash or respond to #to_h"
+ end

- uri = URI.parse(url)
- # TODO: query for post request?
- uri.query = URI.encode_www_form(provider_params.merge(filters)) if filters && filters.any?
+ if filters&.any?
+ # TODO: query for post request?
+ uri = URI.parse(url)
+ uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
+ url = uri.to_s
+ end

  ProxyFetcher.config.http_client.fetch(
- uri.to_s,
+ url,
  method: provider_method,
  headers: provider_headers,
  params: provider_params
@@ -71,29 +92,28 @@ module ProxyFetcher
  ProxyFetcher::Document.parse(html)
  end

+ # Fetches HTML content by sending HTTP request to the provider URL and
+ # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
+ # to return all the proxy entries (HTML nodes).
+ #
+ # @return [Array<ProxyFetcher::Document::Node>]
+ #   Collection of extracted HTML nodes with full proxy info
+ #
+ def load_proxy_list(filters = {})
+ doc = load_document(provider_url, filters)
+ doc.xpath(xpath)
+ end
+
  def build_proxy(*args)
  to_proxy(*args)
- rescue StandardError => error
+ rescue StandardError => e
  ProxyFetcher.logger.warn(
- "Failed to build Proxy object for #{self.class.name} due to error: #{error.message}"
+ "Failed to build Proxy object for #{self.class.name} due to error: #{e.message}"
  )

  nil
  end

- # Fetches HTML content by sending HTTP request to the provider URL and
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
- # to return all the proxy entries (HTML nodes).
- #
- # Abstract method. Must be implemented in a descendant class
- #
- # @return [Array<Document::Node>]
- #   list of proxy elements from the providers HTML content
- #
- def load_proxy_list(*)
- raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
- end
-
  # Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
  #
  # Abstract method. Must be implemented in a descendant class
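
The Base refactoring above inverts the old provider contract: load_proxy_list now lives in Base itself (load_document(provider_url, filters) followed by doc.xpath(xpath)), so a descendant only supplies provider_url, xpath and to_proxy, while fetch_proxies! is kept as an alias of fetch_proxies for old callers. A sketch of what a provider might look like against the 0.11.0 API; the class name, URL and selectors are hypothetical, and the content_at/Proxy attribute helpers are assumed to match what the bundled providers use.

    require "proxy_fetcher"

    # Hypothetical provider, shown only to illustrate the new Base contract.
    class ExampleProvider < ProxyFetcher::Providers::Base
      def provider_url
        "https://proxy-list.example.com/"   # placeholder URL, not a real provider
      end

      def xpath
        "//table[@id='proxies']/tbody/tr"   # one <tr> per proxy entry
      end

      # Turns a single HTML node returned by #xpath into a Proxy object.
      def to_proxy(html_node)
        ProxyFetcher::Proxy.new.tap do |proxy|
          proxy.addr = html_node.content_at("td[1]")
          proxy.port = Integer(html_node.content_at("td[2]"))
        end
      end
    end

    # Old callers keep working thanks to the alias:
    ExampleProvider.new.fetch_proxies!
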