proxy_fetcher 0.10.2 → 0.11.0

Files changed (47)
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -5
  3. data/Rakefile +4 -2
  4. data/gemfiles/nokogiri.gemfile +8 -6
  5. data/gemfiles/oga.gemfile +8 -6
  6. data/lib/proxy_fetcher.rb +30 -30
  7. data/lib/proxy_fetcher/client/client.rb +10 -3
  8. data/lib/proxy_fetcher/client/request.rb +4 -4
  9. data/lib/proxy_fetcher/configuration.rb +12 -11
  10. data/lib/proxy_fetcher/document.rb +0 -9
  11. data/lib/proxy_fetcher/document/adapters.rb +1 -1
  12. data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
  13. data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
  14. data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
  15. data/lib/proxy_fetcher/document/node.rb +2 -2
  16. data/lib/proxy_fetcher/exceptions.rb +6 -6
  17. data/lib/proxy_fetcher/manager.rb +2 -2
  18. data/lib/proxy_fetcher/providers/base.rb +42 -22
  19. data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
  20. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
  21. data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
  22. data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
  23. data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
  24. data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
  25. data/lib/proxy_fetcher/proxy.rb +4 -4
  26. data/lib/proxy_fetcher/utils/http_client.rb +10 -8
  27. data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
  28. data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
  29. data/lib/proxy_fetcher/version.rb +3 -3
  30. data/proxy_fetcher.gemspec +19 -16
  31. data/spec/proxy_fetcher/client/client_spec.rb +72 -57
  32. data/spec/proxy_fetcher/configuration_spec.rb +11 -11
  33. data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
  34. data/spec/proxy_fetcher/document/node_spec.rb +4 -4
  35. data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
  36. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
  37. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
  38. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
  39. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
  40. data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
  41. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
  42. data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
  43. data/spec/proxy_fetcher/proxy_spec.rb +14 -14
  44. data/spec/proxy_fetcher/version_spec.rb +2 -0
  45. data/spec/spec_helper.rb +10 -10
  46. data/spec/support/manager_examples.rb +21 -21
  47. metadata +14 -8
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d256aaf841030823ef753d6c2b2726dd7e8e5306dbddae6b6079a26815df4224
-  data.tar.gz: a747a8d9ed567ffa6373bd0d30dc9a99e5320cb03b75dd3feb5ef45465144375
+  metadata.gz: 5a0d7b377ed3c25e50552e89ba76c0e73fad3923bf171e1cee2f592d777787c3
+  data.tar.gz: 83b594e04e03c74a63146a6907c99025d50607c88f8cf94f6d5ce044795243ad
 SHA512:
-  metadata.gz: '01997f86deda80383198be18ac9bca8309970fa38ea337a082093822838732038425b642a6806798fa35e2a891b63d679020fad33fd5f447efed00b426b85706'
-  data.tar.gz: 2fd10653fdf594e8847d4ebe34cb66c30b3d79d2900bd741768a5d4742ffa50923092f2c8f2bd95e3215f505a073e040e27cad8e2d868ff5837e81078c7d6f95
+  metadata.gz: 1f096c2473035255eb9492297b5641ab5caee62566eb20eb40d3b2f02eea5d06fa1279a2cadeb3266c0f52ce98040185a69678faf116b6398c2c75f79d5c4ebd
+  data.tar.gz: a9372ef8bdbb3c51c5060308cbc46c905df3819e682bceb858ba494f5f94722095f2a2bee94575606f628d091b87325425c218f8c31a2b807bcb159c59ba6e65
data/Gemfile CHANGED
@@ -1,11 +1,14 @@
-source 'https://rubygems.org'
+# frozen_string_literal: true
+
+source "https://rubygems.org"
 
 gemspec
 
-gem 'nokogiri', '~> 1.8'
-gem 'oga', '~> 2.0'
+gem "nokogiri", "~> 1.8"
+gem "oga", "~> 2.0"
+gem "rubocop", "~> 0.74"
 
 group :test do
-  gem 'coveralls', require: false
-  gem 'evil-proxy', '~> 0.2'
+  gem "coveralls", require: false
+  gem "evil-proxy", "~> 0.2"
 end
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
-require 'bundler/gem_tasks'
+# frozen_string_literal: true
 
-require 'rspec/core/rake_task'
+require "bundler/gem_tasks"
+
+require "rspec/core/rake_task"
 RSpec::Core::RakeTask.new(:spec)
 
 task default: :spec
data/gemfiles/nokogiri.gemfile CHANGED
@@ -1,11 +1,13 @@
-source 'https://rubygems.org'
+# frozen_string_literal: true
 
-gemspec path: '../'
+source "https://rubygems.org"
 
-gem 'nokogiri', '~> 1.8'
+gemspec path: "../"
+
+gem "nokogiri", "~> 1.8"
 
 group :test do
-  gem 'coveralls', require: false
-  gem 'evil-proxy', '~> 0.2'
-  gem 'rspec', '~> 3.6'
+  gem "coveralls", require: false
+  gem "evil-proxy", "~> 0.2"
+  gem "rspec", "~> 3.6"
 end
data/gemfiles/oga.gemfile CHANGED
@@ -1,11 +1,13 @@
-source 'https://rubygems.org'
+# frozen_string_literal: true
 
-gemspec path: '../'
+source "https://rubygems.org"
 
-gem 'oga', '~> 2.0'
+gemspec path: "../"
+
+gem "oga", "~> 2.0"
 
 group :test do
-  gem 'coveralls', require: false
-  gem 'evil-proxy', '~> 0.2'
-  gem 'rspec', '~> 3.6'
+  gem "coveralls", require: false
+  gem "evil-proxy", "~> 0.2"
+  gem "rspec", "~> 3.6"
 end
data/lib/proxy_fetcher.rb CHANGED
@@ -1,44 +1,44 @@
 # frozen_string_literal: true
 
-require 'uri'
-require 'http'
-require 'logger'
+require "uri"
+require "http"
+require "logger"
 
-require File.dirname(__FILE__) + '/proxy_fetcher/version'
+require File.dirname(__FILE__) + "/proxy_fetcher/version"
 
-require File.dirname(__FILE__) + '/proxy_fetcher/exceptions'
-require File.dirname(__FILE__) + '/proxy_fetcher/configuration'
-require File.dirname(__FILE__) + '/proxy_fetcher/configuration/providers_registry'
-require File.dirname(__FILE__) + '/proxy_fetcher/proxy'
-require File.dirname(__FILE__) + '/proxy_fetcher/manager'
-require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
+require File.dirname(__FILE__) + "/proxy_fetcher/exceptions"
+require File.dirname(__FILE__) + "/proxy_fetcher/configuration"
+require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry"
+require File.dirname(__FILE__) + "/proxy_fetcher/proxy"
+require File.dirname(__FILE__) + "/proxy_fetcher/manager"
+require File.dirname(__FILE__) + "/proxy_fetcher/null_logger"
 
-require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
-require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
-require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_list_validator'
-require File.dirname(__FILE__) + '/proxy_fetcher/client/client'
-require File.dirname(__FILE__) + '/proxy_fetcher/client/request'
-require File.dirname(__FILE__) + '/proxy_fetcher/client/proxies_registry'
+require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client"
+require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator"
+require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator"
+require File.dirname(__FILE__) + "/proxy_fetcher/client/client"
+require File.dirname(__FILE__) + "/proxy_fetcher/client/request"
+require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry"
 
-require File.dirname(__FILE__) + '/proxy_fetcher/document'
-require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters'
-require File.dirname(__FILE__) + '/proxy_fetcher/document/node'
-require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/abstract_adapter'
-require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/nokogiri_adapter'
-require File.dirname(__FILE__) + '/proxy_fetcher/document/adapters/oga_adapter'
+require File.dirname(__FILE__) + "/proxy_fetcher/document"
+require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters"
+require File.dirname(__FILE__) + "/proxy_fetcher/document/node"
+require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter"
+require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter"
+require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter"
 
 ##
 # Ruby / JRuby lib for managing proxies
 module ProxyFetcher
   # ProxyFetcher providers namespace
   module Providers
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/base'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/free_proxy_list_ssl'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/gather_proxy'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/http_tunnel'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/proxy_list'
-    require File.dirname(__FILE__) + '/proxy_fetcher/providers/xroxy'
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
+    require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
   end
 
   # Main ProxyFetcher module.
@@ -75,7 +75,7 @@ module ProxyFetcher
 
     # Returns ProxyFetcher logger instance.
    #
-    # @return [Logger, NullLogger] logger object
+    # @return [Logger, ProxyFetcher::NullLogger] logger object
    #
    def logger
      return @logger if defined?(@logger)
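
The @return tag above now names ProxyFetcher::NullLogger explicitly: when no logger is configured, logging calls are silently discarded. A minimal sketch, assuming the logger assigned via ProxyFetcher.config.logger is the one returned by ProxyFetcher.logger (the STDOUT target is just an example, not part of this diff):

    require "logger"
    require "proxy_fetcher"

    # Send ProxyFetcher's internal warnings (e.g. failed proxy builds) to STDOUT.
    ProxyFetcher.config.logger = Logger.new($stdout)

    # With no logger configured, this would hit the NullLogger and print nothing.
    ProxyFetcher.logger.warn("refreshing proxy list")
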
data/lib/proxy_fetcher/client/client.rb CHANGED
@@ -128,7 +128,11 @@ module ProxyFetcher
       #
       def request_with_payload(method, url, payload, headers, options)
         with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
-          opts = options.merge(payload: payload, proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers))
+          opts = options.merge(
+            payload: payload,
+            proxy: options.fetch(:proxy, proxy),
+            headers: default_headers.merge(headers)
+          )
 
           Request.execute(url: url, method: method, **opts)
         end
@@ -138,7 +142,10 @@
       #
       def request_without_payload(method, url, headers, options)
         with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
-          opts = options.merge(proxy: options.fetch(:proxy, proxy), headers: default_headers.merge(headers))
+          opts = options.merge(
+            proxy: options.fetch(:proxy, proxy),
+            headers: default_headers.merge(headers)
+          )
 
           Request.execute(url: url, method: method, **opts)
         end
@@ -152,7 +159,7 @@
       #
       def default_headers
         {
-          'User-Agent' => ProxyFetcher.config.user_agent
+          "User-Agent" => ProxyFetcher.config.user_agent
         }
       end
 
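
The multi-line opts hashes above are a formatting change only; per-request headers are still merged over the default User-Agent header, and options such as :max_retries and an explicit :proxy are still honored. A hedged usage sketch (the exact keyword arguments of Client.get are assumed from these internals, not shown in this diff):

    require "proxy_fetcher"

    # Fetch a page through a proxy, retrying with other proxies on failure.
    # :max_retries and :proxy correspond to the options.fetch calls above.
    html = ProxyFetcher::Client.get(
      "http://example.com",
      headers: { "Accept" => "text/html" },  # merged over the default User-Agent header
      options: { max_retries: 5 }
    )

    puts html
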
data/lib/proxy_fetcher/client/request.rb CHANGED
@@ -50,7 +50,7 @@ module ProxyFetcher
       # @return [Request]
       #
       def initialize(args)
-        raise ArgumentError, 'args must be a Hash!' unless args.is_a?(Hash)
+        raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
 
         @url = args.fetch(:url)
         @method = args.fetch(:method).to_s.downcase
@@ -86,9 +86,9 @@
       #
       def build_http_client
         HTTP.via(proxy.addr, proxy.port.to_i)
-            .headers(headers)
-            .timeout(connect: timeout, read: timeout)
-            .follow(max_hops: max_redirects)
+          .headers(headers)
+          .timeout(connect: timeout, read: timeout)
+          .follow(max_hops: max_redirects)
       end
 
       # Default SSL options that will be used for connecting to resources
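
The re-indented chain in build_http_client is purely cosmetic; the http gem calls are unchanged. For reference, a standalone sketch of the same chain against the http gem API (proxy address, timeouts and redirect limit are placeholder values):

    require "http"

    proxy_addr    = "10.10.10.10"  # placeholder proxy host
    proxy_port    = 3128
    timeout       = 3              # seconds, used for both connect and read
    max_redirects = 5

    client = HTTP.via(proxy_addr, proxy_port)
                 .headers("User-Agent" => "proxy_fetcher example")
                 .timeout(connect: timeout, read: timeout)
                 .follow(max_hops: max_redirects)

    response = client.get("http://example.com")
    puts response.status
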
data/lib/proxy_fetcher/configuration.rb CHANGED
@@ -6,15 +6,20 @@ module ProxyFetcher
   #
   class Configuration
     # @!attribute client_timeout
-    #   @return [Integer] HTTP request timeout (connect / open) for [ProxyFetcher::Client]
+    #   @return [Integer]
+    #     HTTP request timeout (connect / open) for [ProxyFetcher::Client]
     attr_accessor :client_timeout
 
     # @!attribute provider_proxies_load_timeout
-    #   @return [Integer] HTTP request timeout (connect / open) for loading of proxies list by provider
+    #   @return [Integer]
+    #     HTTP request timeout (connect / open) for loading
+    #     of proxies list by provider
     attr_accessor :provider_proxies_load_timeout
 
     # @!attribute proxy_validation_timeout
-    #   @return [Integer] HTTP request timeout (connect / open) for proxy validation with [ProxyFetcher::ProxyValidator]
+    #   @return [Integer]
+    #     HTTP request timeout (connect / open) for proxy
+    #     validation with [ProxyFetcher::ProxyValidator]
     attr_accessor :proxy_validation_timeout
 
     # to save compatibility
@@ -30,16 +35,12 @@
     attr_accessor :user_agent
 
     # @!attribute [r] logger
-    #   @return [Object] Logger object
+    #   @return [Logger] Logger object
     attr_accessor :logger
 
     # @!attribute [r] adapter
     #   @return [Object] HTML parser adapter
-    attr_accessor :adapter
-
-    # @!attribute [r] adapter_class
-    #   @return [Object] HTML adapter class
-    attr_reader :adapter_class
+    attr_reader :adapter
 
     # @!attribute [r] http_client
     #   @return [Object] HTTP client class
@@ -58,8 +59,8 @@
     #
     # Default is Google Chrome 60, but can be changed in <code>ProxyFetcher.config</code>.
     #
-    DEFAULT_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 ' \
-                         '(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36'.freeze
+    DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 " \
+                         "(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36"
 
     # HTML parser adapter name.
     #
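
adapter is now exposed read-only here; the writer referenced in the adapter error message further down (ProxyFetcher.config.adapter = :nokogiri) is defined elsewhere in the class and is not shown in this hunk. A configuration sketch using only the accessors visible in this diff (the timeout values are arbitrary examples):

    require "proxy_fetcher"

    config = ProxyFetcher.config

    config.adapter = :nokogiri  # or :oga
    config.user_agent = ProxyFetcher::Configuration::DEFAULT_USER_AGENT
    config.client_timeout = 10                 # ProxyFetcher::Client HTTP timeout (seconds)
    config.provider_proxies_load_timeout = 10  # timeout for loading provider proxy lists
    config.proxy_validation_timeout = 5        # timeout for ProxyValidator checks
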
data/lib/proxy_fetcher/document.rb CHANGED
@@ -36,14 +36,5 @@ module ProxyFetcher
     def xpath(*args)
       backend.xpath(*args).map { |node| backend.proxy_node.new(node) }
     end
-
-    # Searches elements by CSS selector.
-    #
-    # @return [Array<ProxyFetcher::Document::Node>]
-    #   collection of nodes
-    #
-    def css(*args)
-      backend.css(*args).map { |node| backend.proxy_node.new(node) }
-    end
   end
 end
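
With #css removed, #xpath is the only lookup left on ProxyFetcher::Document, and it is what the providers rely on (see load_proxy_list in providers/base.rb below). A small sketch, assuming a configured adapter; the sample HTML and XPath expression are illustrative only:

    require "proxy_fetcher"

    ProxyFetcher.config.adapter = :nokogiri

    html = "<table><tbody><tr><td>192.168.1.1</td><td>8080</td></tr></tbody></table>"

    doc   = ProxyFetcher::Document.parse(html)
    nodes = doc.xpath("//table/tbody/tr")  # => array of ProxyFetcher::Document::Node

    puts nodes.size
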
data/lib/proxy_fetcher/document/adapters.rb CHANGED
@@ -13,7 +13,7 @@ module ProxyFetcher
   # <code>ProxyFetcher::Document::AbstractAdapter</code>.
   class Adapters
     # Adapters class name suffix
-    ADAPTER = 'Adapter'.freeze
+    ADAPTER = "Adapter"
     private_constant :ADAPTER
 
     class << self
data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb CHANGED
@@ -26,15 +26,6 @@ module ProxyFetcher
       document.xpath(selector)
     end
 
-    # You can override this method in your own adapter class
-    #
-    # @param selector [String]
-    #   CSS selector
-    #
-    def css(selector)
-      document.css(selector)
-    end
-
     # Returns <code>Node</code> class that will handle HTML
     # nodes for particular adapter.
     #
@@ -42,7 +33,7 @@
     #   node
     #
     def proxy_node
-      self.class.const_get('Node')
+      self.class.const_get("Node")
     end
 
     # Installs adapter requirements.
@@ -53,8 +44,8 @@
     def self.setup!(*args)
       install_requirements!(*args)
       self
-    rescue LoadError, StandardError => error
-      raise Exceptions::AdapterSetupError.new(name, error.message)
+    rescue LoadError, StandardError => e
+      raise Exceptions::AdapterSetupError.new(name, e.message)
     end
   end
 end
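
Only the CSS hook is gone; #xpath can still be overridden in a custom adapter, and setup! keeps wrapping install_requirements! failures in AdapterSetupError. A rough sketch of a custom adapter shape, by analogy with the bundled Nokogiri/Oga adapters (the self.parse hook and the nested Node class are assumptions, not shown in this diff):

    require "proxy_fetcher"

    module ProxyFetcher
      class Document
        # Hypothetical adapter that simply reuses Nokogiri under another name.
        class MyAdapter < AbstractAdapter
          def self.install_requirements!
            require "nokogiri"
          end

          # Parsing hook assumed by analogy with the bundled adapters.
          def self.parse(data)
            new(::Nokogiri::HTML(data))
          end

          # Optional: override the default XPath lookup from AbstractAdapter.
          def xpath(selector)
            document.xpath(selector)
          end

          # Node wrapper resolved by #proxy_node via const_get("Node").
          class Node < ProxyFetcher::Document::Node
          end
        end
      end
    end
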
data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb CHANGED
@@ -6,7 +6,7 @@ module ProxyFetcher
   class NokogiriAdapter < AbstractAdapter
     # Requires Nokogiri gem to the application.
     def self.install_requirements!
-      require 'nokogiri'
+      require "nokogiri"
     end
 
     # Parses raw HTML content with specific gem.
data/lib/proxy_fetcher/document/adapters/oga_adapter.rb CHANGED
@@ -6,7 +6,7 @@ module ProxyFetcher
   class OgaAdapter < AbstractAdapter
     # Requires Oga gem to the application.
     def self.install_requirements!
-      require 'oga'
+      require "oga"
     end
 
     # Parses raw HTML content with specific gem.
data/lib/proxy_fetcher/document/node.rb CHANGED
@@ -81,9 +81,9 @@ module ProxyFetcher
     #   clean text
     #
     def clear(text)
-      return '' if text.nil? || text.empty?
+      return "" if text.nil? || text.empty?
 
-      text.strip.gsub(/[ \t]/i, '')
+      text.strip.gsub(/[ \t]/i, "")
     end
   end
 end
data/lib/proxy_fetcher/exceptions.rb CHANGED
@@ -13,7 +13,7 @@ module ProxyFetcher
     #   @return [WrongCustomClass]
     #
     def initialize(klass, methods)
-      required_methods = Array(methods).join(', ')
+      required_methods = Array(methods).join(", ")
       super("#{klass} must respond to [#{required_methods}] class methods!")
     end
   end
@@ -53,7 +53,7 @@
     #   @return [MaximumRedirectsReached]
     #
     def initialize(*)
-      super('maximum redirects reached')
+      super("maximum redirects reached")
     end
   end
 
@@ -66,7 +66,7 @@
     #   @return [MaximumRetriesReached]
     #
     def initialize(*)
-      super('reached the maximum number of retries')
+      super("reached the maximum number of retries")
     end
   end
 
@@ -95,7 +95,7 @@
       super(<<-MSG.strip.squeeze
         you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
         You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
-        MSG
+      MSG
       )
     end
   end
@@ -111,7 +111,7 @@
     #   @return [AdapterSetupError]
     #
     def initialize(adapter_name, error)
-      adapter = demodulize(adapter_name.gsub('Adapter', ''))
+      adapter = demodulize(adapter_name.gsub("Adapter", ""))
 
       super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
     end
@@ -127,7 +127,7 @@
     #
     def demodulize(path)
       path = path.to_s
-      index = path.rindex('::')
+      index = path.rindex("::")
 
       index ? path[(index + 2)..-1] : path
     end
data/lib/proxy_fetcher/manager.rb CHANGED
@@ -55,7 +55,7 @@ module ProxyFetcher
 
     # Pop just first proxy (and back it to the end of the proxy list).
     #
-    # @return [Proxy]
+    # @return [ProxyFetcher::Proxy, NilClass]
     #   proxy object from the list
     #
     def get
@@ -72,7 +72,7 @@
     # Pop first valid proxy (and back it to the end of the proxy list)
     # Invalid proxies will be removed from the list
     #
-    # @return [Proxy]
+    # @return [ProxyFetcher::Proxy, NilClass]
     #   proxy object from the list
     #
     def get!
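
Both #get and #get! are now documented to possibly return nil (for example, when the proxy list is empty), so callers should guard for that. A short usage sketch, assuming Manager.new loads proxies from the configured providers as in previous releases:

    require "proxy_fetcher"

    manager = ProxyFetcher::Manager.new

    # Rotate through the list; #get can return nil if no proxies were loaded.
    proxy = manager.get
    puts proxy ? "#{proxy.addr}:#{proxy.port}" : "no proxies loaded"

    # #get! also validates proxies and drops dead ones from the list.
    if (live = manager.get!)
      puts "first working proxy: #{live.addr}:#{live.port}"
    end
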
data/lib/proxy_fetcher/providers/base.rb CHANGED
@@ -6,12 +6,15 @@ module ProxyFetcher
     class Base
       # Loads proxy provider page content, extract proxy list from it
       # and convert every entry to proxy object.
-      def fetch_proxies!(filters = {})
+      def fetch_proxies(filters = {})
         raw_proxies = load_proxy_list(filters)
         proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
         proxies.reject { |proxy| proxy.addr.nil? }
       end
 
+      # For retro-compatibility
+      alias fetch_proxies! fetch_proxies
+
       def provider_url
         raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
       end
@@ -24,10 +27,17 @@
         {}
       end
 
+      # @return [Hash]
+      #   Provider headers required to fetch the proxy list
+      #
       def provider_headers
         {}
       end
 
+      def xpath
+        raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
+      end
+
       # Just synthetic sugar to make it easier to call #fetch_proxies! method.
       def self.fetch_proxies!(*args)
         new.fetch_proxies!(*args)
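
With the rename to fetch_proxies (the bang version kept as an alias) and the new xpath template method, a concrete provider now only declares where the list lives and how to read a single row; the shared load_proxy_list added in the next hunk handles fetching and node extraction. A hedged sketch of a minimal custom provider (the URL, XPath and to_proxy body are illustrative; the exact Proxy/Node APIs are assumed from other providers, not from this diff):

    require "proxy_fetcher"

    module ProxyFetcher
      module Providers
        # Hypothetical provider built on the Base template methods above.
        class MyProxySite < Base
          def provider_url
            "https://proxy-list.example.com/"  # placeholder URL
          end

          # XPath matching one proxy row, consumed by the shared #load_proxy_list.
          def xpath
            "//table[@id='proxies']/tbody/tr"
          end

          private

          # Convert one HTML node into a proxy object (attribute names assumed).
          def to_proxy(html_node)
            ProxyFetcher::Proxy.new.tap do |proxy|
              proxy.addr = html_node.content_at("td[1]")
              proxy.port = Integer(html_node.content_at("td[2]"))
            end
          end
        end
      end
    end

    # proxies = ProxyFetcher::Providers::MyProxySite.fetch_proxies!  # alias still available
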
@@ -37,18 +47,29 @@
 
       # Loads raw provider HTML with proxies.
       #
+      # @param url [String]
+      #   Provider URL
+      #
+      # @param filters [#to_h]
+      #   Provider filters (Hash-like object)
+      #
       # @return [String]
-      #   HTML body
+      #   HTML body from the response
       #
       def load_html(url, filters = {})
-        raise ArgumentError, 'filters must be a Hash' if filters && !filters.is_a?(Hash)
+        unless filters.respond_to?(:to_h)
+          raise ArgumentError, "filters must be a Hash or respond to #to_h"
+        end
 
-        uri = URI.parse(url)
-        # TODO: query for post request?
-        uri.query = URI.encode_www_form(provider_params.merge(filters)) if filters && filters.any?
+        if filters&.any?
+          # TODO: query for post request?
+          uri = URI.parse(url)
+          uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
+          url = uri.to_s
+        end
 
         ProxyFetcher.config.http_client.fetch(
-          uri.to_s,
+          url,
           method: provider_method,
           headers: provider_headers,
           params: provider_params
@@ -71,29 +92,28 @@
         ProxyFetcher::Document.parse(html)
       end
 
+      # Fetches HTML content by sending HTTP request to the provider URL and
+      # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
+      # to return all the proxy entries (HTML nodes).
+      #
+      # @return [Array<ProxyFetcher::Document::Node>]
+      #   Collection of extracted HTML nodes with full proxy info
+      #
+      def load_proxy_list(filters = {})
+        doc = load_document(provider_url, filters)
+        doc.xpath(xpath)
+      end
+
       def build_proxy(*args)
         to_proxy(*args)
-      rescue StandardError => error
+      rescue StandardError => e
         ProxyFetcher.logger.warn(
-          "Failed to build Proxy object for #{self.class.name} due to error: #{error.message}"
+          "Failed to build Proxy object for #{self.class.name} due to error: #{e.message}"
         )
 
         nil
       end
 
-      # Fetches HTML content by sending HTTP request to the provider URL and
-      # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
-      # to return all the proxy entries (HTML nodes).
-      #
-      # Abstract method. Must be implemented in a descendant class
-      #
-      # @return [Array<Document::Node>]
-      #   list of proxy elements from the providers HTML content
-      #
-      def load_proxy_list(*)
-        raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
-      end
-
       # Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
       #
       # Abstract method. Must be implemented in a descendant class