proxy_fetcher 0.10.2 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +8 -5
- data/Rakefile +4 -2
- data/gemfiles/nokogiri.gemfile +8 -6
- data/gemfiles/oga.gemfile +8 -6
- data/lib/proxy_fetcher.rb +30 -30
- data/lib/proxy_fetcher/client/client.rb +10 -3
- data/lib/proxy_fetcher/client/request.rb +4 -4
- data/lib/proxy_fetcher/configuration.rb +12 -11
- data/lib/proxy_fetcher/document.rb +0 -9
- data/lib/proxy_fetcher/document/adapters.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
- data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +2 -2
- data/lib/proxy_fetcher/exceptions.rb +6 -6
- data/lib/proxy_fetcher/manager.rb +2 -2
- data/lib/proxy_fetcher/providers/base.rb +42 -22
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
- data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
- data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
- data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
- data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
- data/lib/proxy_fetcher/proxy.rb +4 -4
- data/lib/proxy_fetcher/utils/http_client.rb +10 -8
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
- data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
- data/lib/proxy_fetcher/version.rb +3 -3
- data/proxy_fetcher.gemspec +19 -16
- data/spec/proxy_fetcher/client/client_spec.rb +72 -57
- data/spec/proxy_fetcher/configuration_spec.rb +11 -11
- data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
- data/spec/proxy_fetcher/document/node_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
- data/spec/proxy_fetcher/proxy_spec.rb +14 -14
- data/spec/proxy_fetcher/version_spec.rb +2 -0
- data/spec/spec_helper.rb +10 -10
- data/spec/support/manager_examples.rb +21 -21
- metadata +14 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a0d7b377ed3c25e50552e89ba76c0e73fad3923bf171e1cee2f592d777787c3
|
4
|
+
data.tar.gz: 83b594e04e03c74a63146a6907c99025d50607c88f8cf94f6d5ce044795243ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1f096c2473035255eb9492297b5641ab5caee62566eb20eb40d3b2f02eea5d06fa1279a2cadeb3266c0f52ce98040185a69678faf116b6398c2c75f79d5c4ebd
|
7
|
+
data.tar.gz: a9372ef8bdbb3c51c5060308cbc46c905df3819e682bceb858ba494f5f94722095f2a2bee94575606f628d091b87325425c218f8c31a2b807bcb159c59ba6e65
|
data/Gemfile
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
2
4
|
|
3
5
|
gemspec
|
4
6
|
|
5
|
-
gem
|
6
|
-
gem
|
7
|
+
gem "nokogiri", "~> 1.8"
|
8
|
+
gem "oga", "~> 2.0"
|
9
|
+
gem "rubocop", "~> 0.74"
|
7
10
|
|
8
11
|
group :test do
|
9
|
-
gem
|
10
|
-
gem
|
12
|
+
gem "coveralls", require: false
|
13
|
+
gem "evil-proxy", "~> 0.2"
|
11
14
|
end
|
data/Rakefile
CHANGED
data/gemfiles/nokogiri.gemfile
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
source "https://rubygems.org"
|
4
4
|
|
5
|
-
|
5
|
+
gemspec path: "../"
|
6
|
+
|
7
|
+
gem "nokogiri", "~> 1.8"
|
6
8
|
|
7
9
|
group :test do
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
10
|
+
gem "coveralls", require: false
|
11
|
+
gem "evil-proxy", "~> 0.2"
|
12
|
+
gem "rspec", "~> 3.6"
|
11
13
|
end
|
data/gemfiles/oga.gemfile
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
source "https://rubygems.org"
|
4
4
|
|
5
|
-
|
5
|
+
gemspec path: "../"
|
6
|
+
|
7
|
+
gem "oga", "~> 2.0"
|
6
8
|
|
7
9
|
group :test do
|
8
|
-
gem
|
9
|
-
gem
|
10
|
-
gem
|
10
|
+
gem "coveralls", require: false
|
11
|
+
gem "evil-proxy", "~> 0.2"
|
12
|
+
gem "rspec", "~> 3.6"
|
11
13
|
end
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -1,44 +1,44 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
3
|
+
require "uri"
|
4
|
+
require "http"
|
5
|
+
require "logger"
|
6
6
|
|
7
|
-
require File.dirname(__FILE__) +
|
7
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/version"
|
8
8
|
|
9
|
-
require File.dirname(__FILE__) +
|
10
|
-
require File.dirname(__FILE__) +
|
11
|
-
require File.dirname(__FILE__) +
|
12
|
-
require File.dirname(__FILE__) +
|
13
|
-
require File.dirname(__FILE__) +
|
14
|
-
require File.dirname(__FILE__) +
|
9
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/exceptions"
|
10
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/configuration"
|
11
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry"
|
12
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/proxy"
|
13
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/manager"
|
14
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/null_logger"
|
15
15
|
|
16
|
-
require File.dirname(__FILE__) +
|
17
|
-
require File.dirname(__FILE__) +
|
18
|
-
require File.dirname(__FILE__) +
|
19
|
-
require File.dirname(__FILE__) +
|
20
|
-
require File.dirname(__FILE__) +
|
21
|
-
require File.dirname(__FILE__) +
|
16
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client"
|
17
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator"
|
18
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator"
|
19
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/client/client"
|
20
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/client/request"
|
21
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry"
|
22
22
|
|
23
|
-
require File.dirname(__FILE__) +
|
24
|
-
require File.dirname(__FILE__) +
|
25
|
-
require File.dirname(__FILE__) +
|
26
|
-
require File.dirname(__FILE__) +
|
27
|
-
require File.dirname(__FILE__) +
|
28
|
-
require File.dirname(__FILE__) +
|
23
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document"
|
24
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters"
|
25
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document/node"
|
26
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter"
|
27
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter"
|
28
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter"
|
29
29
|
|
30
30
|
##
|
31
31
|
# Ruby / JRuby lib for managing proxies
|
32
32
|
module ProxyFetcher
|
33
33
|
# ProxyFetcher providers namespace
|
34
34
|
module Providers
|
35
|
-
require File.dirname(__FILE__) +
|
36
|
-
require File.dirname(__FILE__) +
|
37
|
-
require File.dirname(__FILE__) +
|
38
|
-
require File.dirname(__FILE__) +
|
39
|
-
require File.dirname(__FILE__) +
|
40
|
-
require File.dirname(__FILE__) +
|
41
|
-
require File.dirname(__FILE__) +
|
35
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
|
36
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
|
37
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
|
38
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
|
39
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
|
40
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
|
41
|
+
require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
|
42
42
|
end
|
43
43
|
|
44
44
|
# Main ProxyFetcher module.
|
@@ -75,7 +75,7 @@ module ProxyFetcher
|
|
75
75
|
|
76
76
|
# Returns ProxyFetcher logger instance.
|
77
77
|
#
|
78
|
-
# @return [Logger, NullLogger] logger object
|
78
|
+
# @return [Logger, ProxyFetcher::NullLogger] logger object
|
79
79
|
#
|
80
80
|
def logger
|
81
81
|
return @logger if defined?(@logger)
|
@@ -128,7 +128,11 @@ module ProxyFetcher
|
|
128
128
|
#
|
129
129
|
def request_with_payload(method, url, payload, headers, options)
|
130
130
|
with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
|
131
|
-
opts = options.merge(
|
131
|
+
opts = options.merge(
|
132
|
+
payload: payload,
|
133
|
+
proxy: options.fetch(:proxy, proxy),
|
134
|
+
headers: default_headers.merge(headers)
|
135
|
+
)
|
132
136
|
|
133
137
|
Request.execute(url: url, method: method, **opts)
|
134
138
|
end
|
@@ -138,7 +142,10 @@ module ProxyFetcher
|
|
138
142
|
#
|
139
143
|
def request_without_payload(method, url, headers, options)
|
140
144
|
with_proxy_for(url, options.fetch(:max_retries, 1000)) do |proxy|
|
141
|
-
opts = options.merge(
|
145
|
+
opts = options.merge(
|
146
|
+
proxy: options.fetch(:proxy, proxy),
|
147
|
+
headers: default_headers.merge(headers)
|
148
|
+
)
|
142
149
|
|
143
150
|
Request.execute(url: url, method: method, **opts)
|
144
151
|
end
|
@@ -152,7 +159,7 @@ module ProxyFetcher
|
|
152
159
|
#
|
153
160
|
def default_headers
|
154
161
|
{
|
155
|
-
|
162
|
+
"User-Agent" => ProxyFetcher.config.user_agent
|
156
163
|
}
|
157
164
|
end
|
158
165
|
|
@@ -50,7 +50,7 @@ module ProxyFetcher
|
|
50
50
|
# @return [Request]
|
51
51
|
#
|
52
52
|
def initialize(args)
|
53
|
-
raise ArgumentError,
|
53
|
+
raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
|
54
54
|
|
55
55
|
@url = args.fetch(:url)
|
56
56
|
@method = args.fetch(:method).to_s.downcase
|
@@ -86,9 +86,9 @@ module ProxyFetcher
|
|
86
86
|
#
|
87
87
|
def build_http_client
|
88
88
|
HTTP.via(proxy.addr, proxy.port.to_i)
|
89
|
-
|
90
|
-
|
91
|
-
|
89
|
+
.headers(headers)
|
90
|
+
.timeout(connect: timeout, read: timeout)
|
91
|
+
.follow(max_hops: max_redirects)
|
92
92
|
end
|
93
93
|
|
94
94
|
# Default SSL options that will be used for connecting to resources
|
@@ -6,15 +6,20 @@ module ProxyFetcher
|
|
6
6
|
#
|
7
7
|
class Configuration
|
8
8
|
# @!attribute client_timeout
|
9
|
-
# @return [Integer]
|
9
|
+
# @return [Integer]
|
10
|
+
# HTTP request timeout (connect / open) for [ProxyFetcher::Client]
|
10
11
|
attr_accessor :client_timeout
|
11
12
|
|
12
13
|
# @!attribute provider_proxies_load_timeout
|
13
|
-
# @return [Integer]
|
14
|
+
# @return [Integer]
|
15
|
+
# HTTP request timeout (connect / open) for loading
|
16
|
+
# of proxies list by provider
|
14
17
|
attr_accessor :provider_proxies_load_timeout
|
15
18
|
|
16
19
|
# @!attribute proxy_validation_timeout
|
17
|
-
# @return [Integer]
|
20
|
+
# @return [Integer]
|
21
|
+
# HTTP request timeout (connect / open) for proxy
|
22
|
+
# validation with [ProxyFetcher::ProxyValidator]
|
18
23
|
attr_accessor :proxy_validation_timeout
|
19
24
|
|
20
25
|
# to save compatibility
|
@@ -30,16 +35,12 @@ module ProxyFetcher
|
|
30
35
|
attr_accessor :user_agent
|
31
36
|
|
32
37
|
# @!attribute [r] logger
|
33
|
-
# @return [
|
38
|
+
# @return [Logger] Logger object
|
34
39
|
attr_accessor :logger
|
35
40
|
|
36
41
|
# @!attribute [r] adapter
|
37
42
|
# @return [Object] HTML parser adapter
|
38
|
-
|
39
|
-
|
40
|
-
# @!attribute [r] adapter_class
|
41
|
-
# @return [Object] HTML adapter class
|
42
|
-
attr_reader :adapter_class
|
43
|
+
attr_reader :adapter
|
43
44
|
|
44
45
|
# @!attribute [r] http_client
|
45
46
|
# @return [Object] HTTP client class
|
@@ -58,8 +59,8 @@ module ProxyFetcher
|
|
58
59
|
#
|
59
60
|
# Default is Google Chrome 60, but can be changed in <code>ProxyFetcher.config</code>.
|
60
61
|
#
|
61
|
-
DEFAULT_USER_AGENT =
|
62
|
-
|
62
|
+
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 " \
|
63
|
+
"(KHTML, like Gecko) Chrome/60.0.3112 Safari/537.36"
|
63
64
|
|
64
65
|
# HTML parser adapter name.
|
65
66
|
#
|
@@ -36,14 +36,5 @@ module ProxyFetcher
|
|
36
36
|
def xpath(*args)
|
37
37
|
backend.xpath(*args).map { |node| backend.proxy_node.new(node) }
|
38
38
|
end
|
39
|
-
|
40
|
-
# Searches elements by CSS selector.
|
41
|
-
#
|
42
|
-
# @return [Array<ProxyFetcher::Document::Node>]
|
43
|
-
# collection of nodes
|
44
|
-
#
|
45
|
-
def css(*args)
|
46
|
-
backend.css(*args).map { |node| backend.proxy_node.new(node) }
|
47
|
-
end
|
48
39
|
end
|
49
40
|
end
|
@@ -26,15 +26,6 @@ module ProxyFetcher
|
|
26
26
|
document.xpath(selector)
|
27
27
|
end
|
28
28
|
|
29
|
-
# You can override this method in your own adapter class
|
30
|
-
#
|
31
|
-
# @param selector [String]
|
32
|
-
# CSS selector
|
33
|
-
#
|
34
|
-
def css(selector)
|
35
|
-
document.css(selector)
|
36
|
-
end
|
37
|
-
|
38
29
|
# Returns <code>Node</code> class that will handle HTML
|
39
30
|
# nodes for particular adapter.
|
40
31
|
#
|
@@ -42,7 +33,7 @@ module ProxyFetcher
|
|
42
33
|
# node
|
43
34
|
#
|
44
35
|
def proxy_node
|
45
|
-
self.class.const_get(
|
36
|
+
self.class.const_get("Node")
|
46
37
|
end
|
47
38
|
|
48
39
|
# Installs adapter requirements.
|
@@ -53,8 +44,8 @@ module ProxyFetcher
|
|
53
44
|
def self.setup!(*args)
|
54
45
|
install_requirements!(*args)
|
55
46
|
self
|
56
|
-
rescue LoadError, StandardError =>
|
57
|
-
raise Exceptions::AdapterSetupError.new(name,
|
47
|
+
rescue LoadError, StandardError => e
|
48
|
+
raise Exceptions::AdapterSetupError.new(name, e.message)
|
58
49
|
end
|
59
50
|
end
|
60
51
|
end
|
@@ -13,7 +13,7 @@ module ProxyFetcher
|
|
13
13
|
# @return [WrongCustomClass]
|
14
14
|
#
|
15
15
|
def initialize(klass, methods)
|
16
|
-
required_methods = Array(methods).join(
|
16
|
+
required_methods = Array(methods).join(", ")
|
17
17
|
super("#{klass} must respond to [#{required_methods}] class methods!")
|
18
18
|
end
|
19
19
|
end
|
@@ -53,7 +53,7 @@ module ProxyFetcher
|
|
53
53
|
# @return [MaximumRedirectsReached]
|
54
54
|
#
|
55
55
|
def initialize(*)
|
56
|
-
super(
|
56
|
+
super("maximum redirects reached")
|
57
57
|
end
|
58
58
|
end
|
59
59
|
|
@@ -66,7 +66,7 @@ module ProxyFetcher
|
|
66
66
|
# @return [MaximumRetriesReached]
|
67
67
|
#
|
68
68
|
def initialize(*)
|
69
|
-
super(
|
69
|
+
super("reached the maximum number of retries")
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -95,7 +95,7 @@ module ProxyFetcher
|
|
95
95
|
super(<<-MSG.strip.squeeze
|
96
96
|
you need to specify adapter for HTML parsing: ProxyFetcher.config.adapter = :nokogiri.
|
97
97
|
You can use one of the predefined adapters (:nokogiri or :oga) or your own implementation.
|
98
|
-
|
98
|
+
MSG
|
99
99
|
)
|
100
100
|
end
|
101
101
|
end
|
@@ -111,7 +111,7 @@ module ProxyFetcher
|
|
111
111
|
# @return [AdapterSetupError]
|
112
112
|
#
|
113
113
|
def initialize(adapter_name, error)
|
114
|
-
adapter = demodulize(adapter_name.gsub(
|
114
|
+
adapter = demodulize(adapter_name.gsub("Adapter", ""))
|
115
115
|
|
116
116
|
super("can't setup '#{adapter}' adapter during the following error:\n\t#{error}'")
|
117
117
|
end
|
@@ -127,7 +127,7 @@ module ProxyFetcher
|
|
127
127
|
#
|
128
128
|
def demodulize(path)
|
129
129
|
path = path.to_s
|
130
|
-
index = path.rindex(
|
130
|
+
index = path.rindex("::")
|
131
131
|
|
132
132
|
index ? path[(index + 2)..-1] : path
|
133
133
|
end
|
@@ -55,7 +55,7 @@ module ProxyFetcher
|
|
55
55
|
|
56
56
|
# Pop just first proxy (and back it to the end of the proxy list).
|
57
57
|
#
|
58
|
-
# @return [Proxy]
|
58
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
59
59
|
# proxy object from the list
|
60
60
|
#
|
61
61
|
def get
|
@@ -72,7 +72,7 @@ module ProxyFetcher
|
|
72
72
|
# Pop first valid proxy (and back it to the end of the proxy list)
|
73
73
|
# Invalid proxies will be removed from the list
|
74
74
|
#
|
75
|
-
# @return [Proxy]
|
75
|
+
# @return [ProxyFetcher::Proxy, NilClass]
|
76
76
|
# proxy object from the list
|
77
77
|
#
|
78
78
|
def get!
|
@@ -6,12 +6,15 @@ module ProxyFetcher
|
|
6
6
|
class Base
|
7
7
|
# Loads proxy provider page content, extract proxy list from it
|
8
8
|
# and convert every entry to proxy object.
|
9
|
-
def fetch_proxies
|
9
|
+
def fetch_proxies(filters = {})
|
10
10
|
raw_proxies = load_proxy_list(filters)
|
11
11
|
proxies = raw_proxies.map { |html_node| build_proxy(html_node) }.compact
|
12
12
|
proxies.reject { |proxy| proxy.addr.nil? }
|
13
13
|
end
|
14
14
|
|
15
|
+
# For retro-compatibility
|
16
|
+
alias fetch_proxies! fetch_proxies
|
17
|
+
|
15
18
|
def provider_url
|
16
19
|
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
17
20
|
end
|
@@ -24,10 +27,17 @@ module ProxyFetcher
|
|
24
27
|
{}
|
25
28
|
end
|
26
29
|
|
30
|
+
# @return [Hash]
|
31
|
+
# Provider headers required to fetch the proxy list
|
32
|
+
#
|
27
33
|
def provider_headers
|
28
34
|
{}
|
29
35
|
end
|
30
36
|
|
37
|
+
def xpath
|
38
|
+
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
39
|
+
end
|
40
|
+
|
31
41
|
# Just synthetic sugar to make it easier to call #fetch_proxies! method.
|
32
42
|
def self.fetch_proxies!(*args)
|
33
43
|
new.fetch_proxies!(*args)
|
@@ -37,18 +47,29 @@ module ProxyFetcher
|
|
37
47
|
|
38
48
|
# Loads raw provider HTML with proxies.
|
39
49
|
#
|
50
|
+
# @param url [String]
|
51
|
+
# Provider URL
|
52
|
+
#
|
53
|
+
# @param filters [#to_h]
|
54
|
+
# Provider filters (Hash-like object)
|
55
|
+
#
|
40
56
|
# @return [String]
|
41
|
-
# HTML body
|
57
|
+
# HTML body from the response
|
42
58
|
#
|
43
59
|
def load_html(url, filters = {})
|
44
|
-
|
60
|
+
unless filters.respond_to?(:to_h)
|
61
|
+
raise ArgumentError, "filters must be a Hash or respond to #to_h"
|
62
|
+
end
|
45
63
|
|
46
|
-
|
47
|
-
|
48
|
-
|
64
|
+
if filters&.any?
|
65
|
+
# TODO: query for post request?
|
66
|
+
uri = URI.parse(url)
|
67
|
+
uri.query = URI.encode_www_form(provider_params.merge(filters.to_h))
|
68
|
+
url = uri.to_s
|
69
|
+
end
|
49
70
|
|
50
71
|
ProxyFetcher.config.http_client.fetch(
|
51
|
-
|
72
|
+
url,
|
52
73
|
method: provider_method,
|
53
74
|
headers: provider_headers,
|
54
75
|
params: provider_params
|
@@ -71,29 +92,28 @@ module ProxyFetcher
|
|
71
92
|
ProxyFetcher::Document.parse(html)
|
72
93
|
end
|
73
94
|
|
95
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
96
|
+
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
97
|
+
# to return all the proxy entries (HTML nodes).
|
98
|
+
#
|
99
|
+
# @return [Array<ProxyFetcher::Document::Node>]
|
100
|
+
# Collection of extracted HTML nodes with full proxy info
|
101
|
+
#
|
102
|
+
def load_proxy_list(filters = {})
|
103
|
+
doc = load_document(provider_url, filters)
|
104
|
+
doc.xpath(xpath)
|
105
|
+
end
|
106
|
+
|
74
107
|
def build_proxy(*args)
|
75
108
|
to_proxy(*args)
|
76
|
-
rescue StandardError =>
|
109
|
+
rescue StandardError => e
|
77
110
|
ProxyFetcher.logger.warn(
|
78
|
-
"Failed to build Proxy object for #{self.class.name} due to error: #{
|
111
|
+
"Failed to build Proxy object for #{self.class.name} due to error: #{e.message}"
|
79
112
|
)
|
80
113
|
|
81
114
|
nil
|
82
115
|
end
|
83
116
|
|
84
|
-
# Fetches HTML content by sending HTTP request to the provider URL and
|
85
|
-
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
86
|
-
# to return all the proxy entries (HTML nodes).
|
87
|
-
#
|
88
|
-
# Abstract method. Must be implemented in a descendant class
|
89
|
-
#
|
90
|
-
# @return [Array<Document::Node>]
|
91
|
-
# list of proxy elements from the providers HTML content
|
92
|
-
#
|
93
|
-
def load_proxy_list(*)
|
94
|
-
raise NotImplementedError, "#{__method__} must be implemented in a descendant class!"
|
95
|
-
end
|
96
|
-
|
97
117
|
# Convert HTML element with proxy info to ProxyFetcher::Proxy instance.
|
98
118
|
#
|
99
119
|
# Abstract method. Must be implemented in a descendant class
|