proxy_fetcher 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/Gemfile +8 -5
  3. data/Rakefile +4 -2
  4. data/gemfiles/nokogiri.gemfile +8 -6
  5. data/gemfiles/oga.gemfile +8 -6
  6. data/lib/proxy_fetcher.rb +30 -30
  7. data/lib/proxy_fetcher/client/client.rb +10 -3
  8. data/lib/proxy_fetcher/client/request.rb +4 -4
  9. data/lib/proxy_fetcher/configuration.rb +12 -11
  10. data/lib/proxy_fetcher/document.rb +0 -9
  11. data/lib/proxy_fetcher/document/adapters.rb +1 -1
  12. data/lib/proxy_fetcher/document/adapters/abstract_adapter.rb +3 -12
  13. data/lib/proxy_fetcher/document/adapters/nokogiri_adapter.rb +1 -1
  14. data/lib/proxy_fetcher/document/adapters/oga_adapter.rb +1 -1
  15. data/lib/proxy_fetcher/document/node.rb +2 -2
  16. data/lib/proxy_fetcher/exceptions.rb +6 -6
  17. data/lib/proxy_fetcher/manager.rb +2 -2
  18. data/lib/proxy_fetcher/providers/base.rb +42 -22
  19. data/lib/proxy_fetcher/providers/free_proxy_list.rb +30 -10
  20. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +7 -16
  21. data/lib/proxy_fetcher/providers/gather_proxy.rb +9 -17
  22. data/lib/proxy_fetcher/providers/http_tunnel.rb +11 -19
  23. data/lib/proxy_fetcher/providers/proxy_list.rb +8 -16
  24. data/lib/proxy_fetcher/providers/xroxy.rb +9 -17
  25. data/lib/proxy_fetcher/proxy.rb +4 -4
  26. data/lib/proxy_fetcher/utils/http_client.rb +10 -8
  27. data/lib/proxy_fetcher/utils/proxy_list_validator.rb +3 -1
  28. data/lib/proxy_fetcher/utils/proxy_validator.rb +1 -1
  29. data/lib/proxy_fetcher/version.rb +3 -3
  30. data/proxy_fetcher.gemspec +19 -16
  31. data/spec/proxy_fetcher/client/client_spec.rb +72 -57
  32. data/spec/proxy_fetcher/configuration_spec.rb +11 -11
  33. data/spec/proxy_fetcher/document/adapters_spec.rb +8 -8
  34. data/spec/proxy_fetcher/document/node_spec.rb +4 -4
  35. data/spec/proxy_fetcher/providers/base_spec.rb +9 -9
  36. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +2 -2
  37. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +2 -2
  38. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +2 -2
  39. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +2 -2
  40. data/spec/proxy_fetcher/providers/multiple_providers_spec.rb +4 -4
  41. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +2 -2
  42. data/spec/proxy_fetcher/providers/xroxy_spec.rb +2 -2
  43. data/spec/proxy_fetcher/proxy_spec.rb +14 -14
  44. data/spec/proxy_fetcher/version_spec.rb +2 -0
  45. data/spec/spec_helper.rb +10 -10
  46. data/spec/support/manager_examples.rb +21 -21
  47. metadata +14 -8
@@ -6,13 +6,12 @@ module ProxyFetcher
6
6
  class FreeProxyList < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- 'https://free-proxy-list.net/'
9
+ "https://free-proxy-list.net/"
10
10
  end
11
11
 
12
12
  # [NOTE] Doesn't support filtering
13
- def load_proxy_list(_filters = {})
14
- doc = load_document(provider_url, {})
15
- doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
13
+ def xpath
14
+ '//table[@id="proxylisttable"]/tbody/tr'
16
15
  end
17
16
 
18
17
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -26,10 +25,10 @@ module ProxyFetcher
26
25
  #
27
26
  def to_proxy(html_node)
28
27
  ProxyFetcher::Proxy.new.tap do |proxy|
29
- proxy.addr = html_node.content_at('td[1]')
30
- proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, ''))
31
- proxy.country = html_node.content_at('td[4]')
32
- proxy.anonymity = html_node.content_at('td[5]')
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.anonymity = html_node.content_at("td[5]")
33
32
  proxy.type = parse_type(html_node)
34
33
  end
35
34
  end
@@ -45,8 +44,29 @@ module ProxyFetcher
45
44
  # Proxy type
46
45
  #
47
46
  def parse_type(html_node)
48
- https = html_node.content_at('td[6]')
49
- https && https.casecmp('yes').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
47
+ https = html_node.content_at("td[6]")
48
+ # frozen_string_literal: true
49
+ # FreeProxyList provider class.
50
+ # Provider URL to fetch proxy list
51
+ # [NOTE] Doesn't support filtering
52
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
53
+ # object.
54
+ #
55
+ # @param html_node [Object]
56
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
57
+ #
58
+ # @return [ProxyFetcher::Proxy]
59
+ # Proxy object
60
+ #
61
+ # Parses HTML node to extract proxy type.
62
+ #
63
+ # @param html_node [Object]
64
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
65
+ #
66
+ # @return [String]
67
+ # Proxy type
68
+ #
69
+ https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
50
70
  end
51
71
  end
52
72
 
@@ -6,20 +6,11 @@ module ProxyFetcher
6
6
  class FreeProxyListSSL < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- 'https://www.sslproxies.org/'
9
+ "https://www.sslproxies.org/"
10
10
  end
11
11
 
12
- # Fetches HTML content by sending HTTP request to the provider URL and
13
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
14
- # to return all the proxy entries (HTML nodes).
15
- #
16
- # @return [Array<ProxyFetcher::Document::Node>]
17
- # Collection of extracted HTML nodes with full proxy info
18
- #
19
- # [NOTE] Doesn't support filtering
20
- def load_proxy_list(_filters = {})
21
- doc = load_document(provider_url, {})
22
- doc.xpath('//table[@id="proxylisttable"]/tbody/tr')
12
+ def xpath
13
+ '//table[@id="proxylisttable"]/tbody/tr'
23
14
  end
24
15
 
25
16
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -33,10 +24,10 @@ module ProxyFetcher
33
24
  #
34
25
  def to_proxy(html_node)
35
26
  ProxyFetcher::Proxy.new.tap do |proxy|
36
- proxy.addr = html_node.content_at('td[1]')
37
- proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, ''))
38
- proxy.country = html_node.content_at('td[4]')
39
- proxy.anonymity = html_node.content_at('td[5]')
27
+ proxy.addr = html_node.content_at("td[1]")
28
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
29
+ proxy.country = html_node.content_at("td[4]")
30
+ proxy.anonymity = html_node.content_at("td[5]")
40
31
  proxy.type = ProxyFetcher::Proxy::HTTPS
41
32
  end
42
33
  end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'json'
3
+ require "json"
4
4
 
5
5
  module ProxyFetcher
6
6
  module Providers
@@ -8,19 +8,11 @@ module ProxyFetcher
8
8
  class GatherProxy < Base
9
9
  # Provider URL to fetch proxy list
10
10
  def provider_url
11
- 'http://www.gatherproxy.com/'
11
+ "http://www.gatherproxy.com/"
12
12
  end
13
13
 
14
- # Fetches HTML content by sending HTTP request to the provider URL and
15
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
16
- # to return all the proxy entries (HTML nodes).
17
- #
18
- # @return [Array<ProxyFetcher::Document::Node>]
19
- # Collection of extracted HTML nodes with full proxy info
20
- #
21
- def load_proxy_list(*)
22
- doc = load_document(provider_url)
23
- doc.xpath('//div[@class="proxy-list"]/table/script')
14
+ def xpath
15
+ '//div[@class="proxy-list"]/table/script'
24
16
  end
25
17
 
26
18
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -36,11 +28,11 @@ module ProxyFetcher
36
28
  json = parse_json(html_node)
37
29
 
38
30
  ProxyFetcher::Proxy.new.tap do |proxy|
39
- proxy.addr = json['PROXY_IP']
40
- proxy.port = json['PROXY_PORT'].to_i(16)
41
- proxy.anonymity = json['PROXY_TYPE']
42
- proxy.country = json['PROXY_COUNTRY']
43
- proxy.response_time = json['PROXY_TIME'].to_i
31
+ proxy.addr = json["PROXY_IP"]
32
+ proxy.port = json["PROXY_PORT"].to_i(16)
33
+ proxy.anonymity = json["PROXY_TYPE"]
34
+ proxy.country = json["PROXY_COUNTRY"]
35
+ proxy.response_time = json["PROXY_TIME"].to_i
44
36
  proxy.type = ProxyFetcher::Proxy::HTTP
45
37
  end
46
38
  end
@@ -6,19 +6,11 @@ module ProxyFetcher
6
6
  class HTTPTunnel < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- 'http://www.httptunnel.ge/ProxyListForFree.aspx'
9
+ "http://www.httptunnel.ge/ProxyListForFree.aspx"
10
10
  end
11
11
 
12
- # Fetches HTML content by sending HTTP request to the provider URL and
13
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
14
- # to return all the proxy entries (HTML nodes).
15
- #
16
- # @return [Array<ProxyFetcher::Document::Node>]
17
- # Collection of extracted HTML nodes with full proxy info
18
- #
19
- def load_proxy_list(_filters = {})
20
- doc = load_document(provider_url)
21
- doc.xpath('//table[contains(@id, "GridView")]/tr[(count(td)>2)]')
12
+ def xpath
13
+ '//table[contains(@id, "GridView")]/tr[(count(td)>2)]'
22
14
  end
23
15
 
24
16
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -53,7 +45,7 @@ module ProxyFetcher
53
45
  # URI object
54
46
  #
55
47
  def parse_proxy_uri(html_node)
56
- full_addr = html_node.content_at('td[1]')
48
+ full_addr = html_node.content_at("td[1]")
57
49
  URI.parse("http://#{full_addr}")
58
50
  end
59
51
 
@@ -66,7 +58,7 @@ module ProxyFetcher
66
58
  # Country code
67
59
  #
68
60
  def parse_country(html_node)
69
- html_node.find('.//img').attr('title')
61
+ html_node.find(".//img").attr("title")
70
62
  end
71
63
 
72
64
  # Parses HTML node to extract proxy anonymity level.
@@ -78,14 +70,14 @@ module ProxyFetcher
78
70
  # Anonymity level
79
71
  #
80
72
  def parse_anonymity(html_node)
81
- transparency = html_node.content_at('td[5]').to_sym
73
+ transparency = html_node.content_at("td[5]").to_sym
82
74
 
83
75
  {
84
- A: 'Anonymous',
85
- E: 'Elite',
86
- T: 'Transparent',
87
- U: 'Unknown'
88
- }.fetch(transparency, 'Unknown')
76
+ A: "Anonymous",
77
+ E: "Elite",
78
+ T: "Transparent",
79
+ U: "Unknown"
80
+ }.fetch(transparency, "Unknown")
89
81
  end
90
82
  end
91
83
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'base64'
3
+ require "base64"
4
4
 
5
5
  module ProxyFetcher
6
6
  module Providers
@@ -8,19 +8,11 @@ module ProxyFetcher
8
8
  class ProxyList < Base
9
9
  # Provider URL to fetch proxy list
10
10
  def provider_url
11
- 'https://proxy-list.org/english/index.php'
11
+ "https://proxy-list.org/english/index.php"
12
12
  end
13
13
 
14
- # Fetches HTML content by sending HTTP request to the provider URL and
15
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
16
- # to return all the proxy entries (HTML nodes).
17
- #
18
- # @return [Array<ProxyFetcher::Document::Node>]
19
- # Collection of extracted HTML nodes with full proxy info
20
- #
21
- def load_proxy_list(filters = {})
22
- doc = load_document(provider_url, filters)
23
- doc.css('.table-wrap .table ul')
14
+ def xpath
15
+ '//div[@class="table-wrap"]/div[@class="table"]/ul'
24
16
  end
25
17
 
26
18
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -38,9 +30,9 @@ module ProxyFetcher
38
30
  proxy.addr = uri.host
39
31
  proxy.port = uri.port
40
32
 
41
- proxy.type = html_node.content_at('li[2]')
42
- proxy.anonymity = html_node.content_at('li[4]')
43
- proxy.country = html_node.find("li[5]//span[@class='country']").attr('title')
33
+ proxy.type = html_node.content_at("li[2]")
34
+ proxy.anonymity = html_node.content_at("li[4]")
35
+ proxy.country = html_node.find("li[5]//span[@class='country']").attr("title")
44
36
  end
45
37
  end
46
38
 
@@ -55,7 +47,7 @@ module ProxyFetcher
55
47
  # URI object
56
48
  #
57
49
  def parse_proxy_uri(html_node)
58
- full_addr = ::Base64.decode64(html_node.at_css('li script').html.match(/'(.+)'/)[1])
50
+ full_addr = ::Base64.decode64(html_node.at_css("li script").html.match(/'(.+)'/)[1])
59
51
  URI.parse("http://#{full_addr}")
60
52
  end
61
53
  end
@@ -6,19 +6,11 @@ module ProxyFetcher
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- 'https://www.xroxy.com/free-proxy-lists/'
9
+ "https://www.xroxy.com/free-proxy-lists/"
10
10
  end
11
11
 
12
- # Fetches HTML content by sending HTTP request to the provider URL and
13
- # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
14
- # to return all the proxy entries (HTML nodes).
15
- #
16
- # @return [Array<ProxyFetcher::Document::Node>]
17
- # Collection of extracted HTML nodes with full proxy info
18
- #
19
- def load_proxy_list(filters = { type: 'All_http' })
20
- doc = load_document(provider_url, filters)
21
- doc.xpath('//div/table/tbody/tr')
12
+ def xpath
13
+ "//div/table/tbody/tr"
22
14
  end
23
15
 
24
16
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -32,12 +24,12 @@ module ProxyFetcher
32
24
  #
33
25
  def to_proxy(html_node)
34
26
  ProxyFetcher::Proxy.new.tap do |proxy|
35
- proxy.addr = html_node.content_at('td[1]')
36
- proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, ''))
37
- proxy.anonymity = html_node.content_at('td[3]')
38
- proxy.country = html_node.content_at('td[5]')
39
- proxy.response_time = Integer(html_node.content_at('td[6]'))
40
- proxy.type = html_node.content_at('td[3]')
27
+ proxy.addr = html_node.content_at("td[1]")
28
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
29
+ proxy.anonymity = html_node.content_at("td[3]")
30
+ proxy.country = html_node.content_at("td[5]")
31
+ proxy.response_time = Integer(html_node.content_at("td[6]"))
32
+ proxy.type = html_node.content_at("td[3]")
41
33
  end
42
34
  end
43
35
  end
@@ -29,10 +29,10 @@ module ProxyFetcher
29
29
 
30
30
  # Proxy types
31
31
  TYPES = [
32
- HTTP = 'HTTP'.freeze,
33
- HTTPS = 'HTTPS'.freeze,
34
- SOCKS4 = 'SOCKS4'.freeze,
35
- SOCKS5 = 'SOCKS5'.freeze
32
+ HTTP = "HTTP",
33
+ HTTPS = "HTTPS",
34
+ SOCKS4 = "SOCKS4",
35
+ SOCKS5 = "SOCKS5"
36
36
  ].freeze
37
37
 
38
38
  # Proxy type predicates (#socks4?, #https?)
@@ -70,22 +70,24 @@ module ProxyFetcher
70
70
  def fetch
71
71
  response = process_http_request
72
72
  response.body.to_s
73
- rescue StandardError => error
74
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{error.message})")
75
- ''
73
+ rescue StandardError => e
74
+ ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
75
+ ""
76
76
  end
77
77
 
78
78
  def fetch_with_headers
79
79
  process_http_request
80
- rescue StandardError => error
81
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{error.message})")
82
- HTTP::Response.new(version: '1.1', status: 500, body: '')
80
+ rescue StandardError => e
81
+ ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
82
+ HTTP::Response.new(version: "1.1", status: 500, body: "")
83
83
  end
84
84
 
85
85
  protected
86
86
 
87
87
  def process_http_request(http_method: method, http_params: params)
88
- raise ArgumentError, 'wrong http method name!' unless HTTP::Request::METHODS.include?(http_method)
88
+ unless HTTP::Request::METHODS.include?(http_method)
89
+ raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
90
+ end
89
91
 
90
92
  http.public_send(
91
93
  http_method.to_sym, url,
@@ -101,7 +103,7 @@ module ProxyFetcher
101
103
  #
102
104
  def default_headers
103
105
  {
104
- 'User-Agent' => ProxyFetcher.config.user_agent
106
+ "User-Agent" => ProxyFetcher.config.user_agent
105
107
  }
106
108
  end
107
109
  end
@@ -34,7 +34,9 @@ module ProxyFetcher
34
34
  proxy = target_proxies_lock.synchronize { target_proxies.shift }
35
35
  break unless proxy
36
36
 
37
- connectable_proxies_lock.synchronize { connectable_proxies << proxy } if proxy.connectable?
37
+ if proxy.connectable?
38
+ connectable_proxies_lock.synchronize { connectable_proxies << proxy }
39
+ end
38
40
  end
39
41
  end
40
42
  end
@@ -6,7 +6,7 @@ module ProxyFetcher
6
6
  # URL to check if proxy can be used (aka connectable?).
7
7
  class ProxyValidator
8
8
  # Default URL that will be used to check if proxy can be used.
9
- URL_TO_CHECK = 'https://google.com'.freeze
9
+ URL_TO_CHECK = "https://google.com"
10
10
 
11
11
  # Short variant to validate proxy.
12
12
  #
@@ -13,11 +13,11 @@ module ProxyFetcher
13
13
  # Major version number
14
14
  MAJOR = 0
15
15
  # Minor version number
16
- MINOR = 10
16
+ MINOR = 11
17
17
  # Smallest version number
18
- TINY = 2
18
+ TINY = 0
19
19
 
20
20
  # Full version number
21
- STRING = [MAJOR, MINOR, TINY].compact.join('.')
21
+ STRING = [MAJOR, MINOR, TINY].compact.join(".")
22
22
  end
23
23
  end
@@ -1,25 +1,28 @@
1
- $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
1
+ # frozen_string_literal: true
2
2
 
3
- require 'proxy_fetcher/version'
3
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
4
+
5
+ require "proxy_fetcher/version"
4
6
 
5
7
  Gem::Specification.new do |gem|
6
- gem.name = 'proxy_fetcher'
8
+ gem.name = "proxy_fetcher"
7
9
  gem.version = ProxyFetcher.gem_version
8
- gem.summary = 'Ruby gem for dealing with proxy lists from different providers'
9
- gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \
10
- 'using proxies by fetching and validating proxy lists from the different providers.'
11
- gem.authors = ['Nikita Bulai']
12
- gem.email = 'bulajnikita@gmail.com'
13
- gem.require_paths = ['lib']
14
- gem.bindir = 'bin'
10
+ gem.summary = "Ruby gem for dealing with proxy lists from different providers"
11
+ gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \
12
+ "using proxies by fetching and validating proxy lists from " \
13
+ "the different providers."
14
+ gem.authors = ["Nikita Bulai"]
15
+ gem.email = "bulajnikita@gmail.com"
16
+ gem.require_paths = ["lib"]
17
+ gem.bindir = "bin"
15
18
  gem.files = `git ls-files`.split($RS) - %w[README.md .travis.yml .rubocop.yml]
16
19
  gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
17
- gem.homepage = 'http://github.com/nbulaj/proxy_fetcher'
18
- gem.license = 'MIT'
19
- gem.required_ruby_version = '>= 2.0.0'
20
+ gem.homepage = "http://github.com/nbulaj/proxy_fetcher"
21
+ gem.license = "MIT"
22
+ gem.required_ruby_version = ">= 2.3.0"
20
23
 
21
- gem.add_runtime_dependency 'http', '~> 3.0'
24
+ gem.add_runtime_dependency "http", ">= 3", "< 5"
22
25
 
23
- gem.add_development_dependency 'rake', '>= 12.0'
24
- gem.add_development_dependency 'rspec', '~> 3.5'
26
+ gem.add_development_dependency "rake", ">= 12.0"
27
+ gem.add_development_dependency "rspec", "~> 3.5"
25
28
  end
@@ -1,19 +1,22 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'spec_helper'
4
- require 'json'
3
+ require "spec_helper"
4
+ require "json"
5
5
 
6
- require 'evil-proxy'
7
- require 'evil-proxy/async'
6
+ require "evil-proxy"
7
+ require "evil-proxy/async"
8
8
 
9
- xdescribe ProxyFetcher::Client do
9
+ describe ProxyFetcher::Client do
10
10
  before :all do
11
11
  ProxyFetcher.configure do |config|
12
12
  config.provider = :xroxy
13
13
  config.client_timeout = 5
14
+ config.logger = ProxyFetcher::NullLogger.new
14
15
  end
15
16
 
16
- @server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: true
17
+ quiet = ENV.key?("LOG_MITM") ? ENV["LOG_MITM"] == "false" : true
18
+
19
+ @server = EvilProxy::MITMProxyServer.new Port: 3128, Quiet: quiet
17
20
  @server.start
18
21
  end
19
22
 
@@ -21,121 +24,133 @@ xdescribe ProxyFetcher::Client do
21
24
  @server.shutdown
22
25
  end
23
26
 
27
+ let(:local_proxy) { ProxyFetcher::Proxy.new(addr: "127.0.0.1", port: 3128, type: "HTTP, HTTPS") }
28
+
24
29
  # Use local proxy server in order to avoid side effects, non-working proxies, etc
25
30
  before :each do
26
- proxy = ProxyFetcher::Proxy.new(addr: '127.0.0.1', port: 3128, type: 'HTTP, HTTPS')
27
- ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [proxy])
28
- allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies!).and_return([proxy])
31
+ ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [local_proxy])
32
+ allow_any_instance_of(ProxyFetcher::Providers::Base).to receive(:fetch_proxies).and_return([local_proxy])
29
33
  end
30
34
 
31
- context 'GET request with the valid proxy' do
32
- it 'successfully returns page content for HTTP' do
33
- content = ProxyFetcher::Client.get('http://httpbin.org')
35
+ context "GET request with the valid proxy" do
36
+ it "successfully returns page content for HTTP" do
37
+ content = ProxyFetcher::Client.get("http://httpbin.org/get")
34
38
 
35
39
  expect(content).not_to be_empty
36
40
  end
37
41
 
38
- it 'successfully returns page content for HTTPS' do
39
- content = ProxyFetcher::Client.get('https://httpbin.org')
42
+ # TODO: oh this SSL / MITM proxies ....
43
+ xit "successfully returns page content for HTTPS" do
44
+ content = ProxyFetcher::Client.get("https://httpbin.org/get")
40
45
 
41
46
  expect(content).not_to be_empty
42
47
  end
43
48
 
44
- it 'successfully returns page content using custom proxy' do
45
- manager = ProxyFetcher::Manager.new
46
-
47
- proxy = manager.get! until proxy
48
- content = ProxyFetcher::Client.get('http://httpbin.org', options: { proxy: proxy })
49
+ it "successfully returns page content using custom proxy" do
50
+ content = ProxyFetcher::Client.get("http://httpbin.org/get", options: { proxy: local_proxy })
49
51
 
50
52
  expect(content).not_to be_empty
51
53
  end
52
54
  end
53
55
 
54
- context 'POST request with the valid proxy' do
55
- it 'successfully returns page content for HTTP' do
56
+ context "POST request with the valid proxy" do
57
+ it "successfully returns page content for HTTP" do
56
58
  headers = {
57
- 'X-Proxy-Fetcher-Version' => ProxyFetcher::VERSION::STRING
59
+ "X-Proxy-Fetcher-Version" => ProxyFetcher::VERSION::STRING
58
60
  }
59
- content = ProxyFetcher::Client.post('http://httpbin.org/post', { param: 'value' } , headers: headers)
61
+
62
+ content = ProxyFetcher::Client.post(
63
+ "http://httpbin.org/post",
64
+ { param: "value" },
65
+ headers: headers
66
+ )
60
67
 
61
68
  expect(content).not_to be_empty
62
69
 
63
70
  json = JSON.parse(content)
64
71
 
65
- expect(json['headers']['X-Proxy-Fetcher-Version']).to eq(ProxyFetcher::VERSION::STRING)
66
- expect(json['headers']['User-Agent']).to eq(ProxyFetcher.config.user_agent)
72
+ expect(json["headers"]["X-Proxy-Fetcher-Version"]).to eq(ProxyFetcher::VERSION::STRING)
73
+ expect(json["headers"]["User-Agent"]).to eq(ProxyFetcher.config.user_agent)
67
74
  end
68
75
  end
69
76
 
70
- context 'PUT request with the valid proxy' do
71
- it 'successfully returns page content for HTTP' do
72
- content = ProxyFetcher::Client.put('http://httpbin.org/put', 'param=PutValue')
77
+ # TODO: EvilProxy incompatible with latest Ruby/Webrick
78
+ # @see https://github.com/bbtfr/evil-proxy/issues/10
79
+ if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.6")
80
+ context "PUT request with the valid proxy" do
81
+ it "successfully returns page content for HTTP" do
82
+ content = ProxyFetcher::Client.put("http://httpbin.org/put", "param=PutValue")
73
83
 
74
- expect(content).not_to be_empty
84
+ expect(content).not_to be_empty
75
85
 
76
- json = JSON.parse(content)
86
+ json = JSON.parse(content)
77
87
 
78
- expect(json['data']).to eq('param=PutValue')
88
+ expect(json["form"]["param"]).to eq("PutValue")
89
+ end
79
90
  end
80
- end
81
91
 
82
- context 'PATCH request with the valid proxy' do
83
- it 'successfully returns page content for HTTP' do
84
- content = ProxyFetcher::Client.patch('http://httpbin.org/patch', param: 'value')
92
+ context "PATCH request with the valid proxy" do
93
+ it "successfully returns page content for HTTP" do
94
+ content = ProxyFetcher::Client.patch("http://httpbin.org/patch", param: "value")
85
95
 
86
- expect(content).not_to be_empty
96
+ expect(content).not_to be_empty
87
97
 
88
- json = JSON.parse(content)
98
+ json = JSON.parse(content)
89
99
 
90
- expect(json['form']['param']).to eq('value')
100
+ expect(json["form"]["param"]).to eq("value")
101
+ end
91
102
  end
92
103
  end
93
104
 
94
- context 'DELETE request with the valid proxy' do
95
- it 'successfully returns page content for HTTP' do
96
- content = ProxyFetcher::Client.delete('http://httpbin.org/delete')
105
+ context "DELETE request with the valid proxy" do
106
+ it "successfully returns page content for HTTP" do
107
+ content = ProxyFetcher::Client.delete("http://httpbin.org/delete")
97
108
 
98
109
  expect(content).not_to be_empty
99
110
  end
100
111
  end
101
112
 
102
- context 'HEAD request with the valid proxy' do
103
- it 'successfully works' do
104
- content = ProxyFetcher::Client.head('http://httpbin.org')
113
+ context "HEAD request with the valid proxy" do
114
+ it "successfully works" do
115
+ content = ProxyFetcher::Client.head("http://httpbin.org")
105
116
 
106
117
  expect(content).to be_empty
107
118
  end
108
119
  end
109
120
 
110
- context 'retries' do
111
- it 'raises an error when reaches max retries limit' do
121
+ xcontext "retries" do
122
+ it "raises an error when reaches max retries limit" do
112
123
  allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
113
124
 
114
- expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
125
+ expect { ProxyFetcher::Client.get("http://httpbin.org") }
126
+ .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
115
127
  end
116
128
 
117
- it 'raises an error when http request returns an error' do
129
+ it "raises an error when http request returns an error" do
118
130
  allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
119
131
 
120
- expect { ProxyFetcher::Client.get('http://httpbin.org') }.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
132
+ expect { ProxyFetcher::Client.get("http://httpbin.org") }
133
+ .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
121
134
  end
122
135
 
123
- it 'refreshes proxy lists if no proxy found' do
124
- ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:'@proxies', [])
136
+ it "refreshes proxy lists if no proxy found" do
137
+ ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", [])
125
138
 
126
- expect { ProxyFetcher::Client.get('http://httpbin.org') }.not_to raise_error
139
+ expect { ProxyFetcher::Client.get("http://httpbin.org") }
140
+ .not_to raise_error
127
141
  end
128
142
  end
129
143
 
130
- context 'redirects' do
131
- it 'follows redirect when present' do
132
- content = ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/2')
144
+ context "redirects" do
145
+ it "follows redirect when present" do
146
+ content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
133
147
 
134
148
  expect(content).not_to be_empty
135
149
  end
136
150
 
137
- it 'raises an error when reaches max redirects limit' do
138
- expect { ProxyFetcher::Client.get('http://httpbin.org/absolute-redirect/11') }.to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached)
151
+ it "raises an error when reaches max redirects limit" do
152
+ expect { ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/11") }
153
+ .to raise_error(ProxyFetcher::Exceptions::MaximumRedirectsReached)
139
154
  end
140
155
  end
141
156
  end