proxy_fetcher 0.13.0 → 0.16.0

This diff shows the content of publicly released package versions from one of the supported registries. It is provided for informational purposes only and reflects the changes between versions exactly as they appear in the public registry.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ci.yml +58 -0
  4. data/CHANGELOG.md +16 -1
  5. data/Gemfile +4 -2
  6. data/Rakefile +3 -1
  7. data/gemfiles/nokogiri.gemfile +2 -2
  8. data/gemfiles/oga.gemfile +3 -3
  9. data/lib/proxy_fetcher/client/request.rb +3 -3
  10. data/lib/proxy_fetcher/configuration.rb +1 -1
  11. data/lib/proxy_fetcher/document/node.rb +1 -1
  12. data/lib/proxy_fetcher/manager.rb +2 -0
  13. data/lib/proxy_fetcher/providers/free_proxy_list.rb +1 -22
  14. data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
  15. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -1
  16. data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
  17. data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
  18. data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
  19. data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
  20. data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
  21. data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
  22. data/lib/proxy_fetcher/providers/xroxy.rb +1 -1
  23. data/lib/proxy_fetcher/utils/http_client.rb +25 -21
  24. data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
  25. data/lib/proxy_fetcher/version.rb +1 -1
  26. data/lib/proxy_fetcher.rb +32 -26
  27. data/proxy_fetcher.gemspec +7 -5
  28. data/spec/proxy_fetcher/client/client_spec.rb +10 -5
  29. data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
  30. metadata +15 -18
  31. data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
  32. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
  33. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
  34. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
  35. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
  36. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
  37. data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09772e9bc018d8accb01401b2e8b1897804e0a948c67a7eeeef8b1a2e9fcd245'
4
- data.tar.gz: 16b49036c2ecdd06e23f53c4fe55982ec7ceb1531e9e223f31590eb83417c4e0
3
+ metadata.gz: 985e3e5cffffa62960dbe04510e232b5a8e652119acb2cc787824e0834f10870
4
+ data.tar.gz: beb50433ddf5d298ca1c7c45af357395cb24f87b695231159b3b52ad8429cd8b
5
5
  SHA512:
6
- metadata.gz: 526c8fdcfb1171c09f2e9bf6a4e13dbbc6e837963ce1c1ef7e2e48ed1ec3052e3dbc6a4d04bb33a0b3f083914149c22af4b4238a6c558825a91e3f26e112a378
7
- data.tar.gz: 8dddea87295d1825910e4eecb4a5a16836b06e0070cc07f823a4902d6952550ae3b016bfb2e29d0ce8cc16ba3add12ad28a6ab6fec8eccd8a1656e368d88be5d
6
+ metadata.gz: 28ee05704cdcf8dc48881119a208dad97408d3c50f34b691e6062ca6a8697f3ffe58ba5056c0c8d07f8fec9f5f977927877341572716fe6db3a28876b537264d
7
+ data.tar.gz: c0533aa9584e02300734385ae8552c5e50d9836f7647337eb797eb11aa50b0edd93bc6a54eb3b22059ac4eed26adbb8ea21dcca9285d9bceff019c67a797146c
data/.github/dependabot.yml ADDED
@@ -0,0 +1,8 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "03:00"
8
+ open-pull-requests-limit: 10
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,58 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ name: >-
8
+ Ruby ${{ matrix.ruby }} (${{ matrix.gemfile }})
9
+ env:
10
+ CI: true
11
+ runs-on: ${{ matrix.os }}
12
+ continue-on-error: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'debug' || matrix.experimental }}
13
+ if: |
14
+ !( contains(github.event.pull_request.title, '[ci skip]')
15
+ || contains(github.event.pull_request.title, '[skip ci]'))
16
+ strategy:
17
+ fail-fast: true
18
+ matrix:
19
+ experimental: [false]
20
+ os: [ ubuntu-latest ]
21
+ ruby:
22
+ - 2.6
23
+ - 2.7
24
+ - '3.0'
25
+ - '3.1'
26
+ gemfile:
27
+ - gemfiles/oga.gemfile
28
+ - gemfiles/nokogiri.gemfile
29
+ include:
30
+ - ruby: head
31
+ os: ubuntu-latest
32
+ gemfile: gemfiles/nokogiri.gemfile
33
+ experimental: true
34
+ - ruby: head
35
+ os: ubuntu-latest
36
+ gemfile: gemfiles/oga.gemfile
37
+ experimental: true
38
+ - ruby: jruby
39
+ os: ubuntu-latest
40
+ gemfile: gemfiles/nokogiri.gemfile
41
+ experimental: true
42
+ - ruby: truffleruby
43
+ os: ubuntu-latest
44
+ gemfile: gemfiles/nokogiri.gemfile
45
+ experimental: true
46
+ steps:
47
+ - name: Repo checkout
48
+ uses: actions/checkout@v2
49
+
50
+ - name: Setup Ruby
51
+ uses: ruby/setup-ruby@v1
52
+ with:
53
+ ruby-version: ${{ matrix.ruby }}
54
+ bundler-cache: true
55
+
56
+ - name: Run tests
57
+ timeout-minutes: 10
58
+ run: bundle exec rake spec
data/CHANGELOG.md CHANGED
@@ -4,7 +4,22 @@ Reverse Chronological Order:
4
4
 
5
5
  ## `master`
6
6
 
7
- * Add your description here
7
+ ...
8
+
9
+ ## `0.15.1` (2021-02-17)
10
+
11
+ * Support for Ruby 3.0
12
+
13
+ ## `0.15.0` (2021-01-26)
14
+
15
+ * Removed failing providers
16
+ * Added new
17
+ * Specs refactoring
18
+
19
+ ## `0.14.0` (2020-05-11)
20
+
21
+ * Add MTPro provider
22
+ * Add Proxypedia provider
8
23
 
9
24
  ## `0.13.0` (2020-03-09)
10
25
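In short, these releases drop the failing providers and replace them with the new provider classes added below (MTPro, Proxypedia, the Proxyscrape endpoints and the extra free-proxy-list variants), plus Ruby 3.0 support. As a rough usage sketch, any symbol registered by the new provider files can be selected through the configuration API that the specs in this diff also use (provider symbols and defaults may vary between versions):

    require "proxy_fetcher"

    # pick one of the providers registered further down,
    # e.g. :mtpro, :proxypedia, :proxyscrape_http
    ProxyFetcher.config.provider = :proxypedia

    manager = ProxyFetcher::Manager.new   # loads the proxy list from the configured provider
    manager.proxies.first(3).each { |proxy| puts "#{proxy.addr}:#{proxy.port}" }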
 
data/Gemfile CHANGED
@@ -6,9 +6,11 @@ gemspec
6
6
 
7
7
  gem "nokogiri", "~> 1.8"
8
8
  gem "oga", "~> 3.2"
9
- gem "rubocop", "~> 0.74"
9
+ gem "rubocop", "~> 1.0"
10
10
 
11
11
  group :test do
12
- gem "coveralls", require: false
12
+ gem "coveralls_reborn", require: false
13
+ # Until I find a way to introduce other MITM proxy
14
+ gem "webrick", "1.4.2"
13
15
  gem "evil-proxy", "~> 0.2"
14
16
  end
data/Rakefile CHANGED
@@ -3,6 +3,8 @@
3
3
  require "bundler/gem_tasks"
4
4
 
5
5
  require "rspec/core/rake_task"
6
- RSpec::Core::RakeTask.new(:spec)
6
+ RSpec::Core::RakeTask.new(:spec) do |t|
7
+ t.rspec_opts = '--tag "~unreliable"'
8
+ end
7
9
 
8
10
  task default: :spec
data/gemfiles/nokogiri.gemfile CHANGED
@@ -7,7 +7,7 @@ gemspec path: "../"
7
7
  gem "nokogiri", "~> 1.8"
8
8
 
9
9
  group :test do
10
- gem "coveralls", require: false
10
+ gem "coveralls_reborn", require: false
11
+ gem "webrick"
11
12
  gem "evil-proxy", "~> 0.2"
12
- gem "rspec", "~> 3.6"
13
13
  end
data/gemfiles/oga.gemfile CHANGED
@@ -4,10 +4,10 @@ source "https://rubygems.org"
4
4
 
5
5
  gemspec path: "../"
6
6
 
7
- gem "oga", "~> 2.0"
7
+ gem "oga", "~> 3.0"
8
8
 
9
9
  group :test do
10
- gem "coveralls", require: false
10
+ gem "coveralls_reborn", require: false
11
+ gem "webrick"
11
12
  gem "evil-proxy", "~> 0.2"
12
- gem "rspec", "~> 3.6"
13
13
  end
data/lib/proxy_fetcher/client/request.rb CHANGED
@@ -41,15 +41,15 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # response body (requested resource content)
43
43
  #
44
- def self.execute(args)
45
- new(args).execute
44
+ def self.execute(**args)
45
+ new(**args).execute
46
46
  end
47
47
 
48
48
  # Initialize new HTTP request
49
49
  #
50
50
  # @return [Request]
51
51
  #
52
- def initialize(args)
52
+ def initialize(**args)
53
53
  raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
54
54
 
55
55
  @url = args.fetch(:url)
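The `**args` changes here (and the `self.fetch(*args, **kwargs, &block)` change in http_client.rb further down) are the usual Ruby 3.0 keyword-argument separation fix: hashes riding through positional splats are no longer implicitly converted to keywords. A minimal, self-contained illustration in plain Ruby (not the gem's API):

    class Client
      def initialize(url, method: :get)
        @url = url
        @method = method
      end
    end

    def old_delegate(*args)
      Client.new(*args)                    # Ruby 2.x converted a trailing hash to keywords; Ruby 3.0 raises ArgumentError
    end

    def new_delegate(*args, **kwargs)
      Client.new(*args, **kwargs)          # keywords forwarded explicitly; works on 2.7+ and 3.x
    end

    new_delegate("https://example.com", method: :head)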
data/lib/proxy_fetcher/configuration.rb CHANGED
@@ -114,7 +114,7 @@ module ProxyFetcher
114
114
 
115
115
  # Sets default configuration options
116
116
  def reset!
117
- @logger = Logger.new(STDOUT)
117
+ @logger = Logger.new($stdout)
118
118
  @user_agent = DEFAULT_USER_AGENT
119
119
  @pool_size = 10
120
120
  @client_timeout = 3
data/lib/proxy_fetcher/document/node.rb CHANGED
@@ -83,7 +83,7 @@ module ProxyFetcher
83
83
  def clear(text)
84
84
  return "" if text.nil? || text.empty?
85
85
 
86
- text.strip.gsub(/[\t]/i, "")
86
+ text.strip.gsub(/\t/i, "")
87
87
  end
88
88
  end
89
89
  end
data/lib/proxy_fetcher/manager.rb CHANGED
@@ -47,6 +47,8 @@ module ProxyFetcher
47
47
 
48
48
  ProxyFetcher.config.providers.each do |provider_name|
49
49
  threads << Thread.new do
50
+ Thread.current.report_on_exception = false
51
+
50
52
  provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
51
53
  provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
52
54
  provider_proxies = provider.fetch_proxies!(provider_filters)
data/lib/proxy_fetcher/providers/free_proxy_list.rb CHANGED
@@ -11,7 +11,7 @@ module ProxyFetcher
11
11
 
12
12
  # [NOTE] Doesn't support filtering
13
13
  def xpath
14
- '//table[@id="proxylisttable"]/tbody/tr'
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
15
  end
16
16
 
17
17
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -45,27 +45,6 @@ module ProxyFetcher
45
45
  #
46
46
  def parse_type(html_node)
47
47
  https = html_node.content_at("td[6]")
48
- # frozen_string_literal: true
49
- # FreeProxyList provider class.
50
- # Provider URL to fetch proxy list
51
- # [NOTE] Doesn't support filtering
52
- # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
53
- # object.
54
- #
55
- # @param html_node [Object]
56
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
57
- #
58
- # @return [ProxyFetcher::Proxy]
59
- # Proxy object
60
- #
61
- # Parses HTML node to extract proxy type.
62
- #
63
- # @param html_node [Object]
64
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
65
- #
66
- # @return [String]
67
- # Proxy type
68
- #
69
48
  https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
70
49
  end
71
50
  end
data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListSocks provider class.
6
+ class FreeProxyListSocks < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.socks-proxy.net/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.type = parse_type(html_node)
32
+ proxy.anonymity = html_node.content_at("td[6]")
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[5]")
48
+
49
+ return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
50
+ return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
51
+
52
+ "Unknown"
53
+ end
54
+ end
55
+
56
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
57
+ end
58
+ end
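Once registered, this provider is selected via the `:free_proxy_list_socks` symbol from the registration line above; the values it assigns to `proxy.type` are the `ProxyFetcher::Proxy::SOCKS4` / `SOCKS5` constants, so results can be filtered by protocol. A sketch, using the Manager and Proxy accessors seen elsewhere in this diff:

    ProxyFetcher.config.provider = :free_proxy_list_socks

    manager = ProxyFetcher::Manager.new
    socks5  = manager.proxies.select { |proxy| proxy.type == ProxyFetcher::Proxy::SOCKS5 }
    socks5.each { |proxy| puts "#{proxy.addr}:#{proxy.port} (#{proxy.country})" }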
data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb CHANGED
@@ -9,8 +9,9 @@ module ProxyFetcher
9
9
  "https://www.sslproxies.org/"
10
10
  end
11
11
 
12
+ # [NOTE] Doesn't support filtering
12
13
  def xpath
13
- '//table[@id="proxylisttable"]/tbody/tr'
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
14
15
  end
15
16
 
16
17
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
data/lib/proxy_fetcher/providers/free_proxy_list_us.rb ADDED
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListUS provider class.
6
+ class FreeProxyListUS < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.us-proxy.org/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.anonymity = html_node.content_at("td[5]")
32
+ proxy.type = parse_type(html_node)
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[7]")
48
+ https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
49
+ end
50
+ end
51
+
52
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
53
+ end
54
+ end
data/lib/proxy_fetcher/providers/mtpro.rb ADDED
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # MTPro provider class.
8
+ class MTPro < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://mtpro.xyz/api/?type=socks"
12
+ end
13
+
14
+ def load_proxy_list(filters = {})
15
+ html = load_html(provider_url, filters)
16
+ JSON.parse(html)
17
+ rescue JSON::ParserError
18
+ []
19
+ end
20
+
21
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
22
+ # object.
23
+ #
24
+ # @param node [Object]
25
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
26
+ #
27
+ # @return [ProxyFetcher::Proxy]
28
+ # Proxy object
29
+ #
30
+ def to_proxy(node)
31
+ ProxyFetcher::Proxy.new.tap do |proxy|
32
+ proxy.addr = node["ip"]
33
+ proxy.port = Integer(node["port"])
34
+ proxy.country = node["country"]
35
+ proxy.anonymity = "Unknown"
36
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
37
+ end
38
+ end
39
+ end
40
+
41
+ ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
42
+ end
43
+ end
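MTPro skips the HTML/xpath path entirely: `load_proxy_list` returns parsed JSON entries (or `[]` on a parse error) and `to_proxy` reads the `ip`, `port` and `country` keys. Roughly, with an illustrative payload shaped like those keys (the values below are examples, not real API output):

    require "json"

    body    = '[{"ip":"203.0.113.10","port":1080,"country":"DE"}]'  # illustrative; keys inferred from to_proxy above
    entries = JSON.parse(body)
    entry   = entries.first

    [entry["ip"], Integer(entry["port"]), entry["country"]]
    # => ["203.0.113.10", 1080, "DE"]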
data/lib/proxy_fetcher/providers/proxypedia.rb ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyList provider class.
6
+ class Proxypedia < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://proxypedia.org"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//main/ul/li[position()>1]"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.]
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ addr, port = html_node.content_at("a").to_s.split(":")
28
+
29
+ ProxyFetcher::Proxy.new.tap do |proxy|
30
+ proxy.addr = addr
31
+ proxy.port = Integer(port)
32
+ proxy.country = parse_country(html_node)
33
+ proxy.anonymity = "Unknown"
34
+ proxy.type = ProxyFetcher::Proxy::HTTP
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def parse_country(html_node)
41
+ text = html_node.content.to_s
42
+ text[/\((.+?)\)/, 1] || "Unknown"
43
+ end
44
+ end
45
+
46
+ ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
47
+ end
48
+ end
data/lib/proxy_fetcher/providers/proxyscrape_http.rb ADDED
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # FreeProxyList provider class.
8
+ class ProxyscrapeHTTP < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
12
+ end
13
+
14
+ # Loads provider HTML and parses it with internal document object.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(node)
51
+ addr, port = node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::HTTP
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
64
+ end
65
+ end
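The three Proxyscrape providers work the same way: the API returns a plain ip:port list, `load_document` splits it into lines via CSV, and `to_proxy` splits each entry on ":". With an illustrative response body (example addresses only):

    require "csv"

    payload = "198.51.100.7:3128\r\n198.51.100.8:8080\r\n"     # illustrative ip:port list

    entries = CSV.parse(payload, col_sep: "\t").map(&:first)   # the parsing load_document performs above
    # => ["198.51.100.7:3128", "198.51.100.8:8080"]

    addr, port = entries.first.split(":")                      # what to_proxy does with each entry
    [addr, Integer(port)]
    # => ["198.51.100.7", 3128]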
data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb ADDED
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # FreeProxyList provider class.
8
+ class ProxyscrapeSOCKS4 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
12
+ end
13
+
14
+ # Loads provider HTML and parses it with internal document object.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS4
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
64
+ end
65
+ end
data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb ADDED
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # FreeProxyList provider class.
8
+ class ProxyscrapeSOCKS5 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
12
+ end
13
+
14
+ # Loads provider HTML and parses it with internal document object.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
64
+ end
65
+ end
data/lib/proxy_fetcher/providers/xroxy.rb CHANGED
@@ -6,7 +6,7 @@ module ProxyFetcher
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- "https://madison.xroxy.com/proxylist.html"
9
+ "https://www.xroxy.com/proxylist.htm"
10
10
  end
11
11
 
12
12
  def xpath
data/lib/proxy_fetcher/utils/http_client.rb CHANGED
@@ -41,8 +41,8 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # resource content
43
43
  #
44
- def self.fetch(*args)
45
- new(*args).fetch
44
+ def self.fetch(*args, **kwargs, &block)
45
+ new(*args, **kwargs, &block).fetch
46
46
  end
47
47
 
48
48
  # Initialize HTTP client instance
@@ -51,15 +51,17 @@ module ProxyFetcher
51
51
  #
52
52
  def initialize(url, method: :get, params: {}, headers: {})
53
53
  @url = url.to_s
54
- @method = method
54
+ @method = method.to_sym
55
55
  @params = params
56
56
  @headers = headers
57
57
 
58
- @http = HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
59
- @timeout = ProxyFetcher.config.provider_proxies_load_timeout
58
+ unless HTTP::Request::METHODS.include?(@method)
59
+ raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
60
+ end
60
61
 
61
- @ssl_ctx = OpenSSL::SSL::SSLContext.new
62
- @ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
62
+ @timeout = ProxyFetcher.config.provider_proxies_load_timeout
63
+ @http = build_http_engine
64
+ @ssl_ctx = build_ssl_context
63
65
  end
64
66
 
65
67
  # Fetches resource content by sending HTTP request to it.
@@ -67,30 +69,32 @@ module ProxyFetcher
67
69
  # @return [String]
68
70
  # response body
69
71
  #
70
- def fetch
71
- response = process_http_request
72
+ def fetch(**options)
73
+ response = perform_http_request
74
+ return response if options.fetch(:raw, false)
75
+
72
76
  response.body.to_s
73
77
  rescue StandardError => e
74
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
78
+ ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
75
79
  ""
76
80
  end
77
81
 
78
- def fetch_with_headers
79
- process_http_request
80
- rescue StandardError => e
81
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
82
- HTTP::Response.new(version: "1.1", status: 500, body: "")
83
- end
84
-
85
82
  protected
86
83
 
87
- def process_http_request(http_method: method, http_params: params)
88
- unless HTTP::Request::METHODS.include?(http_method)
89
- raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
84
+ def build_ssl_context
85
+ OpenSSL::SSL::SSLContext.new.tap do |context|
86
+ context.verify_mode = OpenSSL::SSL::VERIFY_NONE
90
87
  end
88
+ end
89
+
90
+ def build_http_engine
91
+ HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
92
+ end
91
93
 
94
+ def perform_http_request(http_method: method, http_params: params)
92
95
  http.public_send(
93
- http_method.to_sym, url,
96
+ http_method,
97
+ url,
94
98
  form: http_params,
95
99
  ssl_context: ssl_ctx
96
100
  )
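Besides the refactoring, `fetch` gained a `raw:` option that returns the HTTP::Response object instead of the body string, and an unknown HTTP verb now fails fast in the constructor. A usage sketch, assuming the class is the gem's internal `ProxyFetcher::HTTPClient` (the class name is not shown in this hunk):

    client   = ProxyFetcher::HTTPClient.new("https://example.com", method: :get)
    body     = client.fetch              # response body as a String ("" if the request fails)
    response = client.fetch(raw: true)   # the underlying HTTP::Response object

    # an unknown verb now raises in the constructor instead of at request time:
    #   ProxyFetcher::HTTPClient.new("https://example.com", method: :bogus)
    #   # => ArgumentError: 'bogus' is a wrong HTTP method name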
data/lib/proxy_fetcher/utils/proxy_validator.rb CHANGED
@@ -10,27 +10,39 @@ module ProxyFetcher
10
10
 
11
11
  # Short variant to validate proxy.
12
12
  #
13
- # @param proxy_addr [String] proxy address or IP
14
- # @param proxy_port [String, Integer] proxy port
13
+ # @param address [String] proxy address or IP
14
+ # @param port [String, Integer] proxy port
15
15
  #
16
16
  # @return [Boolean]
17
17
  # true if connection to the server using proxy established, otherwise false
18
18
  #
19
- def self.connectable?(proxy_addr, proxy_port)
20
- new(proxy_addr, proxy_port).connectable?
19
+ def self.connectable?(address, port)
20
+ new(address, port).connectable?
21
21
  end
22
22
 
23
23
  # Initialize new ProxyValidator instance
24
24
  #
25
- # @param proxy_addr [String] proxy address or IP
26
- # @param proxy_port [String, Integer] proxy port
25
+ # @param address [String] Proxy address or IP
26
+ # @param port [String, Integer] Proxy port
27
+ # @param options [Hash] proxy options
28
+ # @option username [String] Proxy authentication username
29
+ # @option password [String] Proxy authentication password
30
+ # @option headers [Hash] Proxy headers
27
31
  #
28
32
  # @return [ProxyValidator]
29
33
  #
30
- def initialize(proxy_addr, proxy_port)
34
+ def initialize(address, port, options: {})
31
35
  timeout = ProxyFetcher.config.proxy_validation_timeout
36
+ proxy = [address, port.to_i]
32
37
 
33
- @http = HTTP.follow.via(proxy_addr, proxy_port.to_i).timeout(connect: timeout, read: timeout)
38
+ if options[:username] && options[:password]
39
+ proxy << options[:username]
40
+ proxy << options[:password]
41
+ end
42
+
43
+ proxy << options[:headers].to_h if options[:headers]
44
+
45
+ @http = HTTP.follow.via(*proxy).timeout(connect: timeout, read: timeout)
34
46
  end
35
47
 
36
48
  # Checks if proxy is connectable (can be used to connect
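The validator can now forward proxy credentials and extra headers to `HTTP.via`. A sketch of the expanded call, using the `options:` keyword exactly as declared in the new initializer (address, port and credentials below are placeholders):

    validator = ProxyFetcher::ProxyValidator.new(
      "203.0.113.10", 8080,
      options: { username: "user", password: "s3cret", headers: { "User-Agent" => "ProxyFetcher" } }
    )
    validator.connectable?   # => true / false

    # the shorthand class method still takes just address and port
    ProxyFetcher::ProxyValidator.connectable?("203.0.113.10", 8080)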
data/lib/proxy_fetcher/version.rb CHANGED
@@ -13,7 +13,7 @@ module ProxyFetcher
13
13
  # Major version number
14
14
  MAJOR = 0
15
15
  # Minor version number
16
- MINOR = 13
16
+ MINOR = 16
17
17
  # Smallest version number
18
18
  TINY = 0
19
19
 
data/lib/proxy_fetcher.rb CHANGED
@@ -4,41 +4,47 @@ require "uri"
4
4
  require "http"
5
5
  require "logger"
6
6
 
7
- require File.dirname(__FILE__) + "/proxy_fetcher/version"
7
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/version"
8
8
 
9
- require File.dirname(__FILE__) + "/proxy_fetcher/exceptions"
10
- require File.dirname(__FILE__) + "/proxy_fetcher/configuration"
11
- require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry"
12
- require File.dirname(__FILE__) + "/proxy_fetcher/proxy"
13
- require File.dirname(__FILE__) + "/proxy_fetcher/manager"
14
- require File.dirname(__FILE__) + "/proxy_fetcher/null_logger"
9
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/exceptions"
10
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration"
11
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration/providers_registry"
12
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/proxy"
13
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/manager"
14
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/null_logger"
15
15
 
16
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client"
17
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator"
18
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator"
19
- require File.dirname(__FILE__) + "/proxy_fetcher/client/client"
20
- require File.dirname(__FILE__) + "/proxy_fetcher/client/request"
21
- require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry"
16
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/http_client"
17
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_validator"
18
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_list_validator"
19
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/client"
20
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/request"
21
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/proxies_registry"
22
22
 
23
- require File.dirname(__FILE__) + "/proxy_fetcher/document"
24
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters"
25
- require File.dirname(__FILE__) + "/proxy_fetcher/document/node"
26
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter"
27
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter"
28
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter"
23
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document"
24
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters"
25
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/node"
26
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/abstract_adapter"
27
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/nokogiri_adapter"
28
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/oga_adapter"
29
29
 
30
30
  ##
31
31
  # Ruby / JRuby lib for managing proxies
32
32
  module ProxyFetcher
33
33
  # ProxyFetcher providers namespace
34
34
  module Providers
35
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
36
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
37
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
38
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
39
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
40
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
41
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
35
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/base"
36
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list"
37
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_socks"
38
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_ssl"
39
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_us"
40
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/http_tunnel"
41
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/mtpro"
42
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxy_list"
43
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxypedia"
44
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_http"
45
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks4"
46
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks5"
47
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/xroxy"
42
48
  end
43
49
 
44
50
  @__config_access_lock__ = Mutex.new
data/proxy_fetcher.gemspec CHANGED
@@ -8,9 +8,11 @@ Gem::Specification.new do |gem|
8
8
  gem.name = "proxy_fetcher"
9
9
  gem.version = ProxyFetcher.gem_version
10
10
  gem.summary = "Ruby gem for dealing with proxy lists from different providers"
11
- gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \
12
- "using proxies by fetching and validating proxy lists from " \
13
- "the different providers."
11
+ gem.description = <<-TEXT.strip.gsub(/[\s\n]+/, " ")
12
+ This gem can help your Ruby application to make HTTP(S) requests
13
+ using proxies by fetching and validating proxy lists from
14
+ the different providers.
15
+ TEXT
14
16
  gem.authors = ["Nikita Bulai"]
15
17
  gem.email = "bulajnikita@gmail.com"
16
18
  gem.require_paths = ["lib"]
@@ -21,8 +23,8 @@ Gem::Specification.new do |gem|
21
23
  gem.license = "MIT"
22
24
  gem.required_ruby_version = ">= 2.3.0"
23
25
 
24
- gem.add_runtime_dependency "http", ">= 3", "< 5"
26
+ gem.add_runtime_dependency "http", ">= 3"
25
27
 
26
28
  gem.add_development_dependency "rake", ">= 12.0"
27
- gem.add_development_dependency "rspec", "~> 3.5"
29
+ gem.add_development_dependency "rspec", "~> 3.9"
28
30
  end
data/spec/proxy_fetcher/client/client_spec.rb CHANGED
@@ -3,6 +3,11 @@
3
3
  require "spec_helper"
4
4
  require "json"
5
5
 
6
+ begin
7
+ require "webrick"
8
+ rescue LoadError
9
+ # nop
10
+ end
6
11
  require "evil-proxy"
7
12
  require "evil-proxy/async"
8
13
 
@@ -118,15 +123,15 @@ describe ProxyFetcher::Client do
118
123
  end
119
124
  end
120
125
 
121
- xcontext "retries" do
126
+ context "retries" do
122
127
  it "raises an error when reaches max retries limit" do
123
128
  allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
124
129
 
125
- expect { ProxyFetcher::Client.get("http://httpbin.org") }
130
+ expect { ProxyFetcher::Client.get("http://httpbin.org", options: { max_retries: 10 }) }
126
131
  .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
127
132
  end
128
133
 
129
- it "raises an error when http request returns an error" do
134
+ xit "raises an error when http request returns an error" do
130
135
  allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
131
136
 
132
137
  expect { ProxyFetcher::Client.get("http://httpbin.org") }
@@ -134,14 +139,14 @@ describe ProxyFetcher::Client do
134
139
  end
135
140
 
136
141
  it "refreshes proxy lists if no proxy found" do
137
- ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", [])
142
+ allow(ProxyFetcher::Manager.new).to receive(:proxies).and_return([])
138
143
 
139
144
  expect { ProxyFetcher::Client.get("http://httpbin.org") }
140
145
  .not_to raise_error
141
146
  end
142
147
  end
143
148
 
144
- context "redirects" do
149
+ xcontext "redirects" do
145
150
  it "follows redirect when present" do
146
151
  content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
147
152
 
data/spec/proxy_fetcher/providers/proxy_classes_spec.rb ADDED
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe "Proxy classes" do
6
+ [
7
+ [:free_proxy_list, "FreeProxyList"],
8
+ [:free_proxy_list_socks, "FreeProxyListSocks"],
9
+ [:free_proxy_list_ssl, "FreeProxyListSSL"],
10
+ [:free_proxy_list_us, "FreeProxyListUS"],
11
+ [:http_tunnel, "HTTPTunnel"],
12
+ [:mtpro, "MTPro"],
13
+ [:proxy_list, "ProxyList"],
14
+ [:proxypedia, "Proxypedia"],
15
+ [:proxyscrape_http, "ProxyscrapeHTTP"],
16
+ [:proxyscrape_socks4, "ProxyscrapeSOCKS4"],
17
+ [:proxyscrape_socks5, "ProxyscrapeSOCKS5"],
18
+ [:xroxy, "XRoxy"]
19
+ ].each do |(provider_name, provider_klass)|
20
+ describe Object.const_get("ProxyFetcher::Providers::#{provider_klass}") do
21
+ before :all do
22
+ ProxyFetcher.config.provider = provider_name
23
+ end
24
+
25
+ it_behaves_like "a manager"
26
+ end
27
+ end
28
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxy_fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Bulai
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-09 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -17,9 +17,6 @@ dependencies:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3'
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '5'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
@@ -27,9 +24,6 @@ dependencies:
27
24
  - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '3'
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '5'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: rake
35
29
  requirement: !ruby/object:Gem::Requirement
@@ -50,14 +44,14 @@ dependencies:
50
44
  requirements:
51
45
  - - "~>"
52
46
  - !ruby/object:Gem::Version
53
- version: '3.5'
47
+ version: '3.9'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
52
  - - "~>"
59
53
  - !ruby/object:Gem::Version
60
- version: '3.5'
54
+ version: '3.9'
61
55
  description: This gem can help your Ruby application to make HTTP(S) requests using
62
56
  proxies by fetching and validating proxy lists from the different providers.
63
57
  email: bulajnikita@gmail.com
@@ -67,6 +61,8 @@ executables:
67
61
  extensions: []
68
62
  extra_rdoc_files: []
69
63
  files:
64
+ - ".github/dependabot.yml"
65
+ - ".github/workflows/ci.yml"
70
66
  - ".gitignore"
71
67
  - CHANGELOG.md
72
68
  - CODE_OF_CONDUCT.md
@@ -94,10 +90,16 @@ files:
94
90
  - lib/proxy_fetcher/null_logger.rb
95
91
  - lib/proxy_fetcher/providers/base.rb
96
92
  - lib/proxy_fetcher/providers/free_proxy_list.rb
93
+ - lib/proxy_fetcher/providers/free_proxy_list_socks.rb
97
94
  - lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
98
- - lib/proxy_fetcher/providers/gather_proxy.rb
95
+ - lib/proxy_fetcher/providers/free_proxy_list_us.rb
99
96
  - lib/proxy_fetcher/providers/http_tunnel.rb
97
+ - lib/proxy_fetcher/providers/mtpro.rb
100
98
  - lib/proxy_fetcher/providers/proxy_list.rb
99
+ - lib/proxy_fetcher/providers/proxypedia.rb
100
+ - lib/proxy_fetcher/providers/proxyscrape_http.rb
101
+ - lib/proxy_fetcher/providers/proxyscrape_socks4.rb
102
+ - lib/proxy_fetcher/providers/proxyscrape_socks5.rb
101
103
  - lib/proxy_fetcher/providers/xroxy.rb
102
104
  - lib/proxy_fetcher/proxy.rb
103
105
  - lib/proxy_fetcher/utils/http_client.rb
@@ -112,13 +114,8 @@ files:
112
114
  - spec/proxy_fetcher/document/node_spec.rb
113
115
  - spec/proxy_fetcher/manager_spec.rb
114
116
  - spec/proxy_fetcher/providers/base_spec.rb
115
- - spec/proxy_fetcher/providers/free_proxy_list_spec.rb
116
- - spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
117
- - spec/proxy_fetcher/providers/gather_proxy_spec.rb
118
- - spec/proxy_fetcher/providers/http_tunnel_spec.rb
119
117
  - spec/proxy_fetcher/providers/multiple_providers_spec.rb
120
- - spec/proxy_fetcher/providers/proxy_list_spec.rb
121
- - spec/proxy_fetcher/providers/xroxy_spec.rb
118
+ - spec/proxy_fetcher/providers/proxy_classes_spec.rb
122
119
  - spec/proxy_fetcher/proxy_spec.rb
123
120
  - spec/proxy_fetcher/version_spec.rb
124
121
  - spec/spec_helper.rb
@@ -142,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
139
  - !ruby/object:Gem::Version
143
140
  version: '0'
144
141
  requirements: []
145
- rubygems_version: 3.0.2
142
+ rubygems_version: 3.0.8
146
143
  signing_key:
147
144
  specification_version: 4
148
145
  summary: Ruby gem for dealing with proxy lists from different providers
data/lib/proxy_fetcher/providers/gather_proxy.rb DELETED
@@ -1,50 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
-
5
- module ProxyFetcher
6
- module Providers
7
- # GatherProxy provider class.
8
- class GatherProxy < Base
9
- # Provider URL to fetch proxy list
10
- def provider_url
11
- "https://proxygather.com"
12
- end
13
-
14
- def xpath
15
- '//div[@class="proxy-list"]/table/script'
16
- end
17
-
18
- # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
19
- # object.
20
- #
21
- # @param html_node [Object]
22
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
23
- #
24
- # @return [ProxyFetcher::Proxy]
25
- # Proxy object
26
- #
27
- def to_proxy(html_node)
28
- json = parse_json(html_node)
29
-
30
- ProxyFetcher::Proxy.new.tap do |proxy|
31
- proxy.addr = json["PROXY_IP"]
32
- proxy.port = json["PROXY_PORT"].to_i(16)
33
- proxy.anonymity = json["PROXY_TYPE"]
34
- proxy.country = json["PROXY_COUNTRY"]
35
- proxy.response_time = json["PROXY_TIME"].to_i
36
- proxy.type = ProxyFetcher::Proxy::HTTP
37
- end
38
- end
39
-
40
- private
41
-
42
- def parse_json(html_node)
43
- javascript = html_node.content[/{.+}/im]
44
- JSON.parse(javascript)
45
- end
46
- end
47
-
48
- ProxyFetcher::Configuration.register_provider(:gather_proxy, GatherProxy)
49
- end
50
- end
data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb DELETED
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::FreeProxyList do
6
- before :all do
7
- ProxyFetcher.configure do |config|
8
- config.provider = :free_proxy_list
9
- end
10
- end
11
-
12
- it_behaves_like "a manager"
13
- end
data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::FreeProxyListSSL do
6
- before :all do
7
- ProxyFetcher.config.provider = :free_proxy_list_ssl
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
data/spec/proxy_fetcher/providers/gather_proxy_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::GatherProxy do
6
- before :all do
7
- ProxyFetcher.config.provider = :gather_proxy
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
data/spec/proxy_fetcher/providers/http_tunnel_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::HTTPTunnel do
6
- before :all do
7
- ProxyFetcher.config.provider = :http_tunnel
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
data/spec/proxy_fetcher/providers/proxy_list_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::ProxyList do
6
- before :all do
7
- ProxyFetcher.config.provider = :proxy_list
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
data/spec/proxy_fetcher/providers/xroxy_spec.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::XRoxy do
6
- before :all do
7
- ProxyFetcher.config.provider = :xroxy
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end