proxy_fetcher 0.13.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ci.yml +58 -0
  4. data/CHANGELOG.md +16 -1
  5. data/Gemfile +4 -2
  6. data/Rakefile +3 -1
  7. data/gemfiles/nokogiri.gemfile +2 -2
  8. data/gemfiles/oga.gemfile +3 -3
  9. data/lib/proxy_fetcher/client/request.rb +3 -3
  10. data/lib/proxy_fetcher/configuration.rb +1 -1
  11. data/lib/proxy_fetcher/document/node.rb +1 -1
  12. data/lib/proxy_fetcher/manager.rb +2 -0
  13. data/lib/proxy_fetcher/providers/free_proxy_list.rb +1 -22
  14. data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
  15. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -1
  16. data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
  17. data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
  18. data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
  19. data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
  20. data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
  21. data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
  22. data/lib/proxy_fetcher/providers/xroxy.rb +1 -1
  23. data/lib/proxy_fetcher/utils/http_client.rb +25 -21
  24. data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
  25. data/lib/proxy_fetcher/version.rb +1 -1
  26. data/lib/proxy_fetcher.rb +32 -26
  27. data/proxy_fetcher.gemspec +7 -5
  28. data/spec/proxy_fetcher/client/client_spec.rb +10 -5
  29. data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
  30. metadata +15 -18
  31. data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
  32. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
  33. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
  34. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
  35. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
  36. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
  37. data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09772e9bc018d8accb01401b2e8b1897804e0a948c67a7eeeef8b1a2e9fcd245'
4
- data.tar.gz: 16b49036c2ecdd06e23f53c4fe55982ec7ceb1531e9e223f31590eb83417c4e0
3
+ metadata.gz: 985e3e5cffffa62960dbe04510e232b5a8e652119acb2cc787824e0834f10870
4
+ data.tar.gz: beb50433ddf5d298ca1c7c45af357395cb24f87b695231159b3b52ad8429cd8b
5
5
  SHA512:
6
- metadata.gz: 526c8fdcfb1171c09f2e9bf6a4e13dbbc6e837963ce1c1ef7e2e48ed1ec3052e3dbc6a4d04bb33a0b3f083914149c22af4b4238a6c558825a91e3f26e112a378
7
- data.tar.gz: 8dddea87295d1825910e4eecb4a5a16836b06e0070cc07f823a4902d6952550ae3b016bfb2e29d0ce8cc16ba3add12ad28a6ab6fec8eccd8a1656e368d88be5d
6
+ metadata.gz: 28ee05704cdcf8dc48881119a208dad97408d3c50f34b691e6062ca6a8697f3ffe58ba5056c0c8d07f8fec9f5f977927877341572716fe6db3a28876b537264d
7
+ data.tar.gz: c0533aa9584e02300734385ae8552c5e50d9836f7647337eb797eb11aa50b0edd93bc6a54eb3b22059ac4eed26adbb8ea21dcca9285d9bceff019c67a797146c
@@ -0,0 +1,8 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: "/"
5
+ schedule:
6
+ interval: daily
7
+ time: "03:00"
8
+ open-pull-requests-limit: 10
@@ -0,0 +1,58 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ name: >-
8
+ Ruby ${{ matrix.ruby }} (${{ matrix.gemfile }})
9
+ env:
10
+ CI: true
11
+ runs-on: ${{ matrix.os }}
12
+ continue-on-error: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'debug' || matrix.experimental }}
13
+ if: |
14
+ !( contains(github.event.pull_request.title, '[ci skip]')
15
+ || contains(github.event.pull_request.title, '[skip ci]'))
16
+ strategy:
17
+ fail-fast: true
18
+ matrix:
19
+ experimental: [false]
20
+ os: [ ubuntu-latest ]
21
+ ruby:
22
+ - 2.6
23
+ - 2.7
24
+ - '3.0'
25
+ - '3.1'
26
+ gemfile:
27
+ - gemfiles/oga.gemfile
28
+ - gemfiles/nokogiri.gemfile
29
+ include:
30
+ - ruby: head
31
+ os: ubuntu-latest
32
+ gemfile: gemfiles/nokogiri.gemfile
33
+ experimental: true
34
+ - ruby: head
35
+ os: ubuntu-latest
36
+ gemfile: gemfiles/oga.gemfile
37
+ experimental: true
38
+ - ruby: jruby
39
+ os: ubuntu-latest
40
+ gemfile: gemfiles/nokogiri.gemfile
41
+ experimental: true
42
+ - ruby: truffleruby
43
+ os: ubuntu-latest
44
+ gemfile: gemfiles/nokogiri.gemfile
45
+ experimental: true
46
+ steps:
47
+ - name: Repo checkout
48
+ uses: actions/checkout@v2
49
+
50
+ - name: Setup Ruby
51
+ uses: ruby/setup-ruby@v1
52
+ with:
53
+ ruby-version: ${{ matrix.ruby }}
54
+ bundler-cache: true
55
+
56
+ - name: Run tests
57
+ timeout-minutes: 10
58
+ run: bundle exec rake spec
data/CHANGELOG.md CHANGED
@@ -4,7 +4,22 @@ Reverse Chronological Order:
4
4
 
5
5
  ## `master`
6
6
 
7
- * Add your description here
7
+ ...
8
+
9
+ ## `0.15.1` (2021-02-17)
10
+
11
+ * Support for Ruby 3.0
12
+
13
+ ## `0.15.0` (2021-01-26)
14
+
15
+ * Removed failing providers
16
+ * Added new providers
17
+ * Specs refactoring
18
+
19
+ ## `0.14.0` (2020-05-11)
20
+
21
+ * Add MTPro provider
22
+ * Add Proxypedia provider
8
23
 
9
24
  ## `0.13.0` (2020-03-09)
10
25
 
data/Gemfile CHANGED
@@ -6,9 +6,11 @@ gemspec
6
6
 
7
7
  gem "nokogiri", "~> 1.8"
8
8
  gem "oga", "~> 3.2"
9
- gem "rubocop", "~> 0.74"
9
+ gem "rubocop", "~> 1.0"
10
10
 
11
11
  group :test do
12
- gem "coveralls", require: false
12
+ gem "coveralls_reborn", require: false
13
+ # Until I find a way to introduce other MITM proxy
14
+ gem "webrick", "1.4.2"
13
15
  gem "evil-proxy", "~> 0.2"
14
16
  end
data/Rakefile CHANGED
@@ -3,6 +3,8 @@
3
3
  require "bundler/gem_tasks"
4
4
 
5
5
  require "rspec/core/rake_task"
6
- RSpec::Core::RakeTask.new(:spec)
6
+ RSpec::Core::RakeTask.new(:spec) do |t|
7
+ t.rspec_opts = '--tag "~unreliable"'
8
+ end
7
9
 
8
10
  task default: :spec
@@ -7,7 +7,7 @@ gemspec path: "../"
7
7
  gem "nokogiri", "~> 1.8"
8
8
 
9
9
  group :test do
10
- gem "coveralls", require: false
10
+ gem "coveralls_reborn", require: false
11
+ gem "webrick"
11
12
  gem "evil-proxy", "~> 0.2"
12
- gem "rspec", "~> 3.6"
13
13
  end
data/gemfiles/oga.gemfile CHANGED
@@ -4,10 +4,10 @@ source "https://rubygems.org"
4
4
 
5
5
  gemspec path: "../"
6
6
 
7
- gem "oga", "~> 2.0"
7
+ gem "oga", "~> 3.0"
8
8
 
9
9
  group :test do
10
- gem "coveralls", require: false
10
+ gem "coveralls_reborn", require: false
11
+ gem "webrick"
11
12
  gem "evil-proxy", "~> 0.2"
12
- gem "rspec", "~> 3.6"
13
13
  end
@@ -41,15 +41,15 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # response body (requested resource content)
43
43
  #
44
- def self.execute(args)
45
- new(args).execute
44
+ def self.execute(**args)
45
+ new(**args).execute
46
46
  end
47
47
 
48
48
  # Initialize new HTTP request
49
49
  #
50
50
  # @return [Request]
51
51
  #
52
- def initialize(args)
52
+ def initialize(**args)
53
53
  raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
54
54
 
55
55
  @url = args.fetch(:url)
@@ -114,7 +114,7 @@ module ProxyFetcher
114
114
 
115
115
  # Sets default configuration options
116
116
  def reset!
117
- @logger = Logger.new(STDOUT)
117
+ @logger = Logger.new($stdout)
118
118
  @user_agent = DEFAULT_USER_AGENT
119
119
  @pool_size = 10
120
120
  @client_timeout = 3
@@ -83,7 +83,7 @@ module ProxyFetcher
83
83
  def clear(text)
84
84
  return "" if text.nil? || text.empty?
85
85
 
86
- text.strip.gsub(/[\t]/i, "")
86
+ text.strip.gsub(/\t/i, "")
87
87
  end
88
88
  end
89
89
  end
@@ -47,6 +47,8 @@ module ProxyFetcher
47
47
 
48
48
  ProxyFetcher.config.providers.each do |provider_name|
49
49
  threads << Thread.new do
50
+ Thread.current.report_on_exception = false
51
+
50
52
  provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
51
53
  provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
52
54
  provider_proxies = provider.fetch_proxies!(provider_filters)
@@ -11,7 +11,7 @@ module ProxyFetcher
11
11
 
12
12
  # [NOTE] Doesn't support filtering
13
13
  def xpath
14
- '//table[@id="proxylisttable"]/tbody/tr'
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
15
  end
16
16
 
17
17
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -45,27 +45,6 @@ module ProxyFetcher
45
45
  #
46
46
  def parse_type(html_node)
47
47
  https = html_node.content_at("td[6]")
48
- # frozen_string_literal: true
49
- # FreeProxyList provider class.
50
- # Provider URL to fetch proxy list
51
- # [NOTE] Doesn't support filtering
52
- # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
53
- # object.
54
- #
55
- # @param html_node [Object]
56
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
57
- #
58
- # @return [ProxyFetcher::Proxy]
59
- # Proxy object
60
- #
61
- # Parses HTML node to extract proxy type.
62
- #
63
- # @param html_node [Object]
64
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
65
- #
66
- # @return [String]
67
- # Proxy type
68
- #
69
48
  https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
70
49
  end
71
50
  end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListSocks provider class.
6
+ class FreeProxyListSocks < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.socks-proxy.net/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.type = parse_type(html_node)
32
+ proxy.anonymity = html_node.content_at("td[6]")
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[5]")
48
+
49
+ return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
50
+ return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
51
+
52
+ "Unknown"
53
+ end
54
+ end
55
+
56
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
57
+ end
58
+ end
@@ -9,8 +9,9 @@ module ProxyFetcher
9
9
  "https://www.sslproxies.org/"
10
10
  end
11
11
 
12
+ # [NOTE] Doesn't support filtering
12
13
  def xpath
13
- '//table[@id="proxylisttable"]/tbody/tr'
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
14
15
  end
15
16
 
16
17
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListUS provider class.
6
+ class FreeProxyListUS < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.us-proxy.org/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.anonymity = html_node.content_at("td[5]")
32
+ proxy.type = parse_type(html_node)
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[7]")
48
+ https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
49
+ end
50
+ end
51
+
52
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
53
+ end
54
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # MTPro provider class.
8
+ class MTPro < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://mtpro.xyz/api/?type=socks"
12
+ end
13
+
14
+ def load_proxy_list(filters = {})
15
+ html = load_html(provider_url, filters)
16
+ JSON.parse(html)
17
+ rescue JSON::ParserError
18
+ []
19
+ end
20
+
21
+ # Converts a parsed JSON entry to <code>ProxyFetcher::Proxy</code>
22
+ # object.
23
+ #
24
+ # @param node [Object]
25
+ # Entry of the parsed JSON response (read via the "ip", "port" and "country" keys).
26
+ #
27
+ # @return [ProxyFetcher::Proxy]
28
+ # Proxy object
29
+ #
30
+ def to_proxy(node)
31
+ ProxyFetcher::Proxy.new.tap do |proxy|
32
+ proxy.addr = node["ip"]
33
+ proxy.port = Integer(node["port"])
34
+ proxy.country = node["country"]
35
+ proxy.anonymity = "Unknown"
36
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
37
+ end
38
+ end
39
+ end
40
+
41
+ ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
42
+ end
43
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # Proxypedia provider class.
6
+ class Proxypedia < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://proxypedia.org"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//main/ul/li[position()>1]"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ addr, port = html_node.content_at("a").to_s.split(":")
28
+
29
+ ProxyFetcher::Proxy.new.tap do |proxy|
30
+ proxy.addr = addr
31
+ proxy.port = Integer(port)
32
+ proxy.country = parse_country(html_node)
33
+ proxy.anonymity = "Unknown"
34
+ proxy.type = ProxyFetcher::Proxy::HTTP
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def parse_country(html_node)
41
+ text = html_node.content.to_s
42
+ text[/\((.+?)\)/, 1] || "Unknown"
43
+ end
44
+ end
45
+
46
+ ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
47
+ end
48
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeHTTP provider class.
8
+ class ProxyscrapeHTTP < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(node)
51
+ addr, port = node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::HTTP
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS4 provider class.
8
+ class ProxyscrapeSOCKS4 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS4
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS5 provider class.
8
+ class ProxyscrapeSOCKS5 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
64
+ end
65
+ end
@@ -6,7 +6,7 @@ module ProxyFetcher
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- "https://madison.xroxy.com/proxylist.html"
9
+ "https://www.xroxy.com/proxylist.htm"
10
10
  end
11
11
 
12
12
  def xpath
@@ -41,8 +41,8 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # resource content
43
43
  #
44
- def self.fetch(*args)
45
- new(*args).fetch
44
+ def self.fetch(*args, **kwargs, &block)
45
+ new(*args, **kwargs, &block).fetch
46
46
  end
47
47
 
48
48
  # Initialize HTTP client instance
@@ -51,15 +51,17 @@ module ProxyFetcher
51
51
  #
52
52
  def initialize(url, method: :get, params: {}, headers: {})
53
53
  @url = url.to_s
54
- @method = method
54
+ @method = method.to_sym
55
55
  @params = params
56
56
  @headers = headers
57
57
 
58
- @http = HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
59
- @timeout = ProxyFetcher.config.provider_proxies_load_timeout
58
+ unless HTTP::Request::METHODS.include?(@method)
59
+ raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
60
+ end
60
61
 
61
- @ssl_ctx = OpenSSL::SSL::SSLContext.new
62
- @ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
62
+ @timeout = ProxyFetcher.config.provider_proxies_load_timeout
63
+ @http = build_http_engine
64
+ @ssl_ctx = build_ssl_context
63
65
  end
64
66
 
65
67
  # Fetches resource content by sending HTTP request to it.
@@ -67,30 +69,32 @@ module ProxyFetcher
67
69
  # @return [String]
68
70
  # response body
69
71
  #
70
- def fetch
71
- response = process_http_request
72
+ def fetch(**options)
73
+ response = perform_http_request
74
+ return response if options.fetch(:raw, false)
75
+
72
76
  response.body.to_s
73
77
  rescue StandardError => e
74
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
78
+ ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
75
79
  ""
76
80
  end
77
81
 
78
- def fetch_with_headers
79
- process_http_request
80
- rescue StandardError => e
81
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
82
- HTTP::Response.new(version: "1.1", status: 500, body: "")
83
- end
84
-
85
82
  protected
86
83
 
87
- def process_http_request(http_method: method, http_params: params)
88
- unless HTTP::Request::METHODS.include?(http_method)
89
- raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
84
+ def build_ssl_context
85
+ OpenSSL::SSL::SSLContext.new.tap do |context|
86
+ context.verify_mode = OpenSSL::SSL::VERIFY_NONE
90
87
  end
88
+ end
89
+
90
+ def build_http_engine
91
+ HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
92
+ end
91
93
 
94
+ def perform_http_request(http_method: method, http_params: params)
92
95
  http.public_send(
93
- http_method.to_sym, url,
96
+ http_method,
97
+ url,
94
98
  form: http_params,
95
99
  ssl_context: ssl_ctx
96
100
  )
@@ -10,27 +10,39 @@ module ProxyFetcher
10
10
 
11
11
  # Short variant to validate proxy.
12
12
  #
13
- # @param proxy_addr [String] proxy address or IP
14
- # @param proxy_port [String, Integer] proxy port
13
+ # @param address [String] proxy address or IP
14
+ # @param port [String, Integer] proxy port
15
15
  #
16
16
  # @return [Boolean]
17
17
  # true if connection to the server using proxy established, otherwise false
18
18
  #
19
- def self.connectable?(proxy_addr, proxy_port)
20
- new(proxy_addr, proxy_port).connectable?
19
+ def self.connectable?(address, port)
20
+ new(address, port).connectable?
21
21
  end
22
22
 
23
23
  # Initialize new ProxyValidator instance
24
24
  #
25
- # @param proxy_addr [String] proxy address or IP
26
- # @param proxy_port [String, Integer] proxy port
25
+ # @param address [String] Proxy address or IP
26
+ # @param port [String, Integer] Proxy port
27
+ # @param options [Hash] proxy options
28
+ # @option options [String] :username Proxy authentication username
29
+ # @option options [String] :password Proxy authentication password
30
+ # @option options [Hash] :headers Proxy headers
27
31
  #
28
32
  # @return [ProxyValidator]
29
33
  #
30
- def initialize(proxy_addr, proxy_port)
34
+ def initialize(address, port, options: {})
31
35
  timeout = ProxyFetcher.config.proxy_validation_timeout
36
+ proxy = [address, port.to_i]
32
37
 
33
- @http = HTTP.follow.via(proxy_addr, proxy_port.to_i).timeout(connect: timeout, read: timeout)
38
+ if options[:username] && options[:password]
39
+ proxy << options[:username]
40
+ proxy << options[:password]
41
+ end
42
+
43
+ proxy << options[:headers].to_h if options[:headers]
44
+
45
+ @http = HTTP.follow.via(*proxy).timeout(connect: timeout, read: timeout)
34
46
  end
35
47
 
36
48
  # Checks if proxy is connectable (can be used to connect
@@ -13,7 +13,7 @@ module ProxyFetcher
13
13
  # Major version number
14
14
  MAJOR = 0
15
15
  # Minor version number
16
- MINOR = 13
16
+ MINOR = 16
17
17
  # Smallest version number
18
18
  TINY = 0
19
19
 
data/lib/proxy_fetcher.rb CHANGED
@@ -4,41 +4,47 @@ require "uri"
4
4
  require "http"
5
5
  require "logger"
6
6
 
7
- require File.dirname(__FILE__) + "/proxy_fetcher/version"
7
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/version"
8
8
 
9
- require File.dirname(__FILE__) + "/proxy_fetcher/exceptions"
10
- require File.dirname(__FILE__) + "/proxy_fetcher/configuration"
11
- require File.dirname(__FILE__) + "/proxy_fetcher/configuration/providers_registry"
12
- require File.dirname(__FILE__) + "/proxy_fetcher/proxy"
13
- require File.dirname(__FILE__) + "/proxy_fetcher/manager"
14
- require File.dirname(__FILE__) + "/proxy_fetcher/null_logger"
9
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/exceptions"
10
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration"
11
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration/providers_registry"
12
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/proxy"
13
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/manager"
14
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/null_logger"
15
15
 
16
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/http_client"
17
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_validator"
18
- require File.dirname(__FILE__) + "/proxy_fetcher/utils/proxy_list_validator"
19
- require File.dirname(__FILE__) + "/proxy_fetcher/client/client"
20
- require File.dirname(__FILE__) + "/proxy_fetcher/client/request"
21
- require File.dirname(__FILE__) + "/proxy_fetcher/client/proxies_registry"
16
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/http_client"
17
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_validator"
18
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_list_validator"
19
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/client"
20
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/request"
21
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/client/proxies_registry"
22
22
 
23
- require File.dirname(__FILE__) + "/proxy_fetcher/document"
24
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters"
25
- require File.dirname(__FILE__) + "/proxy_fetcher/document/node"
26
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/abstract_adapter"
27
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/nokogiri_adapter"
28
- require File.dirname(__FILE__) + "/proxy_fetcher/document/adapters/oga_adapter"
23
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document"
24
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters"
25
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/node"
26
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/abstract_adapter"
27
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/nokogiri_adapter"
28
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/oga_adapter"
29
29
 
30
30
  ##
31
31
  # Ruby / JRuby lib for managing proxies
32
32
  module ProxyFetcher
33
33
  # ProxyFetcher providers namespace
34
34
  module Providers
35
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/base"
36
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list"
37
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/free_proxy_list_ssl"
38
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/gather_proxy"
39
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/http_tunnel"
40
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/proxy_list"
41
- require File.dirname(__FILE__) + "/proxy_fetcher/providers/xroxy"
35
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/base"
36
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list"
37
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_socks"
38
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_ssl"
39
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_us"
40
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/http_tunnel"
41
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/mtpro"
42
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxy_list"
43
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxypedia"
44
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_http"
45
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks4"
46
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks5"
47
+ require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/xroxy"
42
48
  end
43
49
 
44
50
  @__config_access_lock__ = Mutex.new
@@ -8,9 +8,11 @@ Gem::Specification.new do |gem|
8
8
  gem.name = "proxy_fetcher"
9
9
  gem.version = ProxyFetcher.gem_version
10
10
  gem.summary = "Ruby gem for dealing with proxy lists from different providers"
11
- gem.description = "This gem can help your Ruby application to make HTTP(S) requests " \
12
- "using proxies by fetching and validating proxy lists from " \
13
- "the different providers."
11
+ gem.description = <<-TEXT.strip.gsub(/[\s\n]+/, " ")
12
+ This gem can help your Ruby application to make HTTP(S) requests
13
+ using proxies by fetching and validating proxy lists from
14
+ the different providers.
15
+ TEXT
14
16
  gem.authors = ["Nikita Bulai"]
15
17
  gem.email = "bulajnikita@gmail.com"
16
18
  gem.require_paths = ["lib"]
@@ -21,8 +23,8 @@ Gem::Specification.new do |gem|
21
23
  gem.license = "MIT"
22
24
  gem.required_ruby_version = ">= 2.3.0"
23
25
 
24
- gem.add_runtime_dependency "http", ">= 3", "< 5"
26
+ gem.add_runtime_dependency "http", ">= 3"
25
27
 
26
28
  gem.add_development_dependency "rake", ">= 12.0"
27
- gem.add_development_dependency "rspec", "~> 3.5"
29
+ gem.add_development_dependency "rspec", "~> 3.9"
28
30
  end
@@ -3,6 +3,11 @@
3
3
  require "spec_helper"
4
4
  require "json"
5
5
 
6
+ begin
7
+ require "webrick"
8
+ rescue LoadError
9
+ # nop
10
+ end
6
11
  require "evil-proxy"
7
12
  require "evil-proxy/async"
8
13
 
@@ -118,15 +123,15 @@ describe ProxyFetcher::Client do
118
123
  end
119
124
  end
120
125
 
121
- xcontext "retries" do
126
+ context "retries" do
122
127
  it "raises an error when reaches max retries limit" do
123
128
  allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
124
129
 
125
- expect { ProxyFetcher::Client.get("http://httpbin.org") }
130
+ expect { ProxyFetcher::Client.get("http://httpbin.org", options: { max_retries: 10 }) }
126
131
  .to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
127
132
  end
128
133
 
129
- it "raises an error when http request returns an error" do
134
+ xit "raises an error when http request returns an error" do
130
135
  allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
131
136
 
132
137
  expect { ProxyFetcher::Client.get("http://httpbin.org") }
@@ -134,14 +139,14 @@ describe ProxyFetcher::Client do
134
139
  end
135
140
 
136
141
  it "refreshes proxy lists if no proxy found" do
137
- ProxyFetcher::Client::ProxiesRegistry.manager.instance_variable_set(:"@proxies", [])
142
+ allow(ProxyFetcher::Manager.new).to receive(:proxies).and_return([])
138
143
 
139
144
  expect { ProxyFetcher::Client.get("http://httpbin.org") }
140
145
  .not_to raise_error
141
146
  end
142
147
  end
143
148
 
144
- context "redirects" do
149
+ xcontext "redirects" do
145
150
  it "follows redirect when present" do
146
151
  content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
147
152
 
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "spec_helper"
4
+
5
+ describe "Proxy classes" do
6
+ [
7
+ [:free_proxy_list, "FreeProxyList"],
8
+ [:free_proxy_list_socks, "FreeProxyListSocks"],
9
+ [:free_proxy_list_ssl, "FreeProxyListSSL"],
10
+ [:free_proxy_list_us, "FreeProxyListUS"],
11
+ [:http_tunnel, "HTTPTunnel"],
12
+ [:mtpro, "MTPro"],
13
+ [:proxy_list, "ProxyList"],
14
+ [:proxypedia, "Proxypedia"],
15
+ [:proxyscrape_http, "ProxyscrapeHTTP"],
16
+ [:proxyscrape_socks4, "ProxyscrapeSOCKS4"],
17
+ [:proxyscrape_socks5, "ProxyscrapeSOCKS5"],
18
+ [:xroxy, "XRoxy"]
19
+ ].each do |(provider_name, provider_klass)|
20
+ describe Object.const_get("ProxyFetcher::Providers::#{provider_klass}") do
21
+ before :all do
22
+ ProxyFetcher.config.provider = provider_name
23
+ end
24
+
25
+ it_behaves_like "a manager"
26
+ end
27
+ end
28
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxy_fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Bulai
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-09 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -17,9 +17,6 @@ dependencies:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '3'
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '5'
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
@@ -27,9 +24,6 @@ dependencies:
27
24
  - - ">="
28
25
  - !ruby/object:Gem::Version
29
26
  version: '3'
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '5'
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: rake
35
29
  requirement: !ruby/object:Gem::Requirement
@@ -50,14 +44,14 @@ dependencies:
50
44
  requirements:
51
45
  - - "~>"
52
46
  - !ruby/object:Gem::Version
53
- version: '3.5'
47
+ version: '3.9'
54
48
  type: :development
55
49
  prerelease: false
56
50
  version_requirements: !ruby/object:Gem::Requirement
57
51
  requirements:
58
52
  - - "~>"
59
53
  - !ruby/object:Gem::Version
60
- version: '3.5'
54
+ version: '3.9'
61
55
  description: This gem can help your Ruby application to make HTTP(S) requests using
62
56
  proxies by fetching and validating proxy lists from the different providers.
63
57
  email: bulajnikita@gmail.com
@@ -67,6 +61,8 @@ executables:
67
61
  extensions: []
68
62
  extra_rdoc_files: []
69
63
  files:
64
+ - ".github/dependabot.yml"
65
+ - ".github/workflows/ci.yml"
70
66
  - ".gitignore"
71
67
  - CHANGELOG.md
72
68
  - CODE_OF_CONDUCT.md
@@ -94,10 +90,16 @@ files:
94
90
  - lib/proxy_fetcher/null_logger.rb
95
91
  - lib/proxy_fetcher/providers/base.rb
96
92
  - lib/proxy_fetcher/providers/free_proxy_list.rb
93
+ - lib/proxy_fetcher/providers/free_proxy_list_socks.rb
97
94
  - lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
98
- - lib/proxy_fetcher/providers/gather_proxy.rb
95
+ - lib/proxy_fetcher/providers/free_proxy_list_us.rb
99
96
  - lib/proxy_fetcher/providers/http_tunnel.rb
97
+ - lib/proxy_fetcher/providers/mtpro.rb
100
98
  - lib/proxy_fetcher/providers/proxy_list.rb
99
+ - lib/proxy_fetcher/providers/proxypedia.rb
100
+ - lib/proxy_fetcher/providers/proxyscrape_http.rb
101
+ - lib/proxy_fetcher/providers/proxyscrape_socks4.rb
102
+ - lib/proxy_fetcher/providers/proxyscrape_socks5.rb
101
103
  - lib/proxy_fetcher/providers/xroxy.rb
102
104
  - lib/proxy_fetcher/proxy.rb
103
105
  - lib/proxy_fetcher/utils/http_client.rb
@@ -112,13 +114,8 @@ files:
112
114
  - spec/proxy_fetcher/document/node_spec.rb
113
115
  - spec/proxy_fetcher/manager_spec.rb
114
116
  - spec/proxy_fetcher/providers/base_spec.rb
115
- - spec/proxy_fetcher/providers/free_proxy_list_spec.rb
116
- - spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
117
- - spec/proxy_fetcher/providers/gather_proxy_spec.rb
118
- - spec/proxy_fetcher/providers/http_tunnel_spec.rb
119
117
  - spec/proxy_fetcher/providers/multiple_providers_spec.rb
120
- - spec/proxy_fetcher/providers/proxy_list_spec.rb
121
- - spec/proxy_fetcher/providers/xroxy_spec.rb
118
+ - spec/proxy_fetcher/providers/proxy_classes_spec.rb
122
119
  - spec/proxy_fetcher/proxy_spec.rb
123
120
  - spec/proxy_fetcher/version_spec.rb
124
121
  - spec/spec_helper.rb
@@ -142,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
142
139
  - !ruby/object:Gem::Version
143
140
  version: '0'
144
141
  requirements: []
145
- rubygems_version: 3.0.2
142
+ rubygems_version: 3.0.8
146
143
  signing_key:
147
144
  specification_version: 4
148
145
  summary: Ruby gem for dealing with proxy lists from different providers
@@ -1,50 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "json"
4
-
5
- module ProxyFetcher
6
- module Providers
7
- # GatherProxy provider class.
8
- class GatherProxy < Base
9
- # Provider URL to fetch proxy list
10
- def provider_url
11
- "https://proxygather.com"
12
- end
13
-
14
- def xpath
15
- '//div[@class="proxy-list"]/table/script'
16
- end
17
-
18
- # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
19
- # object.
20
- #
21
- # @param html_node [Object]
22
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
23
- #
24
- # @return [ProxyFetcher::Proxy]
25
- # Proxy object
26
- #
27
- def to_proxy(html_node)
28
- json = parse_json(html_node)
29
-
30
- ProxyFetcher::Proxy.new.tap do |proxy|
31
- proxy.addr = json["PROXY_IP"]
32
- proxy.port = json["PROXY_PORT"].to_i(16)
33
- proxy.anonymity = json["PROXY_TYPE"]
34
- proxy.country = json["PROXY_COUNTRY"]
35
- proxy.response_time = json["PROXY_TIME"].to_i
36
- proxy.type = ProxyFetcher::Proxy::HTTP
37
- end
38
- end
39
-
40
- private
41
-
42
- def parse_json(html_node)
43
- javascript = html_node.content[/{.+}/im]
44
- JSON.parse(javascript)
45
- end
46
- end
47
-
48
- ProxyFetcher::Configuration.register_provider(:gather_proxy, GatherProxy)
49
- end
50
- end
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::FreeProxyList do
6
- before :all do
7
- ProxyFetcher.configure do |config|
8
- config.provider = :free_proxy_list
9
- end
10
- end
11
-
12
- it_behaves_like "a manager"
13
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::FreeProxyListSSL do
6
- before :all do
7
- ProxyFetcher.config.provider = :free_proxy_list_ssl
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::GatherProxy do
6
- before :all do
7
- ProxyFetcher.config.provider = :gather_proxy
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::HTTPTunnel do
6
- before :all do
7
- ProxyFetcher.config.provider = :http_tunnel
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::ProxyList do
6
- before :all do
7
- ProxyFetcher.config.provider = :proxy_list
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "spec_helper"
4
-
5
- describe ProxyFetcher::Providers::XRoxy do
6
- before :all do
7
- ProxyFetcher.config.provider = :xroxy
8
- end
9
-
10
- it_behaves_like "a manager"
11
- end