proxy_fetcher 0.11.0 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +32 -1
  3. data/Gemfile +4 -2
  4. data/Rakefile +3 -1
  5. data/gemfiles/nokogiri.gemfile +1 -1
  6. data/gemfiles/oga.gemfile +2 -2
  7. data/lib/proxy_fetcher.rb +42 -31
  8. data/lib/proxy_fetcher/client/request.rb +3 -3
  9. data/lib/proxy_fetcher/configuration.rb +13 -9
  10. data/lib/proxy_fetcher/document/node.rb +1 -1
  11. data/lib/proxy_fetcher/manager.rb +40 -7
  12. data/lib/proxy_fetcher/providers/base.rb +2 -1
  13. data/lib/proxy_fetcher/providers/free_proxy_list.rb +0 -21
  14. data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
  15. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +1 -0
  16. data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
  17. data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
  18. data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
  19. data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
  20. data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
  21. data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
  22. data/lib/proxy_fetcher/providers/xroxy.rb +2 -2
  23. data/lib/proxy_fetcher/proxy.rb +12 -0
  24. data/lib/proxy_fetcher/utils/http_client.rb +25 -21
  25. data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
  26. data/lib/proxy_fetcher/version.rb +2 -2
  27. data/proxy_fetcher.gemspec +6 -4
  28. data/spec/fixtures/proxies.txt +14 -0
  29. data/spec/proxy_fetcher/client/client_spec.rb +10 -5
  30. data/spec/proxy_fetcher/manager_spec.rb +18 -0
  31. data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
  32. metadata +15 -12
  33. data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
  34. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
  35. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
  36. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
  37. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
  38. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
  39. data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
@@ -9,6 +9,7 @@ module ProxyFetcher
9
9
  "https://www.sslproxies.org/"
10
10
  end
11
11
 
12
+ # [NOTE] Doesn't support filtering
12
13
  def xpath
13
14
  '//table[@id="proxylisttable"]/tbody/tr'
14
15
  end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListUS provider class.
6
+ class FreeProxyListUS < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.us-proxy.org/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ '//table[@id="proxylisttable"]/tbody/tr'
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.anonymity = html_node.content_at("td[5]")
32
+ proxy.type = parse_type(html_node)
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[7]")
48
+ https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
49
+ end
50
+ end
51
+
52
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
53
+ end
54
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # MTPro provider class.
8
+ class MTPro < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://mtpro.xyz/api/?type=socks"
12
+ end
13
+
14
+ def load_proxy_list(filters = {})
15
+ html = load_html(provider_url, filters)
16
+ JSON.parse(html)
17
+ rescue JSON::ParserError
18
+ []
19
+ end
20
+
21
+ # Converts a parsed JSON entry to <code>ProxyFetcher::Proxy</code>
22
+ # object.
23
+ #
24
+ # @param node [Object]
25
+ # Proxy entry, presumably from the parsed JSON response; read via its "ip", "port" and "country" keys.
26
+ #
27
+ # @return [ProxyFetcher::Proxy]
28
+ # Proxy object
29
+ #
30
+ def to_proxy(node)
31
+ ProxyFetcher::Proxy.new.tap do |proxy|
32
+ proxy.addr = node["ip"]
33
+ proxy.port = Integer(node["port"])
34
+ proxy.country = node["country"]
35
+ proxy.anonymity = "Unknown"
36
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
37
+ end
38
+ end
39
+ end
40
+
41
+ ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
42
+ end
43
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # Proxypedia provider class.
6
+ class Proxypedia < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://proxypedia.org"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//main/ul/li[position()>1]"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ addr, port = html_node.content_at("a").to_s.split(":")
28
+
29
+ ProxyFetcher::Proxy.new.tap do |proxy|
30
+ proxy.addr = addr
31
+ proxy.port = Integer(port)
32
+ proxy.country = parse_country(html_node)
33
+ proxy.anonymity = "Unknown"
34
+ proxy.type = ProxyFetcher::Proxy::HTTP
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def parse_country(html_node)
41
+ text = html_node.content.to_s
42
+ text[/\((.+?)\)/, 1] || "Unknown"
43
+ end
44
+ end
45
+
46
+ ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
47
+ end
48
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeHTTP provider class.
8
+ class ProxyscrapeHTTP < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text, keeping the first column of each row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(node)
51
+ addr, port = node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::HTTP
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS4 provider class.
8
+ class ProxyscrapeSOCKS4 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text, keeping the first column of each row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS4
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS5 provider class.
8
+ class ProxyscrapeSOCKS5 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
12
+ end
13
+
14
+ # Loads the provider response and parses it as tab-separated text, keeping the first column of each row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
64
+ end
65
+ end
@@ -6,11 +6,11 @@ module ProxyFetcher
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- "https://www.xroxy.com/free-proxy-lists/"
9
+ "https://www.xroxy.com/proxylist.htm"
10
10
  end
11
11
 
12
12
  def xpath
13
- "//div/table/tbody/tr"
13
+ "//tr[@class='row1' or @class='row0']"
14
14
  end
15
15
 
16
16
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -104,5 +104,17 @@ module ProxyFetcher
104
104
  URI::Generic.build(host: addr, port: port).to_s
105
105
  end
106
106
  end
107
+
108
+ def ==(other)
109
+ other.is_a?(Proxy) && addr == other.addr && port == other.port
110
+ end
111
+
112
+ def eql?(other)
113
+ hash.eql?(other.hash)
114
+ end
115
+
116
+ def hash
117
+ [addr.hash, port.hash].hash
118
+ end
107
119
  end
108
120
  end
@@ -41,8 +41,8 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # resource content
43
43
  #
44
- def self.fetch(*args)
45
- new(*args).fetch
44
+ def self.fetch(*args, **kwargs, &block)
45
+ new(*args, **kwargs, &block).fetch
46
46
  end
47
47
 
48
48
  # Initialize HTTP client instance
@@ -51,15 +51,17 @@ module ProxyFetcher
51
51
  #
52
52
  def initialize(url, method: :get, params: {}, headers: {})
53
53
  @url = url.to_s
54
- @method = method
54
+ @method = method.to_sym
55
55
  @params = params
56
56
  @headers = headers
57
57
 
58
- @http = HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
59
- @timeout = ProxyFetcher.config.provider_proxies_load_timeout
58
+ unless HTTP::Request::METHODS.include?(@method)
59
+ raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
60
+ end
60
61
 
61
- @ssl_ctx = OpenSSL::SSL::SSLContext.new
62
- @ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
62
+ @timeout = ProxyFetcher.config.provider_proxies_load_timeout
63
+ @http = build_http_engine
64
+ @ssl_ctx = build_ssl_context
63
65
  end
64
66
 
65
67
  # Fetches resource content by sending HTTP request to it.
@@ -67,30 +69,32 @@ module ProxyFetcher
67
69
  # @return [String]
68
70
  # response body
69
71
  #
70
- def fetch
71
- response = process_http_request
72
+ def fetch(**options)
73
+ response = perform_http_request
74
+ return response if options.fetch(:raw, false)
75
+
72
76
  response.body.to_s
73
77
  rescue StandardError => e
74
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
78
+ ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
75
79
  ""
76
80
  end
77
81
 
78
- def fetch_with_headers
79
- process_http_request
80
- rescue StandardError => e
81
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
82
- HTTP::Response.new(version: "1.1", status: 500, body: "")
83
- end
84
-
85
82
  protected
86
83
 
87
- def process_http_request(http_method: method, http_params: params)
88
- unless HTTP::Request::METHODS.include?(http_method)
89
- raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
84
+ def build_ssl_context
85
+ OpenSSL::SSL::SSLContext.new.tap do |context|
86
+ context.verify_mode = OpenSSL::SSL::VERIFY_NONE
90
87
  end
88
+ end
89
+
90
+ def build_http_engine
91
+ HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
92
+ end
91
93
 
94
+ def perform_http_request(http_method: method, http_params: params)
92
95
  http.public_send(
93
- http_method.to_sym, url,
96
+ http_method,
97
+ url,
94
98
  form: http_params,
95
99
  ssl_context: ssl_ctx
96
100
  )