proxy_fetcher 0.11.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +32 -1
  3. data/Gemfile +4 -2
  4. data/Rakefile +3 -1
  5. data/gemfiles/nokogiri.gemfile +1 -1
  6. data/gemfiles/oga.gemfile +2 -2
  7. data/lib/proxy_fetcher.rb +42 -31
  8. data/lib/proxy_fetcher/client/request.rb +3 -3
  9. data/lib/proxy_fetcher/configuration.rb +13 -9
  10. data/lib/proxy_fetcher/document/node.rb +1 -1
  11. data/lib/proxy_fetcher/manager.rb +40 -7
  12. data/lib/proxy_fetcher/providers/base.rb +2 -1
  13. data/lib/proxy_fetcher/providers/free_proxy_list.rb +0 -21
  14. data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
  15. data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +1 -0
  16. data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
  17. data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
  18. data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
  19. data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
  20. data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
  21. data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
  22. data/lib/proxy_fetcher/providers/xroxy.rb +2 -2
  23. data/lib/proxy_fetcher/proxy.rb +12 -0
  24. data/lib/proxy_fetcher/utils/http_client.rb +25 -21
  25. data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
  26. data/lib/proxy_fetcher/version.rb +2 -2
  27. data/proxy_fetcher.gemspec +6 -4
  28. data/spec/fixtures/proxies.txt +14 -0
  29. data/spec/proxy_fetcher/client/client_spec.rb +10 -5
  30. data/spec/proxy_fetcher/manager_spec.rb +18 -0
  31. data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
  32. metadata +15 -12
  33. data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
  34. data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
  35. data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
  36. data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
  37. data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
  38. data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
  39. data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
@@ -9,6 +9,7 @@ module ProxyFetcher
9
9
  "https://www.sslproxies.org/"
10
10
  end
11
11
 
12
+ # [NOTE] Doesn't support filtering
12
13
  def xpath
13
14
  '//table[@id="proxylisttable"]/tbody/tr'
14
15
  end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # FreeProxyListUS provider class.
6
+ class FreeProxyListUS < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://www.us-proxy.org/"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ '//table[@id="proxylisttable"]/tbody/tr'
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ ProxyFetcher::Proxy.new.tap do |proxy|
28
+ proxy.addr = html_node.content_at("td[1]")
29
+ proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
30
+ proxy.country = html_node.content_at("td[4]")
31
+ proxy.anonymity = html_node.content_at("td[5]")
32
+ proxy.type = parse_type(html_node)
33
+ end
34
+ end
35
+
36
+ private
37
+
38
+ # Parses HTML node to extract proxy type.
39
+ #
40
+ # @param html_node [Object]
41
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
42
+ #
43
+ # @return [String]
44
+ # Proxy type
45
+ #
46
+ def parse_type(html_node)
47
+ https = html_node.content_at("td[7]")
48
+ https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
49
+ end
50
+ end
51
+
52
+ ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
53
+ end
54
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # MTPro provider class.
8
+ class MTPro < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://mtpro.xyz/api/?type=socks"
12
+ end
13
+
14
+ def load_proxy_list(filters = {})
15
+ html = load_html(provider_url, filters)
16
+ JSON.parse(html)
17
+ rescue JSON::ParserError
18
+ []
19
+ end
20
+
21
+ # Converts a JSON-decoded proxy entry to <code>ProxyFetcher::Proxy</code>
22
+ # object.
23
+ #
24
+ # @param node [Object]
25
+ # Entry read via its "ip", "port" and "country" keys (presumably from the parsed JSON response).
26
+ #
27
+ # @return [ProxyFetcher::Proxy]
28
+ # Proxy object
29
+ #
30
+ def to_proxy(node)
31
+ ProxyFetcher::Proxy.new.tap do |proxy|
32
+ proxy.addr = node["ip"]
33
+ proxy.port = Integer(node["port"])
34
+ proxy.country = node["country"]
35
+ proxy.anonymity = "Unknown"
36
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
37
+ end
38
+ end
39
+ end
40
+
41
+ ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
42
+ end
43
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ module Providers
5
+ # Proxypedia provider class.
6
+ class Proxypedia < Base
7
+ # Provider URL to fetch proxy list
8
+ def provider_url
9
+ "https://proxypedia.org"
10
+ end
11
+
12
+ # [NOTE] Doesn't support filtering
13
+ def xpath
14
+ "//main/ul/li[position()>1]"
15
+ end
16
+
17
+ # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
18
+ # object.
19
+ #
20
+ # @param html_node [Object]
21
+ # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
22
+ #
23
+ # @return [ProxyFetcher::Proxy]
24
+ # Proxy object
25
+ #
26
+ def to_proxy(html_node)
27
+ addr, port = html_node.content_at("a").to_s.split(":")
28
+
29
+ ProxyFetcher::Proxy.new.tap do |proxy|
30
+ proxy.addr = addr
31
+ proxy.port = Integer(port)
32
+ proxy.country = parse_country(html_node)
33
+ proxy.anonymity = "Unknown"
34
+ proxy.type = ProxyFetcher::Proxy::HTTP
35
+ end
36
+ end
37
+
38
+ private
39
+
40
+ def parse_country(html_node)
41
+ text = html_node.content.to_s
42
+ text[/\((.+?)\)/, 1] || "Unknown"
43
+ end
44
+ end
45
+
46
+ ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
47
+ end
48
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeHTTP provider class.
8
+ class ProxyscrapeHTTP < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
12
+ end
13
+
14
+ # Loads provider content and extracts the first column of each tab-separated row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(node)
51
+ addr, port = node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::HTTP
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS4 provider class.
8
+ class ProxyscrapeSOCKS4 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
12
+ end
13
+
14
+ # Loads provider content and extracts the first column of each tab-separated row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS4
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
64
+ end
65
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module ProxyFetcher
6
+ module Providers
7
+ # ProxyscrapeSOCKS5 provider class.
8
+ class ProxyscrapeSOCKS5 < Base
9
+ # Provider URL to fetch proxy list
10
+ def provider_url
11
+ "https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
12
+ end
13
+
14
+ # Loads provider content and extracts the first column of each tab-separated row.
15
+ #
16
+ # @param url [String]
17
+ # URL to fetch
18
+ #
19
+ # @param filters [Hash]
20
+ # filters for proxy provider
21
+ #
22
+ # @return [Array]
23
+ # Collection of extracted proxies with ports
24
+ #
25
+ def load_document(url, filters = {})
26
+ html = load_html(url, filters)
27
+
28
+ CSV.parse(html, col_sep: "\t").map(&:first)
29
+ end
30
+
31
+ # Fetches HTML content by sending HTTP request to the provider URL and
32
+ # parses the txt document to return all the proxy entries (ip addresses
33
+ # and ports).
34
+ #
35
+ # @return [Array]
36
+ # Collection of extracted proxies with ports
37
+ #
38
+ def load_proxy_list(filters = {})
39
+ load_document(provider_url, filters)
40
+ end
41
+
42
+ # Converts String to <code>ProxyFetcher::Proxy</code> object.
43
+ #
44
+ # @param html_node [String]
45
+ # String
46
+ #
47
+ # @return [ProxyFetcher::Proxy]
48
+ # Proxy object
49
+ #
50
+ def to_proxy(html_node)
51
+ addr, port = html_node.split(":")
52
+
53
+ ProxyFetcher::Proxy.new.tap do |proxy|
54
+ proxy.addr = addr
55
+ proxy.port = Integer(port)
56
+ proxy.country = "Unknown"
57
+ proxy.anonymity = "Unknown"
58
+ proxy.type = ProxyFetcher::Proxy::SOCKS5
59
+ end
60
+ end
61
+ end
62
+
63
+ ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
64
+ end
65
+ end
@@ -6,11 +6,11 @@ module ProxyFetcher
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
8
  def provider_url
9
- "https://www.xroxy.com/free-proxy-lists/"
9
+ "https://www.xroxy.com/proxylist.htm"
10
10
  end
11
11
 
12
12
  def xpath
13
- "//div/table/tbody/tr"
13
+ "//tr[@class='row1' or @class='row0']"
14
14
  end
15
15
 
16
16
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -104,5 +104,17 @@ module ProxyFetcher
104
104
  URI::Generic.build(host: addr, port: port).to_s
105
105
  end
106
106
  end
107
+
108
+ def ==(other)
109
+ other.is_a?(Proxy) && addr == other.addr && port == other.port
110
+ end
111
+
112
+ def eql?(other)
113
+ hash.eql?(other.hash)
114
+ end
115
+
116
+ def hash
117
+ [addr.hash, port.hash].hash
118
+ end
107
119
  end
108
120
  end
@@ -41,8 +41,8 @@ module ProxyFetcher
41
41
  # @return [String]
42
42
  # resource content
43
43
  #
44
- def self.fetch(*args)
45
- new(*args).fetch
44
+ def self.fetch(*args, **kwargs, &block)
45
+ new(*args, **kwargs, &block).fetch
46
46
  end
47
47
 
48
48
  # Initialize HTTP client instance
@@ -51,15 +51,17 @@ module ProxyFetcher
51
51
  #
52
52
  def initialize(url, method: :get, params: {}, headers: {})
53
53
  @url = url.to_s
54
- @method = method
54
+ @method = method.to_sym
55
55
  @params = params
56
56
  @headers = headers
57
57
 
58
- @http = HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
59
- @timeout = ProxyFetcher.config.provider_proxies_load_timeout
58
+ unless HTTP::Request::METHODS.include?(@method)
59
+ raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
60
+ end
60
61
 
61
- @ssl_ctx = OpenSSL::SSL::SSLContext.new
62
- @ssl_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
62
+ @timeout = ProxyFetcher.config.provider_proxies_load_timeout
63
+ @http = build_http_engine
64
+ @ssl_ctx = build_ssl_context
63
65
  end
64
66
 
65
67
  # Fetches resource content by sending HTTP request to it.
@@ -67,30 +69,32 @@ module ProxyFetcher
67
69
  # @return [String]
68
70
  # response body
69
71
  #
70
- def fetch
71
- response = process_http_request
72
+ def fetch(**options)
73
+ response = perform_http_request
74
+ return response if options.fetch(:raw, false)
75
+
72
76
  response.body.to_s
73
77
  rescue StandardError => e
74
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
78
+ ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
75
79
  ""
76
80
  end
77
81
 
78
- def fetch_with_headers
79
- process_http_request
80
- rescue StandardError => e
81
- ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
82
- HTTP::Response.new(version: "1.1", status: 500, body: "")
83
- end
84
-
85
82
  protected
86
83
 
87
- def process_http_request(http_method: method, http_params: params)
88
- unless HTTP::Request::METHODS.include?(http_method)
89
- raise ArgumentError, "'#{http_method}' is a wrong HTTP method name!"
84
+ def build_ssl_context
85
+ OpenSSL::SSL::SSLContext.new.tap do |context|
86
+ context.verify_mode = OpenSSL::SSL::VERIFY_NONE
90
87
  end
88
+ end
89
+
90
+ def build_http_engine
91
+ HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
92
+ end
91
93
 
94
+ def perform_http_request(http_method: method, http_params: params)
92
95
  http.public_send(
93
- http_method.to_sym, url,
96
+ http_method,
97
+ url,
94
98
  form: http_params,
95
99
  ssl_context: ssl_ctx
96
100
  )