proxy_fetcher 0.13.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ci.yml +58 -0
- data/CHANGELOG.md +16 -1
- data/Gemfile +4 -2
- data/Rakefile +3 -1
- data/gemfiles/nokogiri.gemfile +2 -2
- data/gemfiles/oga.gemfile +3 -3
- data/lib/proxy_fetcher/client/request.rb +3 -3
- data/lib/proxy_fetcher/configuration.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +1 -1
- data/lib/proxy_fetcher/manager.rb +2 -0
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +1 -22
- data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -1
- data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
- data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
- data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
- data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +1 -1
- data/lib/proxy_fetcher/utils/http_client.rb +25 -21
- data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
- data/lib/proxy_fetcher/version.rb +1 -1
- data/lib/proxy_fetcher.rb +32 -26
- data/proxy_fetcher.gemspec +7 -5
- data/spec/proxy_fetcher/client/client_spec.rb +10 -5
- data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
- metadata +15 -18
- data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 985e3e5cffffa62960dbe04510e232b5a8e652119acb2cc787824e0834f10870
|
4
|
+
data.tar.gz: beb50433ddf5d298ca1c7c45af357395cb24f87b695231159b3b52ad8429cd8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28ee05704cdcf8dc48881119a208dad97408d3c50f34b691e6062ca6a8697f3ffe58ba5056c0c8d07f8fec9f5f977927877341572716fe6db3a28876b537264d
|
7
|
+
data.tar.gz: c0533aa9584e02300734385ae8552c5e50d9836f7647337eb797eb11aa50b0edd93bc6a54eb3b22059ac4eed26adbb8ea21dcca9285d9bceff019c67a797146c
|
@@ -0,0 +1,58 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
name: >-
|
8
|
+
Ruby ${{ matrix.ruby }} (${{ matrix.gemfile }})
|
9
|
+
env:
|
10
|
+
CI: true
|
11
|
+
runs-on: ${{ matrix.os }}
|
12
|
+
continue-on-error: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'debug' || matrix.experimental }}
|
13
|
+
if: |
|
14
|
+
!( contains(github.event.pull_request.title, '[ci skip]')
|
15
|
+
|| contains(github.event.pull_request.title, '[skip ci]'))
|
16
|
+
strategy:
|
17
|
+
fail-fast: true
|
18
|
+
matrix:
|
19
|
+
experimental: [false]
|
20
|
+
os: [ ubuntu-latest ]
|
21
|
+
ruby:
|
22
|
+
- 2.6
|
23
|
+
- 2.7
|
24
|
+
- '3.0'
|
25
|
+
- '3.1'
|
26
|
+
gemfile:
|
27
|
+
- gemfiles/oga.gemfile
|
28
|
+
- gemfiles/nokogiri.gemfile
|
29
|
+
include:
|
30
|
+
- ruby: head
|
31
|
+
os: ubuntu-latest
|
32
|
+
gemfile: gemfiles/nokogiri.gemfile
|
33
|
+
experimental: true
|
34
|
+
- ruby: head
|
35
|
+
os: ubuntu-latest
|
36
|
+
gemfile: gemfiles/oga.gemfile
|
37
|
+
experimental: true
|
38
|
+
- ruby: jruby
|
39
|
+
os: ubuntu-latest
|
40
|
+
gemfile: gemfiles/nokogiri.gemfile
|
41
|
+
experimental: true
|
42
|
+
- ruby: truffleruby
|
43
|
+
os: ubuntu-latest
|
44
|
+
gemfile: gemfiles/nokogiri.gemfile
|
45
|
+
experimental: true
|
46
|
+
steps:
|
47
|
+
- name: Repo checkout
|
48
|
+
uses: actions/checkout@v2
|
49
|
+
|
50
|
+
- name: Setup Ruby
|
51
|
+
uses: ruby/setup-ruby@v1
|
52
|
+
with:
|
53
|
+
ruby-version: ${{ matrix.ruby }}
|
54
|
+
bundler-cache: true
|
55
|
+
|
56
|
+
- name: Run tests
|
57
|
+
timeout-minutes: 10
|
58
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
@@ -4,7 +4,22 @@ Reverse Chronological Order:
|
|
4
4
|
|
5
5
|
## `master`
|
6
6
|
|
7
|
-
|
7
|
+
...
|
8
|
+
|
9
|
+
## `0.15.1` (2021-02-17)
|
10
|
+
|
11
|
+
* Support for Ruby 3.0
|
12
|
+
|
13
|
+
## `0.15.0` (2021-01-26)
|
14
|
+
|
15
|
+
* Removed failing providers
|
16
|
+
* Added new providers
|
17
|
+
* Specs refactoring
|
18
|
+
|
19
|
+
## `0.14.0` (2020-05-11)
|
20
|
+
|
21
|
+
* Add MTPro provider
|
22
|
+
* Add Proxypedia provider
|
8
23
|
|
9
24
|
## `0.13.0` (2020-03-09)
|
10
25
|
|
data/Gemfile
CHANGED
@@ -6,9 +6,11 @@ gemspec
|
|
6
6
|
|
7
7
|
gem "nokogiri", "~> 1.8"
|
8
8
|
gem "oga", "~> 3.2"
|
9
|
-
gem "rubocop", "~> 0
|
9
|
+
gem "rubocop", "~> 1.0"
|
10
10
|
|
11
11
|
group :test do
|
12
|
-
gem "
|
12
|
+
gem "coveralls_reborn", require: false
|
13
|
+
# Pinned until I find a way to introduce another MITM proxy
|
14
|
+
gem "webrick", "1.4.2"
|
13
15
|
gem "evil-proxy", "~> 0.2"
|
14
16
|
end
|
data/Rakefile
CHANGED
data/gemfiles/nokogiri.gemfile
CHANGED
data/gemfiles/oga.gemfile
CHANGED
@@ -4,10 +4,10 @@ source "https://rubygems.org"
|
|
4
4
|
|
5
5
|
gemspec path: "../"
|
6
6
|
|
7
|
-
gem "oga", "~>
|
7
|
+
gem "oga", "~> 3.0"
|
8
8
|
|
9
9
|
group :test do
|
10
|
-
gem "
|
10
|
+
gem "coveralls_reborn", require: false
|
11
|
+
gem "webrick"
|
11
12
|
gem "evil-proxy", "~> 0.2"
|
12
|
-
gem "rspec", "~> 3.6"
|
13
13
|
end
|
@@ -41,15 +41,15 @@ module ProxyFetcher
|
|
41
41
|
# @return [String]
|
42
42
|
# response body (requested resource content)
|
43
43
|
#
|
44
|
-
def self.execute(args)
|
45
|
-
new(args).execute
|
44
|
+
def self.execute(**args)
|
45
|
+
new(**args).execute
|
46
46
|
end
|
47
47
|
|
48
48
|
# Initialize new HTTP request
|
49
49
|
#
|
50
50
|
# @return [Request]
|
51
51
|
#
|
52
|
-
def initialize(args)
|
52
|
+
def initialize(**args)
|
53
53
|
raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
|
54
54
|
|
55
55
|
@url = args.fetch(:url)
|
@@ -47,6 +47,8 @@ module ProxyFetcher
|
|
47
47
|
|
48
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
49
49
|
threads << Thread.new do
|
50
|
+
Thread.current.report_on_exception = false
|
51
|
+
|
50
52
|
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
51
53
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
52
54
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
@@ -11,7 +11,7 @@ module ProxyFetcher
|
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
13
|
def xpath
|
14
|
-
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
15
|
end
|
16
16
|
|
17
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -45,27 +45,6 @@ module ProxyFetcher
|
|
45
45
|
#
|
46
46
|
def parse_type(html_node)
|
47
47
|
https = html_node.content_at("td[6]")
|
48
|
-
# frozen_string_literal: true
|
49
|
-
# FreeProxyList provider class.
|
50
|
-
# Provider URL to fetch proxy list
|
51
|
-
# [NOTE] Doesn't support filtering
|
52
|
-
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
53
|
-
# object.
|
54
|
-
#
|
55
|
-
# @param html_node [Object]
|
56
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
57
|
-
#
|
58
|
-
# @return [ProxyFetcher::Proxy]
|
59
|
-
# Proxy object
|
60
|
-
#
|
61
|
-
# Parses HTML node to extract proxy type.
|
62
|
-
#
|
63
|
-
# @param html_node [Object]
|
64
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
65
|
-
#
|
66
|
-
# @return [String]
|
67
|
-
# Proxy type
|
68
|
-
#
|
69
48
|
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
70
49
|
end
|
71
50
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListSocks provider class.
|
6
|
+
class FreeProxyListSocks < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.socks-proxy.net/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.type = parse_type(html_node)
|
32
|
+
proxy.anonymity = html_node.content_at("td[6]")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[5]")
|
48
|
+
|
49
|
+
return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
|
50
|
+
return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
|
51
|
+
|
52
|
+
"Unknown"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
|
57
|
+
end
|
58
|
+
end
|
@@ -9,8 +9,9 @@ module ProxyFetcher
|
|
9
9
|
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
+
# [NOTE] Doesn't support filtering
|
12
13
|
def xpath
|
13
|
-
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
14
15
|
end
|
15
16
|
|
16
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListUS provider class.
|
6
|
+
class FreeProxyListUS < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.us-proxy.org/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
32
|
+
proxy.type = parse_type(html_node)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[7]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# MTPro provider class.
|
8
|
+
class MTPro < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://mtpro.xyz/api/?type=socks"
|
12
|
+
end
|
13
|
+
|
14
|
+
def load_proxy_list(filters = {})
|
15
|
+
html = load_html(provider_url, filters)
|
16
|
+
JSON.parse(html)
|
17
|
+
rescue JSON::ParserError
|
18
|
+
[]
|
19
|
+
end
|
20
|
+
|
21
|
+
# Converts a parsed JSON entry from the provider API to <code>ProxyFetcher::Proxy</code>
|
22
|
+
# object.
|
23
|
+
#
|
24
|
+
# @param node [Object]
|
25
|
+
# Parsed JSON entry, read via its "ip", "port" and "country" keys.
|
26
|
+
#
|
27
|
+
# @return [ProxyFetcher::Proxy]
|
28
|
+
# Proxy object
|
29
|
+
#
|
30
|
+
def to_proxy(node)
|
31
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
32
|
+
proxy.addr = node["ip"]
|
33
|
+
proxy.port = Integer(node["port"])
|
34
|
+
proxy.country = node["country"]
|
35
|
+
proxy.anonymity = "Unknown"
|
36
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# Proxypedia provider class.
|
6
|
+
class Proxypedia < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://proxypedia.org"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//main/ul/li[position()>1]"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
addr, port = html_node.content_at("a").to_s.split(":")
|
28
|
+
|
29
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
30
|
+
proxy.addr = addr
|
31
|
+
proxy.port = Integer(port)
|
32
|
+
proxy.country = parse_country(html_node)
|
33
|
+
proxy.anonymity = "Unknown"
|
34
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parse_country(html_node)
|
41
|
+
text = html_node.content.to_s
|
42
|
+
text[/\((.+?)\)/, 1] || "Unknown"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeHTTP provider class.
|
8
|
+
class ProxyscrapeHTTP < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider response and parses it as tab-separated text (first column of each row).
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(node)
|
51
|
+
addr, port = node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS4 provider class.
|
8
|
+
class ProxyscrapeSOCKS4 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider response and parses it as tab-separated text (first column of each row).
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param html_node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS4
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS5 provider class.
|
8
|
+
class ProxyscrapeSOCKS5 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider response and parses it as tab-separated text (first column of each row).
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param html_node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
|
64
|
+
end
|
65
|
+
end
|
@@ -41,8 +41,8 @@ module ProxyFetcher
|
|
41
41
|
# @return [String]
|
42
42
|
# resource content
|
43
43
|
#
|
44
|
-
def self.fetch(*args)
|
45
|
-
new(*args).fetch
|
44
|
+
def self.fetch(*args, **kwargs, &block)
|
45
|
+
new(*args, **kwargs, &block).fetch
|
46
46
|
end
|
47
47
|
|
48
48
|
# Initialize HTTP client instance
|
@@ -51,15 +51,17 @@ module ProxyFetcher
|
|
51
51
|
#
|
52
52
|
def initialize(url, method: :get, params: {}, headers: {})
|
53
53
|
@url = url.to_s
|
54
|
-
@method = method
|
54
|
+
@method = method.to_sym
|
55
55
|
@params = params
|
56
56
|
@headers = headers
|
57
57
|
|
58
|
-
|
59
|
-
|
58
|
+
unless HTTP::Request::METHODS.include?(@method)
|
59
|
+
raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
|
60
|
+
end
|
60
61
|
|
61
|
-
@
|
62
|
-
@
|
62
|
+
@timeout = ProxyFetcher.config.provider_proxies_load_timeout
|
63
|
+
@http = build_http_engine
|
64
|
+
@ssl_ctx = build_ssl_context
|
63
65
|
end
|
64
66
|
|
65
67
|
# Fetches resource content by sending HTTP request to it.
|
@@ -67,30 +69,32 @@ module ProxyFetcher
|
|
67
69
|
# @return [String]
|
68
70
|
# response body
|
69
71
|
#
|
70
|
-
def fetch
|
71
|
-
response =
|
72
|
+
def fetch(**options)
|
73
|
+
response = perform_http_request
|
74
|
+
return response if options.fetch(:raw, false)
|
75
|
+
|
72
76
|
response.body.to_s
|
73
77
|
rescue StandardError => e
|
74
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
78
|
+
ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
|
75
79
|
""
|
76
80
|
end
|
77
81
|
|
78
|
-
def fetch_with_headers
|
79
|
-
process_http_request
|
80
|
-
rescue StandardError => e
|
81
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
82
|
-
HTTP::Response.new(version: "1.1", status: 500, body: "")
|
83
|
-
end
|
84
|
-
|
85
82
|
protected
|
86
83
|
|
87
|
-
def
|
88
|
-
|
89
|
-
|
84
|
+
def build_ssl_context
|
85
|
+
OpenSSL::SSL::SSLContext.new.tap do |context|
|
86
|
+
context.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
90
87
|
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def build_http_engine
|
91
|
+
HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
|
92
|
+
end
|
91
93
|
|
94
|
+
def perform_http_request(http_method: method, http_params: params)
|
92
95
|
http.public_send(
|
93
|
-
http_method
|
96
|
+
http_method,
|
97
|
+
url,
|
94
98
|
form: http_params,
|
95
99
|
ssl_context: ssl_ctx
|
96
100
|
)
|
@@ -10,27 +10,39 @@ module ProxyFetcher
|
|
10
10
|
|
11
11
|
# Short variant to validate proxy.
|
12
12
|
#
|
13
|
-
# @param
|
14
|
-
# @param
|
13
|
+
# @param address [String] proxy address or IP
|
14
|
+
# @param port [String, Integer] proxy port
|
15
15
|
#
|
16
16
|
# @return [Boolean]
|
17
17
|
# true if connection to the server using proxy established, otherwise false
|
18
18
|
#
|
19
|
-
def self.connectable?(
|
20
|
-
new(
|
19
|
+
def self.connectable?(address, port)
|
20
|
+
new(address, port).connectable?
|
21
21
|
end
|
22
22
|
|
23
23
|
# Initialize new ProxyValidator instance
|
24
24
|
#
|
25
|
-
# @param
|
26
|
-
# @param
|
25
|
+
# @param address [String] Proxy address or IP
|
26
|
+
# @param port [String, Integer] Proxy port
|
27
|
+
# @param options [Hash] proxy options
|
28
|
+
# @option options [String] :username Proxy authentication username
|
29
|
+
# @option options [String] :password Proxy authentication password
|
30
|
+
# @option options [Hash] :headers Proxy headers
|
27
31
|
#
|
28
32
|
# @return [ProxyValidator]
|
29
33
|
#
|
30
|
-
def initialize(
|
34
|
+
def initialize(address, port, options: {})
|
31
35
|
timeout = ProxyFetcher.config.proxy_validation_timeout
|
36
|
+
proxy = [address, port.to_i]
|
32
37
|
|
33
|
-
|
38
|
+
if options[:username] && options[:password]
|
39
|
+
proxy << options[:username]
|
40
|
+
proxy << options[:password]
|
41
|
+
end
|
42
|
+
|
43
|
+
proxy << options[:headers].to_h if options[:headers]
|
44
|
+
|
45
|
+
@http = HTTP.follow.via(*proxy).timeout(connect: timeout, read: timeout)
|
34
46
|
end
|
35
47
|
|
36
48
|
# Checks if proxy is connectable (can be used to connect
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -4,41 +4,47 @@ require "uri"
|
|
4
4
|
require "http"
|
5
5
|
require "logger"
|
6
6
|
|
7
|
-
require File.dirname(__FILE__)
|
7
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/version"
|
8
8
|
|
9
|
-
require File.dirname(__FILE__)
|
10
|
-
require File.dirname(__FILE__)
|
11
|
-
require File.dirname(__FILE__)
|
12
|
-
require File.dirname(__FILE__)
|
13
|
-
require File.dirname(__FILE__)
|
14
|
-
require File.dirname(__FILE__)
|
9
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/exceptions"
|
10
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration"
|
11
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration/providers_registry"
|
12
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/proxy"
|
13
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/manager"
|
14
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/null_logger"
|
15
15
|
|
16
|
-
require File.dirname(__FILE__)
|
17
|
-
require File.dirname(__FILE__)
|
18
|
-
require File.dirname(__FILE__)
|
19
|
-
require File.dirname(__FILE__)
|
20
|
-
require File.dirname(__FILE__)
|
21
|
-
require File.dirname(__FILE__)
|
16
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/http_client"
|
17
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_validator"
|
18
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_list_validator"
|
19
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/client"
|
20
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/request"
|
21
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/proxies_registry"
|
22
22
|
|
23
|
-
require File.dirname(__FILE__)
|
24
|
-
require File.dirname(__FILE__)
|
25
|
-
require File.dirname(__FILE__)
|
26
|
-
require File.dirname(__FILE__)
|
27
|
-
require File.dirname(__FILE__)
|
28
|
-
require File.dirname(__FILE__)
|
23
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document"
|
24
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters"
|
25
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/node"
|
26
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/abstract_adapter"
|
27
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/nokogiri_adapter"
|
28
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/oga_adapter"
|
29
29
|
|
30
30
|
##
|
31
31
|
# Ruby / JRuby lib for managing proxies
|
32
32
|
module ProxyFetcher
|
33
33
|
# ProxyFetcher providers namespace
|
34
34
|
module Providers
|
35
|
-
require File.dirname(__FILE__)
|
36
|
-
require File.dirname(__FILE__)
|
37
|
-
require File.dirname(__FILE__)
|
38
|
-
require File.dirname(__FILE__)
|
39
|
-
require File.dirname(__FILE__)
|
40
|
-
require File.dirname(__FILE__)
|
41
|
-
require File.dirname(__FILE__)
|
35
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/base"
|
36
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list"
|
37
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_socks"
|
38
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_ssl"
|
39
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_us"
|
40
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/http_tunnel"
|
41
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/mtpro"
|
42
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxy_list"
|
43
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxypedia"
|
44
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_http"
|
45
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks4"
|
46
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks5"
|
47
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/xroxy"
|
42
48
|
end
|
43
49
|
|
44
50
|
@__config_access_lock__ = Mutex.new
|
data/proxy_fetcher.gemspec
CHANGED
@@ -8,9 +8,11 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.name = "proxy_fetcher"
|
9
9
|
gem.version = ProxyFetcher.gem_version
|
10
10
|
gem.summary = "Ruby gem for dealing with proxy lists from different providers"
|
11
|
-
gem.description =
|
12
|
-
|
13
|
-
|
11
|
+
gem.description = <<-TEXT.strip.gsub(/[\s\n]+/, " ")
|
12
|
+
This gem can help your Ruby application to make HTTP(S) requests
|
13
|
+
using proxies by fetching and validating proxy lists from
|
14
|
+
the different providers.
|
15
|
+
TEXT
|
14
16
|
gem.authors = ["Nikita Bulai"]
|
15
17
|
gem.email = "bulajnikita@gmail.com"
|
16
18
|
gem.require_paths = ["lib"]
|
@@ -21,8 +23,8 @@ Gem::Specification.new do |gem|
|
|
21
23
|
gem.license = "MIT"
|
22
24
|
gem.required_ruby_version = ">= 2.3.0"
|
23
25
|
|
24
|
-
gem.add_runtime_dependency "http", ">= 3"
|
26
|
+
gem.add_runtime_dependency "http", ">= 3"
|
25
27
|
|
26
28
|
gem.add_development_dependency "rake", ">= 12.0"
|
27
|
-
gem.add_development_dependency "rspec", "~> 3.
|
29
|
+
gem.add_development_dependency "rspec", "~> 3.9"
|
28
30
|
end
|
@@ -3,6 +3,11 @@
|
|
3
3
|
require "spec_helper"
|
4
4
|
require "json"
|
5
5
|
|
6
|
+
begin
|
7
|
+
require "webrick"
|
8
|
+
rescue LoadError
|
9
|
+
# nop
|
10
|
+
end
|
6
11
|
require "evil-proxy"
|
7
12
|
require "evil-proxy/async"
|
8
13
|
|
@@ -118,15 +123,15 @@ describe ProxyFetcher::Client do
|
|
118
123
|
end
|
119
124
|
end
|
120
125
|
|
121
|
-
|
126
|
+
context "retries" do
|
122
127
|
it "raises an error when reaches max retries limit" do
|
123
128
|
allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
|
124
129
|
|
125
|
-
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
130
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org", options: { max_retries: 10 }) }
|
126
131
|
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
127
132
|
end
|
128
133
|
|
129
|
-
|
134
|
+
xit "raises an error when http request returns an error" do
|
130
135
|
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
131
136
|
|
132
137
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
@@ -134,14 +139,14 @@ describe ProxyFetcher::Client do
|
|
134
139
|
end
|
135
140
|
|
136
141
|
it "refreshes proxy lists if no proxy found" do
|
137
|
-
ProxyFetcher::
|
142
|
+
allow(ProxyFetcher::Manager.new).to receive(:proxies).and_return([])
|
138
143
|
|
139
144
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
140
145
|
.not_to raise_error
|
141
146
|
end
|
142
147
|
end
|
143
148
|
|
144
|
-
|
149
|
+
xcontext "redirects" do
|
145
150
|
it "follows redirect when present" do
|
146
151
|
content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
|
147
152
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "Proxy classes" do
|
6
|
+
[
|
7
|
+
[:free_proxy_list, "FreeProxyList"],
|
8
|
+
[:free_proxy_list_socks, "FreeProxyListSocks"],
|
9
|
+
[:free_proxy_list_ssl, "FreeProxyListSSL"],
|
10
|
+
[:free_proxy_list_us, "FreeProxyListUS"],
|
11
|
+
[:http_tunnel, "HTTPTunnel"],
|
12
|
+
[:mtpro, "MTPro"],
|
13
|
+
[:proxy_list, "ProxyList"],
|
14
|
+
[:proxypedia, "Proxypedia"],
|
15
|
+
[:proxyscrape_http, "ProxyscrapeHTTP"],
|
16
|
+
[:proxyscrape_socks4, "ProxyscrapeSOCKS4"],
|
17
|
+
[:proxyscrape_socks5, "ProxyscrapeSOCKS5"],
|
18
|
+
[:xroxy, "XRoxy"]
|
19
|
+
].each do |(provider_name, provider_klass)|
|
20
|
+
describe Object.const_get("ProxyFetcher::Providers::#{provider_klass}") do
|
21
|
+
before :all do
|
22
|
+
ProxyFetcher.config.provider = provider_name
|
23
|
+
end
|
24
|
+
|
25
|
+
it_behaves_like "a manager"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -17,9 +17,6 @@ dependencies:
|
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '3'
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: '5'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -27,9 +24,6 @@ dependencies:
|
|
27
24
|
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '3'
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '5'
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: rake
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,14 +44,14 @@ dependencies:
|
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '3.
|
47
|
+
version: '3.9'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '3.
|
54
|
+
version: '3.9'
|
61
55
|
description: This gem can help your Ruby application to make HTTP(S) requests using
|
62
56
|
proxies by fetching and validating proxy lists from the different providers.
|
63
57
|
email: bulajnikita@gmail.com
|
@@ -67,6 +61,8 @@ executables:
|
|
67
61
|
extensions: []
|
68
62
|
extra_rdoc_files: []
|
69
63
|
files:
|
64
|
+
- ".github/dependabot.yml"
|
65
|
+
- ".github/workflows/ci.yml"
|
70
66
|
- ".gitignore"
|
71
67
|
- CHANGELOG.md
|
72
68
|
- CODE_OF_CONDUCT.md
|
@@ -94,10 +90,16 @@ files:
|
|
94
90
|
- lib/proxy_fetcher/null_logger.rb
|
95
91
|
- lib/proxy_fetcher/providers/base.rb
|
96
92
|
- lib/proxy_fetcher/providers/free_proxy_list.rb
|
93
|
+
- lib/proxy_fetcher/providers/free_proxy_list_socks.rb
|
97
94
|
- lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
|
98
|
-
- lib/proxy_fetcher/providers/
|
95
|
+
- lib/proxy_fetcher/providers/free_proxy_list_us.rb
|
99
96
|
- lib/proxy_fetcher/providers/http_tunnel.rb
|
97
|
+
- lib/proxy_fetcher/providers/mtpro.rb
|
100
98
|
- lib/proxy_fetcher/providers/proxy_list.rb
|
99
|
+
- lib/proxy_fetcher/providers/proxypedia.rb
|
100
|
+
- lib/proxy_fetcher/providers/proxyscrape_http.rb
|
101
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks4.rb
|
102
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks5.rb
|
101
103
|
- lib/proxy_fetcher/providers/xroxy.rb
|
102
104
|
- lib/proxy_fetcher/proxy.rb
|
103
105
|
- lib/proxy_fetcher/utils/http_client.rb
|
@@ -112,13 +114,8 @@ files:
|
|
112
114
|
- spec/proxy_fetcher/document/node_spec.rb
|
113
115
|
- spec/proxy_fetcher/manager_spec.rb
|
114
116
|
- spec/proxy_fetcher/providers/base_spec.rb
|
115
|
-
- spec/proxy_fetcher/providers/free_proxy_list_spec.rb
|
116
|
-
- spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
|
117
|
-
- spec/proxy_fetcher/providers/gather_proxy_spec.rb
|
118
|
-
- spec/proxy_fetcher/providers/http_tunnel_spec.rb
|
119
117
|
- spec/proxy_fetcher/providers/multiple_providers_spec.rb
|
120
|
-
- spec/proxy_fetcher/providers/
|
121
|
-
- spec/proxy_fetcher/providers/xroxy_spec.rb
|
118
|
+
- spec/proxy_fetcher/providers/proxy_classes_spec.rb
|
122
119
|
- spec/proxy_fetcher/proxy_spec.rb
|
123
120
|
- spec/proxy_fetcher/version_spec.rb
|
124
121
|
- spec/spec_helper.rb
|
@@ -142,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
139
|
- !ruby/object:Gem::Version
|
143
140
|
version: '0'
|
144
141
|
requirements: []
|
145
|
-
rubygems_version: 3.0.
|
142
|
+
rubygems_version: 3.0.8
|
146
143
|
signing_key:
|
147
144
|
specification_version: 4
|
148
145
|
summary: Ruby gem for dealing with proxy lists from different providers
|
@@ -1,50 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "json"
|
4
|
-
|
5
|
-
module ProxyFetcher
|
6
|
-
module Providers
|
7
|
-
# GatherProxy provider class.
|
8
|
-
class GatherProxy < Base
|
9
|
-
# Provider URL to fetch proxy list
|
10
|
-
def provider_url
|
11
|
-
"https://proxygather.com"
|
12
|
-
end
|
13
|
-
|
14
|
-
def xpath
|
15
|
-
'//div[@class="proxy-list"]/table/script'
|
16
|
-
end
|
17
|
-
|
18
|
-
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
19
|
-
# object.
|
20
|
-
#
|
21
|
-
# @param html_node [Object]
|
22
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
23
|
-
#
|
24
|
-
# @return [ProxyFetcher::Proxy]
|
25
|
-
# Proxy object
|
26
|
-
#
|
27
|
-
def to_proxy(html_node)
|
28
|
-
json = parse_json(html_node)
|
29
|
-
|
30
|
-
ProxyFetcher::Proxy.new.tap do |proxy|
|
31
|
-
proxy.addr = json["PROXY_IP"]
|
32
|
-
proxy.port = json["PROXY_PORT"].to_i(16)
|
33
|
-
proxy.anonymity = json["PROXY_TYPE"]
|
34
|
-
proxy.country = json["PROXY_COUNTRY"]
|
35
|
-
proxy.response_time = json["PROXY_TIME"].to_i
|
36
|
-
proxy.type = ProxyFetcher::Proxy::HTTP
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
private
|
41
|
-
|
42
|
-
def parse_json(html_node)
|
43
|
-
javascript = html_node.content[/{.+}/im]
|
44
|
-
JSON.parse(javascript)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
ProxyFetcher::Configuration.register_provider(:gather_proxy, GatherProxy)
|
49
|
-
end
|
50
|
-
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
|
-
describe ProxyFetcher::Providers::FreeProxyList do
|
6
|
-
before :all do
|
7
|
-
ProxyFetcher.configure do |config|
|
8
|
-
config.provider = :free_proxy_list
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it_behaves_like "a manager"
|
13
|
-
end
|