proxy_fetcher 0.13.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ci.yml +58 -0
- data/CHANGELOG.md +16 -1
- data/Gemfile +4 -2
- data/Rakefile +3 -1
- data/gemfiles/nokogiri.gemfile +2 -2
- data/gemfiles/oga.gemfile +3 -3
- data/lib/proxy_fetcher/client/request.rb +3 -3
- data/lib/proxy_fetcher/configuration.rb +1 -1
- data/lib/proxy_fetcher/document/node.rb +1 -1
- data/lib/proxy_fetcher/manager.rb +2 -0
- data/lib/proxy_fetcher/providers/free_proxy_list.rb +1 -22
- data/lib/proxy_fetcher/providers/free_proxy_list_socks.rb +58 -0
- data/lib/proxy_fetcher/providers/free_proxy_list_ssl.rb +2 -1
- data/lib/proxy_fetcher/providers/free_proxy_list_us.rb +54 -0
- data/lib/proxy_fetcher/providers/mtpro.rb +43 -0
- data/lib/proxy_fetcher/providers/proxypedia.rb +48 -0
- data/lib/proxy_fetcher/providers/proxyscrape_http.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks4.rb +65 -0
- data/lib/proxy_fetcher/providers/proxyscrape_socks5.rb +65 -0
- data/lib/proxy_fetcher/providers/xroxy.rb +1 -1
- data/lib/proxy_fetcher/utils/http_client.rb +25 -21
- data/lib/proxy_fetcher/utils/proxy_validator.rb +20 -8
- data/lib/proxy_fetcher/version.rb +1 -1
- data/lib/proxy_fetcher.rb +32 -26
- data/proxy_fetcher.gemspec +7 -5
- data/spec/proxy_fetcher/client/client_spec.rb +10 -5
- data/spec/proxy_fetcher/providers/proxy_classes_spec.rb +28 -0
- metadata +15 -18
- data/lib/proxy_fetcher/providers/gather_proxy.rb +0 -50
- data/spec/proxy_fetcher/providers/free_proxy_list_spec.rb +0 -13
- data/spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/gather_proxy_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/http_tunnel_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/proxy_list_spec.rb +0 -11
- data/spec/proxy_fetcher/providers/xroxy_spec.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 985e3e5cffffa62960dbe04510e232b5a8e652119acb2cc787824e0834f10870
|
4
|
+
data.tar.gz: beb50433ddf5d298ca1c7c45af357395cb24f87b695231159b3b52ad8429cd8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28ee05704cdcf8dc48881119a208dad97408d3c50f34b691e6062ca6a8697f3ffe58ba5056c0c8d07f8fec9f5f977927877341572716fe6db3a28876b537264d
|
7
|
+
data.tar.gz: c0533aa9584e02300734385ae8552c5e50d9836f7647337eb797eb11aa50b0edd93bc6a54eb3b22059ac4eed26adbb8ea21dcca9285d9bceff019c67a797146c
|
@@ -0,0 +1,58 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
name: >-
|
8
|
+
Ruby ${{ matrix.ruby }} (${{ matrix.gemfile }})
|
9
|
+
env:
|
10
|
+
CI: true
|
11
|
+
runs-on: ${{ matrix.os }}
|
12
|
+
continue-on-error: ${{ endsWith(matrix.ruby, 'head') || matrix.ruby == 'debug' || matrix.experimental }}
|
13
|
+
if: |
|
14
|
+
!( contains(github.event.pull_request.title, '[ci skip]')
|
15
|
+
|| contains(github.event.pull_request.title, '[skip ci]'))
|
16
|
+
strategy:
|
17
|
+
fail-fast: true
|
18
|
+
matrix:
|
19
|
+
experimental: [false]
|
20
|
+
os: [ ubuntu-latest ]
|
21
|
+
ruby:
|
22
|
+
- 2.6
|
23
|
+
- 2.7
|
24
|
+
- '3.0'
|
25
|
+
- '3.1'
|
26
|
+
gemfile:
|
27
|
+
- gemfiles/oga.gemfile
|
28
|
+
- gemfiles/nokogiri.gemfile
|
29
|
+
include:
|
30
|
+
- ruby: head
|
31
|
+
os: ubuntu-latest
|
32
|
+
gemfile: gemfiles/nokogiri.gemfile
|
33
|
+
experimental: true
|
34
|
+
- ruby: head
|
35
|
+
os: ubuntu-latest
|
36
|
+
gemfile: gemfiles/oga.gemfile
|
37
|
+
experimental: true
|
38
|
+
- ruby: jruby
|
39
|
+
os: ubuntu-latest
|
40
|
+
gemfile: gemfiles/nokogiri.gemfile
|
41
|
+
experimental: true
|
42
|
+
- ruby: truffleruby
|
43
|
+
os: ubuntu-latest
|
44
|
+
gemfile: gemfiles/nokogiri.gemfile
|
45
|
+
experimental: true
|
46
|
+
steps:
|
47
|
+
- name: Repo checkout
|
48
|
+
uses: actions/checkout@v2
|
49
|
+
|
50
|
+
- name: Setup Ruby
|
51
|
+
uses: ruby/setup-ruby@v1
|
52
|
+
with:
|
53
|
+
ruby-version: ${{ matrix.ruby }}
|
54
|
+
bundler-cache: true
|
55
|
+
|
56
|
+
- name: Run tests
|
57
|
+
timeout-minutes: 10
|
58
|
+
run: bundle exec rake spec
|
data/CHANGELOG.md
CHANGED
@@ -4,7 +4,22 @@ Reverse Chronological Order:
|
|
4
4
|
|
5
5
|
## `master`
|
6
6
|
|
7
|
-
|
7
|
+
...
|
8
|
+
|
9
|
+
## `0.15.1` (2021-02-17)
|
10
|
+
|
11
|
+
* Support for Ruby 3.0
|
12
|
+
|
13
|
+
## `0.15.0` (2021-01-26)
|
14
|
+
|
15
|
+
* Removed failing providers
|
16
|
+
* Added new providers
|
17
|
+
* Specs refactoring
|
18
|
+
|
19
|
+
## `0.14.0` (2020-05-11)
|
20
|
+
|
21
|
+
* Add MTPro provider
|
22
|
+
* Add Proxypedia provider
|
8
23
|
|
9
24
|
## `0.13.0` (2020-03-09)
|
10
25
|
|
data/Gemfile
CHANGED
@@ -6,9 +6,11 @@ gemspec
|
|
6
6
|
|
7
7
|
gem "nokogiri", "~> 1.8"
|
8
8
|
gem "oga", "~> 3.2"
|
9
|
-
gem "rubocop", "~> 0
|
9
|
+
gem "rubocop", "~> 1.0"
|
10
10
|
|
11
11
|
group :test do
|
12
|
-
gem "
|
12
|
+
gem "coveralls_reborn", require: false
|
13
|
+
# Until I find a way to introduce other MITM proxy
|
14
|
+
gem "webrick", "1.4.2"
|
13
15
|
gem "evil-proxy", "~> 0.2"
|
14
16
|
end
|
data/Rakefile
CHANGED
data/gemfiles/nokogiri.gemfile
CHANGED
data/gemfiles/oga.gemfile
CHANGED
@@ -4,10 +4,10 @@ source "https://rubygems.org"
|
|
4
4
|
|
5
5
|
gemspec path: "../"
|
6
6
|
|
7
|
-
gem "oga", "~>
|
7
|
+
gem "oga", "~> 3.0"
|
8
8
|
|
9
9
|
group :test do
|
10
|
-
gem "
|
10
|
+
gem "coveralls_reborn", require: false
|
11
|
+
gem "webrick"
|
11
12
|
gem "evil-proxy", "~> 0.2"
|
12
|
-
gem "rspec", "~> 3.6"
|
13
13
|
end
|
@@ -41,15 +41,15 @@ module ProxyFetcher
|
|
41
41
|
# @return [String]
|
42
42
|
# response body (requested resource content)
|
43
43
|
#
|
44
|
-
def self.execute(args)
|
45
|
-
new(args).execute
|
44
|
+
def self.execute(**args)
|
45
|
+
new(**args).execute
|
46
46
|
end
|
47
47
|
|
48
48
|
# Initialize new HTTP request
|
49
49
|
#
|
50
50
|
# @return [Request]
|
51
51
|
#
|
52
|
-
def initialize(args)
|
52
|
+
def initialize(**args)
|
53
53
|
raise ArgumentError, "args must be a Hash!" unless args.is_a?(Hash)
|
54
54
|
|
55
55
|
@url = args.fetch(:url)
|
@@ -47,6 +47,8 @@ module ProxyFetcher
|
|
47
47
|
|
48
48
|
ProxyFetcher.config.providers.each do |provider_name|
|
49
49
|
threads << Thread.new do
|
50
|
+
Thread.current.report_on_exception = false
|
51
|
+
|
50
52
|
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
51
53
|
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
52
54
|
provider_proxies = provider.fetch_proxies!(provider_filters)
|
@@ -11,7 +11,7 @@ module ProxyFetcher
|
|
11
11
|
|
12
12
|
# [NOTE] Doesn't support filtering
|
13
13
|
def xpath
|
14
|
-
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
15
|
end
|
16
16
|
|
17
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -45,27 +45,6 @@ module ProxyFetcher
|
|
45
45
|
#
|
46
46
|
def parse_type(html_node)
|
47
47
|
https = html_node.content_at("td[6]")
|
48
|
-
# frozen_string_literal: true
|
49
|
-
# FreeProxyList provider class.
|
50
|
-
# Provider URL to fetch proxy list
|
51
|
-
# [NOTE] Doesn't support filtering
|
52
|
-
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
53
|
-
# object.
|
54
|
-
#
|
55
|
-
# @param html_node [Object]
|
56
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
57
|
-
#
|
58
|
-
# @return [ProxyFetcher::Proxy]
|
59
|
-
# Proxy object
|
60
|
-
#
|
61
|
-
# Parses HTML node to extract proxy type.
|
62
|
-
#
|
63
|
-
# @param html_node [Object]
|
64
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
65
|
-
#
|
66
|
-
# @return [String]
|
67
|
-
# Proxy type
|
68
|
-
#
|
69
48
|
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
70
49
|
end
|
71
50
|
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListSocks provider class.
|
6
|
+
class FreeProxyListSocks < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.socks-proxy.net/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.type = parse_type(html_node)
|
32
|
+
proxy.anonymity = html_node.content_at("td[6]")
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[5]")
|
48
|
+
|
49
|
+
return ProxyFetcher::Proxy::SOCKS4 if https&.casecmp("socks4")&.zero?
|
50
|
+
return ProxyFetcher::Proxy::SOCKS5 if https&.casecmp("socks5")&.zero?
|
51
|
+
|
52
|
+
"Unknown"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_socks, FreeProxyListSocks)
|
57
|
+
end
|
58
|
+
end
|
@@ -9,8 +9,9 @@ module ProxyFetcher
|
|
9
9
|
"https://www.sslproxies.org/"
|
10
10
|
end
|
11
11
|
|
12
|
+
# [NOTE] Doesn't support filtering
|
12
13
|
def xpath
|
13
|
-
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
14
15
|
end
|
15
16
|
|
16
17
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# FreeProxyListUS provider class.
|
6
|
+
class FreeProxyListUS < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://www.us-proxy.org/"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//table[./thead/tr/th[contains(text(), 'IP')]]/tbody/tr"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
28
|
+
proxy.addr = html_node.content_at("td[1]")
|
29
|
+
proxy.port = Integer(html_node.content_at("td[2]").gsub(/^0+/, ""))
|
30
|
+
proxy.country = html_node.content_at("td[4]")
|
31
|
+
proxy.anonymity = html_node.content_at("td[5]")
|
32
|
+
proxy.type = parse_type(html_node)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
# Parses HTML node to extract proxy type.
|
39
|
+
#
|
40
|
+
# @param html_node [Object]
|
41
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
42
|
+
#
|
43
|
+
# @return [String]
|
44
|
+
# Proxy type
|
45
|
+
#
|
46
|
+
def parse_type(html_node)
|
47
|
+
https = html_node.content_at("td[7]")
|
48
|
+
https&.casecmp("yes")&.zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ProxyFetcher::Configuration.register_provider(:free_proxy_list_us, FreeProxyListUS)
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# MTPro provider class.
|
8
|
+
class MTPro < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://mtpro.xyz/api/?type=socks"
|
12
|
+
end
|
13
|
+
|
14
|
+
def load_proxy_list(filters = {})
|
15
|
+
html = load_html(provider_url, filters)
|
16
|
+
JSON.parse(html)
|
17
|
+
rescue JSON::ParserError
|
18
|
+
[]
|
19
|
+
end
|
20
|
+
|
21
|
+
# Converts a parsed JSON entry (Hash) to <code>ProxyFetcher::Proxy</code>
|
22
|
+
# object.
|
23
|
+
#
|
24
|
+
# @param node [Object]
|
25
|
+
# Entry from the parsed JSON response (read via "ip", "port" and "country" keys).
|
26
|
+
#
|
27
|
+
# @return [ProxyFetcher::Proxy]
|
28
|
+
# Proxy object
|
29
|
+
#
|
30
|
+
def to_proxy(node)
|
31
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
32
|
+
proxy.addr = node["ip"]
|
33
|
+
proxy.port = Integer(node["port"])
|
34
|
+
proxy.country = node["country"]
|
35
|
+
proxy.anonymity = "Unknown"
|
36
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
ProxyFetcher::Configuration.register_provider(:mtpro, MTPro)
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
module Providers
|
5
|
+
# Proxypedia provider class.
|
6
|
+
class Proxypedia < Base
|
7
|
+
# Provider URL to fetch proxy list
|
8
|
+
def provider_url
|
9
|
+
"https://proxypedia.org"
|
10
|
+
end
|
11
|
+
|
12
|
+
# [NOTE] Doesn't support filtering
|
13
|
+
def xpath
|
14
|
+
"//main/ul/li[position()>1]"
|
15
|
+
end
|
16
|
+
|
17
|
+
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
18
|
+
# object.
|
19
|
+
#
|
20
|
+
# @param html_node [Object]
|
21
|
+
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
22
|
+
#
|
23
|
+
# @return [ProxyFetcher::Proxy]
|
24
|
+
# Proxy object
|
25
|
+
#
|
26
|
+
def to_proxy(html_node)
|
27
|
+
addr, port = html_node.content_at("a").to_s.split(":")
|
28
|
+
|
29
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
30
|
+
proxy.addr = addr
|
31
|
+
proxy.port = Integer(port)
|
32
|
+
proxy.country = parse_country(html_node)
|
33
|
+
proxy.anonymity = "Unknown"
|
34
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
def parse_country(html_node)
|
41
|
+
text = html_node.content.to_s
|
42
|
+
text[/\((.+?)\)/, 1] || "Unknown"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
ProxyFetcher::Configuration.register_provider(:proxypedia, Proxypedia)
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeHTTP provider class.
|
8
|
+
class ProxyscrapeHTTP < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=http"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(node)
|
51
|
+
addr, port = node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::HTTP
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_http, ProxyscrapeHTTP)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS4 provider class.
|
8
|
+
class ProxyscrapeSOCKS4 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks4"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param html_node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS4
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks4, ProxyscrapeSOCKS4)
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "csv"
|
4
|
+
|
5
|
+
module ProxyFetcher
|
6
|
+
module Providers
|
7
|
+
# ProxyscrapeSOCKS5 provider class.
|
8
|
+
class ProxyscrapeSOCKS5 < Base
|
9
|
+
# Provider URL to fetch proxy list
|
10
|
+
def provider_url
|
11
|
+
"https://api.proxyscrape.com/v2/?request=getproxies&protocol=socks5"
|
12
|
+
end
|
13
|
+
|
14
|
+
# Loads provider HTML and parses it with internal document object.
|
15
|
+
#
|
16
|
+
# @param url [String]
|
17
|
+
# URL to fetch
|
18
|
+
#
|
19
|
+
# @param filters [Hash]
|
20
|
+
# filters for proxy provider
|
21
|
+
#
|
22
|
+
# @return [Array]
|
23
|
+
# Collection of extracted proxies with ports
|
24
|
+
#
|
25
|
+
def load_document(url, filters = {})
|
26
|
+
html = load_html(url, filters)
|
27
|
+
|
28
|
+
CSV.parse(html, col_sep: "\t").map(&:first)
|
29
|
+
end
|
30
|
+
|
31
|
+
# Fetches HTML content by sending HTTP request to the provider URL and
|
32
|
+
# parses the txt document to return all the proxy entries (ip addresses
|
33
|
+
# and ports).
|
34
|
+
#
|
35
|
+
# @return [Array]
|
36
|
+
# Collection of extracted proxies with ports
|
37
|
+
#
|
38
|
+
def load_proxy_list(filters = {})
|
39
|
+
load_document(provider_url, filters)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Converts String to <code>ProxyFetcher::Proxy</code> object.
|
43
|
+
#
|
44
|
+
# @param html_node [String]
|
45
|
+
# String
|
46
|
+
#
|
47
|
+
# @return [ProxyFetcher::Proxy]
|
48
|
+
# Proxy object
|
49
|
+
#
|
50
|
+
def to_proxy(html_node)
|
51
|
+
addr, port = html_node.split(":")
|
52
|
+
|
53
|
+
ProxyFetcher::Proxy.new.tap do |proxy|
|
54
|
+
proxy.addr = addr
|
55
|
+
proxy.port = Integer(port)
|
56
|
+
proxy.country = "Unknown"
|
57
|
+
proxy.anonymity = "Unknown"
|
58
|
+
proxy.type = ProxyFetcher::Proxy::SOCKS5
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
ProxyFetcher::Configuration.register_provider(:proxyscrape_socks5, ProxyscrapeSOCKS5)
|
64
|
+
end
|
65
|
+
end
|
@@ -41,8 +41,8 @@ module ProxyFetcher
|
|
41
41
|
# @return [String]
|
42
42
|
# resource content
|
43
43
|
#
|
44
|
-
def self.fetch(*args)
|
45
|
-
new(*args).fetch
|
44
|
+
def self.fetch(*args, **kwargs, &block)
|
45
|
+
new(*args, **kwargs, &block).fetch
|
46
46
|
end
|
47
47
|
|
48
48
|
# Initialize HTTP client instance
|
@@ -51,15 +51,17 @@ module ProxyFetcher
|
|
51
51
|
#
|
52
52
|
def initialize(url, method: :get, params: {}, headers: {})
|
53
53
|
@url = url.to_s
|
54
|
-
@method = method
|
54
|
+
@method = method.to_sym
|
55
55
|
@params = params
|
56
56
|
@headers = headers
|
57
57
|
|
58
|
-
|
59
|
-
|
58
|
+
unless HTTP::Request::METHODS.include?(@method)
|
59
|
+
raise ArgumentError, "'#{@method}' is a wrong HTTP method name"
|
60
|
+
end
|
60
61
|
|
61
|
-
@
|
62
|
-
@
|
62
|
+
@timeout = ProxyFetcher.config.provider_proxies_load_timeout
|
63
|
+
@http = build_http_engine
|
64
|
+
@ssl_ctx = build_ssl_context
|
63
65
|
end
|
64
66
|
|
65
67
|
# Fetches resource content by sending HTTP request to it.
|
@@ -67,30 +69,32 @@ module ProxyFetcher
|
|
67
69
|
# @return [String]
|
68
70
|
# response body
|
69
71
|
#
|
70
|
-
def fetch
|
71
|
-
response =
|
72
|
+
def fetch(**options)
|
73
|
+
response = perform_http_request
|
74
|
+
return response if options.fetch(:raw, false)
|
75
|
+
|
72
76
|
response.body.to_s
|
73
77
|
rescue StandardError => e
|
74
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
78
|
+
ProxyFetcher.config.logger.warn("Failed to process request to #{url} (#{e.message})")
|
75
79
|
""
|
76
80
|
end
|
77
81
|
|
78
|
-
def fetch_with_headers
|
79
|
-
process_http_request
|
80
|
-
rescue StandardError => e
|
81
|
-
ProxyFetcher.logger.warn("Failed to process request to #{url} (#{e.message})")
|
82
|
-
HTTP::Response.new(version: "1.1", status: 500, body: "")
|
83
|
-
end
|
84
|
-
|
85
82
|
protected
|
86
83
|
|
87
|
-
def
|
88
|
-
|
89
|
-
|
84
|
+
def build_ssl_context
|
85
|
+
OpenSSL::SSL::SSLContext.new.tap do |context|
|
86
|
+
context.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
90
87
|
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def build_http_engine
|
91
|
+
HTTP.headers(default_headers.merge(headers)).timeout(connect: timeout, read: timeout)
|
92
|
+
end
|
91
93
|
|
94
|
+
def perform_http_request(http_method: method, http_params: params)
|
92
95
|
http.public_send(
|
93
|
-
http_method
|
96
|
+
http_method,
|
97
|
+
url,
|
94
98
|
form: http_params,
|
95
99
|
ssl_context: ssl_ctx
|
96
100
|
)
|
@@ -10,27 +10,39 @@ module ProxyFetcher
|
|
10
10
|
|
11
11
|
# Short variant to validate proxy.
|
12
12
|
#
|
13
|
-
# @param
|
14
|
-
# @param
|
13
|
+
# @param address [String] proxy address or IP
|
14
|
+
# @param port [String, Integer] proxy port
|
15
15
|
#
|
16
16
|
# @return [Boolean]
|
17
17
|
# true if connection to the server using proxy established, otherwise false
|
18
18
|
#
|
19
|
-
def self.connectable?(
|
20
|
-
new(
|
19
|
+
def self.connectable?(address, port)
|
20
|
+
new(address, port).connectable?
|
21
21
|
end
|
22
22
|
|
23
23
|
# Initialize new ProxyValidator instance
|
24
24
|
#
|
25
|
-
# @param
|
26
|
-
# @param
|
25
|
+
# @param address [String] Proxy address or IP
|
26
|
+
# @param port [String, Integer] Proxy port
|
27
|
+
# @param options [Hash] proxy options
|
28
|
+
# @option username [String] Proxy authentication username
|
29
|
+
# @option password [String] Proxy authentication password
|
30
|
+
# @option headers [Hash] Proxy headers
|
27
31
|
#
|
28
32
|
# @return [ProxyValidator]
|
29
33
|
#
|
30
|
-
def initialize(
|
34
|
+
def initialize(address, port, options: {})
|
31
35
|
timeout = ProxyFetcher.config.proxy_validation_timeout
|
36
|
+
proxy = [address, port.to_i]
|
32
37
|
|
33
|
-
|
38
|
+
if options[:username] && options[:password]
|
39
|
+
proxy << options[:username]
|
40
|
+
proxy << options[:password]
|
41
|
+
end
|
42
|
+
|
43
|
+
proxy << options[:headers].to_h if options[:headers]
|
44
|
+
|
45
|
+
@http = HTTP.follow.via(*proxy).timeout(connect: timeout, read: timeout)
|
34
46
|
end
|
35
47
|
|
36
48
|
# Checks if proxy is connectable (can be used to connect
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -4,41 +4,47 @@ require "uri"
|
|
4
4
|
require "http"
|
5
5
|
require "logger"
|
6
6
|
|
7
|
-
require File.dirname(__FILE__)
|
7
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/version"
|
8
8
|
|
9
|
-
require File.dirname(__FILE__)
|
10
|
-
require File.dirname(__FILE__)
|
11
|
-
require File.dirname(__FILE__)
|
12
|
-
require File.dirname(__FILE__)
|
13
|
-
require File.dirname(__FILE__)
|
14
|
-
require File.dirname(__FILE__)
|
9
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/exceptions"
|
10
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration"
|
11
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/configuration/providers_registry"
|
12
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/proxy"
|
13
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/manager"
|
14
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/null_logger"
|
15
15
|
|
16
|
-
require File.dirname(__FILE__)
|
17
|
-
require File.dirname(__FILE__)
|
18
|
-
require File.dirname(__FILE__)
|
19
|
-
require File.dirname(__FILE__)
|
20
|
-
require File.dirname(__FILE__)
|
21
|
-
require File.dirname(__FILE__)
|
16
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/http_client"
|
17
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_validator"
|
18
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/utils/proxy_list_validator"
|
19
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/client"
|
20
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/request"
|
21
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/client/proxies_registry"
|
22
22
|
|
23
|
-
require File.dirname(__FILE__)
|
24
|
-
require File.dirname(__FILE__)
|
25
|
-
require File.dirname(__FILE__)
|
26
|
-
require File.dirname(__FILE__)
|
27
|
-
require File.dirname(__FILE__)
|
28
|
-
require File.dirname(__FILE__)
|
23
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document"
|
24
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters"
|
25
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/node"
|
26
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/abstract_adapter"
|
27
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/nokogiri_adapter"
|
28
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/document/adapters/oga_adapter"
|
29
29
|
|
30
30
|
##
|
31
31
|
# Ruby / JRuby lib for managing proxies
|
32
32
|
module ProxyFetcher
|
33
33
|
# ProxyFetcher providers namespace
|
34
34
|
module Providers
|
35
|
-
require File.dirname(__FILE__)
|
36
|
-
require File.dirname(__FILE__)
|
37
|
-
require File.dirname(__FILE__)
|
38
|
-
require File.dirname(__FILE__)
|
39
|
-
require File.dirname(__FILE__)
|
40
|
-
require File.dirname(__FILE__)
|
41
|
-
require File.dirname(__FILE__)
|
35
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/base"
|
36
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list"
|
37
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_socks"
|
38
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_ssl"
|
39
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/free_proxy_list_us"
|
40
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/http_tunnel"
|
41
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/mtpro"
|
42
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxy_list"
|
43
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxypedia"
|
44
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_http"
|
45
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks4"
|
46
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/proxyscrape_socks5"
|
47
|
+
require "#{File.dirname(__FILE__)}/proxy_fetcher/providers/xroxy"
|
42
48
|
end
|
43
49
|
|
44
50
|
@__config_access_lock__ = Mutex.new
|
data/proxy_fetcher.gemspec
CHANGED
@@ -8,9 +8,11 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.name = "proxy_fetcher"
|
9
9
|
gem.version = ProxyFetcher.gem_version
|
10
10
|
gem.summary = "Ruby gem for dealing with proxy lists from different providers"
|
11
|
-
gem.description =
|
12
|
-
|
13
|
-
|
11
|
+
gem.description = <<-TEXT.strip.gsub(/[\s\n]+/, " ")
|
12
|
+
This gem can help your Ruby application to make HTTP(S) requests
|
13
|
+
using proxies by fetching and validating proxy lists from
|
14
|
+
the different providers.
|
15
|
+
TEXT
|
14
16
|
gem.authors = ["Nikita Bulai"]
|
15
17
|
gem.email = "bulajnikita@gmail.com"
|
16
18
|
gem.require_paths = ["lib"]
|
@@ -21,8 +23,8 @@ Gem::Specification.new do |gem|
|
|
21
23
|
gem.license = "MIT"
|
22
24
|
gem.required_ruby_version = ">= 2.3.0"
|
23
25
|
|
24
|
-
gem.add_runtime_dependency "http", ">= 3"
|
26
|
+
gem.add_runtime_dependency "http", ">= 3"
|
25
27
|
|
26
28
|
gem.add_development_dependency "rake", ">= 12.0"
|
27
|
-
gem.add_development_dependency "rspec", "~> 3.
|
29
|
+
gem.add_development_dependency "rspec", "~> 3.9"
|
28
30
|
end
|
@@ -3,6 +3,11 @@
|
|
3
3
|
require "spec_helper"
|
4
4
|
require "json"
|
5
5
|
|
6
|
+
begin
|
7
|
+
require "webrick"
|
8
|
+
rescue LoadError
|
9
|
+
# nop
|
10
|
+
end
|
6
11
|
require "evil-proxy"
|
7
12
|
require "evil-proxy/async"
|
8
13
|
|
@@ -118,15 +123,15 @@ describe ProxyFetcher::Client do
|
|
118
123
|
end
|
119
124
|
end
|
120
125
|
|
121
|
-
|
126
|
+
context "retries" do
|
122
127
|
it "raises an error when reaches max retries limit" do
|
123
128
|
allow(ProxyFetcher::Client::Request).to receive(:execute).and_raise(StandardError)
|
124
129
|
|
125
|
-
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
130
|
+
expect { ProxyFetcher::Client.get("http://httpbin.org", options: { max_retries: 10 }) }
|
126
131
|
.to raise_error(ProxyFetcher::Exceptions::MaximumRetriesReached)
|
127
132
|
end
|
128
133
|
|
129
|
-
|
134
|
+
xit "raises an error when http request returns an error" do
|
130
135
|
allow_any_instance_of(HTTP::Client).to receive(:get).and_return(StandardError.new)
|
131
136
|
|
132
137
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
@@ -134,14 +139,14 @@ describe ProxyFetcher::Client do
|
|
134
139
|
end
|
135
140
|
|
136
141
|
it "refreshes proxy lists if no proxy found" do
|
137
|
-
ProxyFetcher::
|
142
|
+
allow(ProxyFetcher::Manager.new).to receive(:proxies).and_return([])
|
138
143
|
|
139
144
|
expect { ProxyFetcher::Client.get("http://httpbin.org") }
|
140
145
|
.not_to raise_error
|
141
146
|
end
|
142
147
|
end
|
143
148
|
|
144
|
-
|
149
|
+
xcontext "redirects" do
|
145
150
|
it "follows redirect when present" do
|
146
151
|
content = ProxyFetcher::Client.get("http://httpbin.org/absolute-redirect/2")
|
147
152
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
|
5
|
+
describe "Proxy classes" do
|
6
|
+
[
|
7
|
+
[:free_proxy_list, "FreeProxyList"],
|
8
|
+
[:free_proxy_list_socks, "FreeProxyListSocks"],
|
9
|
+
[:free_proxy_list_ssl, "FreeProxyListSSL"],
|
10
|
+
[:free_proxy_list_us, "FreeProxyListUS"],
|
11
|
+
[:http_tunnel, "HTTPTunnel"],
|
12
|
+
[:mtpro, "MTPro"],
|
13
|
+
[:proxy_list, "ProxyList"],
|
14
|
+
[:proxypedia, "Proxypedia"],
|
15
|
+
[:proxyscrape_http, "ProxyscrapeHTTP"],
|
16
|
+
[:proxyscrape_socks4, "ProxyscrapeSOCKS4"],
|
17
|
+
[:proxyscrape_socks5, "ProxyscrapeSOCKS5"],
|
18
|
+
[:xroxy, "XRoxy"]
|
19
|
+
].each do |(provider_name, provider_klass)|
|
20
|
+
describe Object.const_get("ProxyFetcher::Providers::#{provider_klass}") do
|
21
|
+
before :all do
|
22
|
+
ProxyFetcher.config.provider = provider_name
|
23
|
+
end
|
24
|
+
|
25
|
+
it_behaves_like "a manager"
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.16.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -17,9 +17,6 @@ dependencies:
|
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '3'
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: '5'
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -27,9 +24,6 @@ dependencies:
|
|
27
24
|
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '3'
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '5'
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: rake
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,14 +44,14 @@ dependencies:
|
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '3.
|
47
|
+
version: '3.9'
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
51
|
requirements:
|
58
52
|
- - "~>"
|
59
53
|
- !ruby/object:Gem::Version
|
60
|
-
version: '3.
|
54
|
+
version: '3.9'
|
61
55
|
description: This gem can help your Ruby application to make HTTP(S) requests using
|
62
56
|
proxies by fetching and validating proxy lists from the different providers.
|
63
57
|
email: bulajnikita@gmail.com
|
@@ -67,6 +61,8 @@ executables:
|
|
67
61
|
extensions: []
|
68
62
|
extra_rdoc_files: []
|
69
63
|
files:
|
64
|
+
- ".github/dependabot.yml"
|
65
|
+
- ".github/workflows/ci.yml"
|
70
66
|
- ".gitignore"
|
71
67
|
- CHANGELOG.md
|
72
68
|
- CODE_OF_CONDUCT.md
|
@@ -94,10 +90,16 @@ files:
|
|
94
90
|
- lib/proxy_fetcher/null_logger.rb
|
95
91
|
- lib/proxy_fetcher/providers/base.rb
|
96
92
|
- lib/proxy_fetcher/providers/free_proxy_list.rb
|
93
|
+
- lib/proxy_fetcher/providers/free_proxy_list_socks.rb
|
97
94
|
- lib/proxy_fetcher/providers/free_proxy_list_ssl.rb
|
98
|
-
- lib/proxy_fetcher/providers/gather_proxy.rb
|
95
|
+
- lib/proxy_fetcher/providers/free_proxy_list_us.rb
|
99
96
|
- lib/proxy_fetcher/providers/http_tunnel.rb
|
97
|
+
- lib/proxy_fetcher/providers/mtpro.rb
|
100
98
|
- lib/proxy_fetcher/providers/proxy_list.rb
|
99
|
+
- lib/proxy_fetcher/providers/proxypedia.rb
|
100
|
+
- lib/proxy_fetcher/providers/proxyscrape_http.rb
|
101
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks4.rb
|
102
|
+
- lib/proxy_fetcher/providers/proxyscrape_socks5.rb
|
101
103
|
- lib/proxy_fetcher/providers/xroxy.rb
|
102
104
|
- lib/proxy_fetcher/proxy.rb
|
103
105
|
- lib/proxy_fetcher/utils/http_client.rb
|
@@ -112,13 +114,8 @@ files:
|
|
112
114
|
- spec/proxy_fetcher/document/node_spec.rb
|
113
115
|
- spec/proxy_fetcher/manager_spec.rb
|
114
116
|
- spec/proxy_fetcher/providers/base_spec.rb
|
115
|
-
- spec/proxy_fetcher/providers/free_proxy_list_spec.rb
|
116
|
-
- spec/proxy_fetcher/providers/free_proxy_list_ssl_spec.rb
|
117
|
-
- spec/proxy_fetcher/providers/gather_proxy_spec.rb
|
118
|
-
- spec/proxy_fetcher/providers/http_tunnel_spec.rb
|
119
117
|
- spec/proxy_fetcher/providers/multiple_providers_spec.rb
|
120
|
-
- spec/proxy_fetcher/providers/
|
121
|
-
- spec/proxy_fetcher/providers/xroxy_spec.rb
|
118
|
+
- spec/proxy_fetcher/providers/proxy_classes_spec.rb
|
122
119
|
- spec/proxy_fetcher/proxy_spec.rb
|
123
120
|
- spec/proxy_fetcher/version_spec.rb
|
124
121
|
- spec/spec_helper.rb
|
@@ -142,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
139
|
- !ruby/object:Gem::Version
|
143
140
|
version: '0'
|
144
141
|
requirements: []
|
145
|
-
rubygems_version: 3.0.
|
142
|
+
rubygems_version: 3.0.8
|
146
143
|
signing_key:
|
147
144
|
specification_version: 4
|
148
145
|
summary: Ruby gem for dealing with proxy lists from different providers
|
@@ -1,50 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "json"
|
4
|
-
|
5
|
-
module ProxyFetcher
|
6
|
-
module Providers
|
7
|
-
# GatherProxy provider class.
|
8
|
-
class GatherProxy < Base
|
9
|
-
# Provider URL to fetch proxy list
|
10
|
-
def provider_url
|
11
|
-
"https://proxygather.com"
|
12
|
-
end
|
13
|
-
|
14
|
-
def xpath
|
15
|
-
'//div[@class="proxy-list"]/table/script'
|
16
|
-
end
|
17
|
-
|
18
|
-
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
19
|
-
# object.
|
20
|
-
#
|
21
|
-
# @param html_node [Object]
|
22
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
23
|
-
#
|
24
|
-
# @return [ProxyFetcher::Proxy]
|
25
|
-
# Proxy object
|
26
|
-
#
|
27
|
-
def to_proxy(html_node)
|
28
|
-
json = parse_json(html_node)
|
29
|
-
|
30
|
-
ProxyFetcher::Proxy.new.tap do |proxy|
|
31
|
-
proxy.addr = json["PROXY_IP"]
|
32
|
-
proxy.port = json["PROXY_PORT"].to_i(16)
|
33
|
-
proxy.anonymity = json["PROXY_TYPE"]
|
34
|
-
proxy.country = json["PROXY_COUNTRY"]
|
35
|
-
proxy.response_time = json["PROXY_TIME"].to_i
|
36
|
-
proxy.type = ProxyFetcher::Proxy::HTTP
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
private
|
41
|
-
|
42
|
-
def parse_json(html_node)
|
43
|
-
javascript = html_node.content[/{.+}/im]
|
44
|
-
JSON.parse(javascript)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
ProxyFetcher::Configuration.register_provider(:gather_proxy, GatherProxy)
|
49
|
-
end
|
50
|
-
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "spec_helper"
|
4
|
-
|
5
|
-
describe ProxyFetcher::Providers::FreeProxyList do
|
6
|
-
before :all do
|
7
|
-
ProxyFetcher.configure do |config|
|
8
|
-
config.provider = :free_proxy_list
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it_behaves_like "a manager"
|
13
|
-
end
|