proxy_fetcher 0.7.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/lib/proxy_fetcher.rb +1 -0
- data/lib/proxy_fetcher/manager.rb +20 -16
- data/lib/proxy_fetcher/providers/proxy_docker.rb +1 -1
- data/lib/proxy_fetcher/providers/xroxy.rb +8 -23
- data/lib/proxy_fetcher/utils/proxy_list_validator.rb +47 -0
- data/lib/proxy_fetcher/version.rb +2 -2
- data/spec/support/manager_examples.rb +3 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad122ebb9d6241313981ef764c41a1b940dfdc12
|
4
|
+
data.tar.gz: 2cf2d359bfd74122fbfdf9d97d94c1e70b008cd5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4ebe17a6ebfb52a14267856053ad750676d4b28e85e138aa172d778b85368ec85ccd04af55cdceb53e1aaad3db5cdab09d42ee87767d56b1fc5578c1c6ba114b
|
7
|
+
data.tar.gz: 6133dfc0b9810831ae7e79c2795ad3214a4b1c78df71e7a2a910429dd21d6b67a7a2f22c29b284fc30d68fff4ea9907fc39edefbb630027eb9a1e2cd4eb2ae24
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,24 @@
|
|
2
2
|
|
3
3
|
Reverse Chronological Order:
|
4
4
|
|
5
|
+
## `master`
|
6
|
+
|
7
|
+
* Add your changelog here
|
8
|
+
|
9
|
+
## `0.8.0` (2018-11-12)
|
10
|
+
|
11
|
+
* Improve speed of proxy list loading.
|
12
|
+
* Improve speed of proxies cleanup.
|
13
|
+
* Fix ProxyDocker provider
|
14
|
+
|
15
|
+
## `0.7.2` (2018-08-13)
|
16
|
+
|
17
|
+
* Fix XRoxy provider
|
18
|
+
|
19
|
+
## `0.7.1` (2018-07-13)
|
20
|
+
|
21
|
+
* Fix XRoxy provider
|
22
|
+
|
5
23
|
## `0.7.0` (2018-06-04)
|
6
24
|
|
7
25
|
* Migrate to `HTTP.rb` instead of `Net::HTTP`
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -15,6 +15,7 @@ require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
|
|
15
15
|
|
16
16
|
require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
|
17
17
|
require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
|
18
|
+
require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_list_validator'
|
18
19
|
require File.dirname(__FILE__) + '/proxy_fetcher/client/client'
|
19
20
|
require File.dirname(__FILE__) + '/proxy_fetcher/client/request'
|
20
21
|
require File.dirname(__FILE__) + '/proxy_fetcher/client/proxies_registry'
|
@@ -31,12 +31,24 @@ module ProxyFetcher
|
|
31
31
|
def refresh_list!(filters = nil)
|
32
32
|
@proxies = []
|
33
33
|
|
34
|
+
threads = []
|
35
|
+
lock = Mutex.new
|
36
|
+
|
34
37
|
ProxyFetcher.config.providers.each do |provider_name|
|
35
|
-
|
36
|
-
|
38
|
+
threads << Thread.new do
|
39
|
+
provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
|
40
|
+
provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
|
41
|
+
provider_proxies = provider.fetch_proxies!(provider_filters)
|
37
42
|
|
38
|
-
|
43
|
+
lock.synchronize do
|
44
|
+
@proxies.concat(provider_proxies)
|
45
|
+
end
|
46
|
+
end
|
39
47
|
end
|
48
|
+
|
49
|
+
threads.each(&:join)
|
50
|
+
|
51
|
+
@proxies
|
40
52
|
end
|
41
53
|
|
42
54
|
alias fetch! refresh_list!
|
@@ -78,20 +90,12 @@ module ProxyFetcher
|
|
78
90
|
alias pop! get!
|
79
91
|
|
80
92
|
# Clean current proxy list from dead proxies (those that don't respond within the timeout)
|
93
|
+
#
|
94
|
+
# @return [Array<ProxyFetcher::Proxy>]
|
95
|
+
# list of valid proxies
|
81
96
|
def cleanup!
|
82
|
-
|
83
|
-
|
84
|
-
proxies.dup.each_slice(ProxyFetcher.config.pool_size) do |proxy_group|
|
85
|
-
threads = proxy_group.map do |group_proxy|
|
86
|
-
Thread.new(group_proxy, proxies) do |proxy, proxies|
|
87
|
-
lock.synchronize { proxies.delete(proxy) } unless proxy.connectable?
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
threads.each(&:join)
|
92
|
-
end
|
93
|
-
|
94
|
-
@proxies
|
97
|
+
valid_proxies = ProxyListValidator.new(@proxies).validate
|
98
|
+
@proxies &= valid_proxies
|
95
99
|
end
|
96
100
|
|
97
101
|
alias validate! cleanup!
|
@@ -17,7 +17,7 @@ module ProxyFetcher
|
|
17
17
|
# [NOTE] Doesn't support direct filters
|
18
18
|
def load_proxy_list(*)
|
19
19
|
doc = load_document(PROVIDER_URL, {})
|
20
|
-
doc.xpath('//table[contains(@class, "table")]/tr[(
|
20
|
+
doc.xpath('//table[contains(@class, "table")]/tbody/tr[(count(td)>2)]')
|
21
21
|
end
|
22
22
|
|
23
23
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -5,7 +5,7 @@ module ProxyFetcher
|
|
5
5
|
# XRoxy provider class.
|
6
6
|
class XRoxy < Base
|
7
7
|
# Provider URL to fetch proxy list
|
8
|
-
PROVIDER_URL = 'https://www.xroxy.com/
|
8
|
+
PROVIDER_URL = 'https://www.xroxy.com/free-proxy-lists/'.freeze
|
9
9
|
|
10
10
|
# Fetches HTML content by sending HTTP request to the provider URL and
|
11
11
|
# parses the document (built as abstract <code>ProxyFetcher::Document</code>)
|
@@ -16,7 +16,7 @@ module ProxyFetcher
|
|
16
16
|
#
|
17
17
|
def load_proxy_list(filters = { type: 'All_http' })
|
18
18
|
doc = load_document(PROVIDER_URL, filters)
|
19
|
-
doc.xpath('//div
|
19
|
+
doc.xpath('//div/table/tbody/tr')
|
20
20
|
end
|
21
21
|
|
22
22
|
# Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
|
@@ -30,29 +30,14 @@ module ProxyFetcher
|
|
30
30
|
#
|
31
31
|
def to_proxy(html_node)
|
32
32
|
ProxyFetcher::Proxy.new.tap do |proxy|
|
33
|
-
proxy.addr = html_node.content_at('td[
|
34
|
-
proxy.port = Integer(html_node.content_at('td[
|
35
|
-
proxy.anonymity = html_node.content_at('td[
|
36
|
-
proxy.country = html_node.content_at('td[
|
37
|
-
proxy.response_time = Integer(html_node.content_at('td[
|
38
|
-
proxy.type =
|
33
|
+
proxy.addr = html_node.content_at('td[1]')
|
34
|
+
proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, ''))
|
35
|
+
proxy.anonymity = html_node.content_at('td[3]')
|
36
|
+
proxy.country = html_node.content_at('td[5]')
|
37
|
+
proxy.response_time = Integer(html_node.content_at('td[6]'))
|
38
|
+
proxy.type = html_node.content_at('td[3]')
|
39
39
|
end
|
40
40
|
end
|
41
|
-
|
42
|
-
private
|
43
|
-
|
44
|
-
# Parses HTML node to extract proxy type.
|
45
|
-
#
|
46
|
-
# @param html_node [Object]
|
47
|
-
# HTML node from the <code>ProxyFetcher::Document</code> DOM model.
|
48
|
-
#
|
49
|
-
# @return [String]
|
50
|
-
# Proxy type
|
51
|
-
#
|
52
|
-
def parse_type(html_node)
|
53
|
-
https = html_node.content_at('td[5]')
|
54
|
-
https.casecmp('true').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
|
55
|
-
end
|
56
41
|
end
|
57
42
|
|
58
43
|
ProxyFetcher::Configuration.register_provider(:xroxy, XRoxy)
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module ProxyFetcher
|
4
|
+
# This class validates a list of proxies.
|
5
|
+
# Each proxy is validated using <code>ProxyFetcher::ProxyValidator</code>.
|
6
|
+
class ProxyListValidator
|
7
|
+
# @!attribute [r] proxies
|
8
|
+
# @return [Array<ProxyFetcher::Proxy>] Source array of proxies
|
9
|
+
attr_reader :proxies
|
10
|
+
# @!attribute [r] valid_proxies
|
11
|
+
# @return [Array<ProxyFetcher::Proxy>] Array of valid proxies after validation
|
12
|
+
attr_reader :valid_proxies
|
13
|
+
|
14
|
+
# @param [Array<ProxyFetcher::Proxy>] *proxies
|
15
|
+
# Any number of <code>ProxyFetcher::Proxy</code> to validate
|
16
|
+
def initialize(*proxies)
|
17
|
+
@proxies = proxies.flatten
|
18
|
+
end
|
19
|
+
|
20
|
+
# Performs validation
|
21
|
+
#
|
22
|
+
# @return [Array<ProxyFetcher::Proxy>]
|
23
|
+
# list of valid proxies
|
24
|
+
def validate
|
25
|
+
target_proxies = @proxies.dup
|
26
|
+
target_proxies_lock = Mutex.new
|
27
|
+
connectable_proxies = []
|
28
|
+
connectable_proxies_lock = Mutex.new
|
29
|
+
threads = []
|
30
|
+
|
31
|
+
ProxyFetcher.config.pool_size.times do
|
32
|
+
threads << Thread.new do
|
33
|
+
loop do
|
34
|
+
proxy = target_proxies_lock.synchronize { target_proxies.shift }
|
35
|
+
break unless proxy
|
36
|
+
|
37
|
+
connectable_proxies_lock.synchronize { connectable_proxies << proxy } if proxy.connectable?
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
threads.each(&:join)
|
43
|
+
|
44
|
+
@valid_proxies = connectable_proxies
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -35,7 +35,9 @@ RSpec.shared_examples 'a manager' do
|
|
35
35
|
|
36
36
|
manager = ProxyFetcher::Manager.new
|
37
37
|
|
38
|
-
expect
|
38
|
+
expect do
|
39
|
+
manager.cleanup!
|
40
|
+
end.to change { manager.proxies }.to([])
|
39
41
|
end
|
40
42
|
|
41
43
|
it "doesn't pollute the output with array of proxies" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-11-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: http
|
@@ -80,6 +80,7 @@ files:
|
|
80
80
|
- lib/proxy_fetcher/providers/xroxy.rb
|
81
81
|
- lib/proxy_fetcher/proxy.rb
|
82
82
|
- lib/proxy_fetcher/utils/http_client.rb
|
83
|
+
- lib/proxy_fetcher/utils/proxy_list_validator.rb
|
83
84
|
- lib/proxy_fetcher/utils/proxy_validator.rb
|
84
85
|
- lib/proxy_fetcher/version.rb
|
85
86
|
- proxy_fetcher.gemspec
|