proxy_fetcher 0.7.1 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 59ba9fd7169617766f99b048a1c889e0213ac57c
4
- data.tar.gz: 03e3d51d71fb5c06c0c7c7d5808a8a43c79c50a4
3
+ metadata.gz: ad122ebb9d6241313981ef764c41a1b940dfdc12
4
+ data.tar.gz: 2cf2d359bfd74122fbfdf9d97d94c1e70b008cd5
5
5
  SHA512:
6
- metadata.gz: 495ccb751c9f927f1ef31e1b2536c600aec0f68189ea11d1f8061d59bfe41caf2dd0e818d9b780a5b8279c66b20c05873ffba4976cadb101c4aff2a7a449bc94
7
- data.tar.gz: a2aae32a4a3649b10399e23520fbf0a19df3ca564a3519290352bd51366cda6f8a709aa7a1783de31e5e474ab23bd86ea2fb41fff320a81eb57a16b3ef111b30
6
+ metadata.gz: 4ebe17a6ebfb52a14267856053ad750676d4b28e85e138aa172d778b85368ec85ccd04af55cdceb53e1aaad3db5cdab09d42ee87767d56b1fc5578c1c6ba114b
7
+ data.tar.gz: 6133dfc0b9810831ae7e79c2795ad3214a4b1c78df71e7a2a910429dd21d6b67a7a2f22c29b284fc30d68fff4ea9907fc39edefbb630027eb9a1e2cd4eb2ae24
data/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  Reverse Chronological Order:
4
4
 
5
+ ## `master`
6
+
7
+ * Add your changelog here
8
+
9
+ ## `0.8.0` (2018-11-12)
10
+
11
+ * Improve speed of proxy list loading.
12
+ * Improve speed of proxies cleanup.
13
+ * Fix ProxyDocker provider
14
+
15
+ ## `0.7.2` (2018-08-13)
16
+
17
+ * Fix XRoxy provider
18
+
19
+ ## `0.7.1` (2018-07-13)
20
+
21
+ * Fix XRoxy provider
22
+
5
23
  ## `0.7.0` (2018-06-04)
6
24
 
7
25
  * Migrate to `HTTP.rb` instead of `Net::HTTP`
data/lib/proxy_fetcher.rb CHANGED
@@ -15,6 +15,7 @@ require File.dirname(__FILE__) + '/proxy_fetcher/null_logger'
15
15
 
16
16
  require File.dirname(__FILE__) + '/proxy_fetcher/utils/http_client'
17
17
  require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_validator'
18
+ require File.dirname(__FILE__) + '/proxy_fetcher/utils/proxy_list_validator'
18
19
  require File.dirname(__FILE__) + '/proxy_fetcher/client/client'
19
20
  require File.dirname(__FILE__) + '/proxy_fetcher/client/request'
20
21
  require File.dirname(__FILE__) + '/proxy_fetcher/client/proxies_registry'
@@ -31,12 +31,24 @@ module ProxyFetcher
31
31
  def refresh_list!(filters = nil)
32
32
  @proxies = []
33
33
 
34
+ threads = []
35
+ lock = Mutex.new
36
+
34
37
  ProxyFetcher.config.providers.each do |provider_name|
35
- provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
36
- provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
38
+ threads << Thread.new do
39
+ provider = ProxyFetcher::Configuration.providers_registry.class_for(provider_name)
40
+ provider_filters = filters && filters.fetch(provider_name.to_sym, filters)
41
+ provider_proxies = provider.fetch_proxies!(provider_filters)
37
42
 
38
- @proxies.concat(provider.fetch_proxies!(provider_filters))
43
+ lock.synchronize do
44
+ @proxies.concat(provider_proxies)
45
+ end
46
+ end
39
47
  end
48
+
49
+ threads.each(&:join)
50
+
51
+ @proxies
40
52
  end
41
53
 
42
54
  alias fetch! refresh_list!
@@ -78,20 +90,12 @@ module ProxyFetcher
78
90
  alias pop! get!
79
91
 
80
92
  # Clean current proxy list from dead proxies (those that don't respond within the timeout)
93
+ #
94
+ # @return [Array<ProxyFetcher::Proxy>]
95
+ # list of valid proxies
81
96
  def cleanup!
82
- lock = Mutex.new
83
-
84
- proxies.dup.each_slice(ProxyFetcher.config.pool_size) do |proxy_group|
85
- threads = proxy_group.map do |group_proxy|
86
- Thread.new(group_proxy, proxies) do |proxy, proxies|
87
- lock.synchronize { proxies.delete(proxy) } unless proxy.connectable?
88
- end
89
- end
90
-
91
- threads.each(&:join)
92
- end
93
-
94
- @proxies
97
+ valid_proxies = ProxyListValidator.new(@proxies).validate
98
+ @proxies &= valid_proxies
95
99
  end
96
100
 
97
101
  alias validate! cleanup!
@@ -17,7 +17,7 @@ module ProxyFetcher
17
17
  # [NOTE] Doesn't support direct filters
18
18
  def load_proxy_list(*)
19
19
  doc = load_document(PROVIDER_URL, {})
20
- doc.xpath('//table[contains(@class, "table")]/tr[(not(@id="proxy-table-header")) and (count(td)>2)]')
20
+ doc.xpath('//table[contains(@class, "table")]/tbody/tr[(count(td)>2)]')
21
21
  end
22
22
 
23
23
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -5,7 +5,7 @@ module ProxyFetcher
5
5
  # XRoxy provider class.
6
6
  class XRoxy < Base
7
7
  # Provider URL to fetch proxy list
8
- PROVIDER_URL = 'https://www.xroxy.com/proxylist.php'.freeze
8
+ PROVIDER_URL = 'https://www.xroxy.com/free-proxy-lists/'.freeze
9
9
 
10
10
  # Fetches HTML content by sending HTTP request to the provider URL and
11
11
  # parses the document (built as abstract <code>ProxyFetcher::Document</code>)
@@ -16,7 +16,7 @@ module ProxyFetcher
16
16
  #
17
17
  def load_proxy_list(filters = { type: 'All_http' })
18
18
  doc = load_document(PROVIDER_URL, filters)
19
- doc.xpath('//div[@id="content"]/table[1]/tr[contains(@class, "row")]')
19
+ doc.xpath('//div/table/tbody/tr')
20
20
  end
21
21
 
22
22
  # Converts HTML node (entry of N tags) to <code>ProxyFetcher::Proxy</code>
@@ -30,29 +30,14 @@ module ProxyFetcher
30
30
  #
31
31
  def to_proxy(html_node)
32
32
  ProxyFetcher::Proxy.new.tap do |proxy|
33
- proxy.addr = html_node.content_at('td[2]')
34
- proxy.port = Integer(html_node.content_at('td[3]').gsub(/^0+/, ''))
35
- proxy.anonymity = html_node.content_at('td[4]')
36
- proxy.country = html_node.content_at('td[6]')
37
- proxy.response_time = Integer(html_node.content_at('td[7]'))
38
- proxy.type = parse_type(html_node)
33
+ proxy.addr = html_node.content_at('td[1]')
34
+ proxy.port = Integer(html_node.content_at('td[2]').gsub(/^0+/, ''))
35
+ proxy.anonymity = html_node.content_at('td[3]')
36
+ proxy.country = html_node.content_at('td[5]')
37
+ proxy.response_time = Integer(html_node.content_at('td[6]'))
38
+ proxy.type = html_node.content_at('td[3]')
39
39
  end
40
40
  end
41
-
42
- private
43
-
44
- # Parses HTML node to extract proxy type.
45
- #
46
- # @param html_node [Object]
47
- # HTML node from the <code>ProxyFetcher::Document</code> DOM model.
48
- #
49
- # @return [String]
50
- # Proxy type
51
- #
52
- def parse_type(html_node)
53
- https = html_node.content_at('td[5]')
54
- https.casecmp('true').zero? ? ProxyFetcher::Proxy::HTTPS : ProxyFetcher::Proxy::HTTP
55
- end
56
41
  end
57
42
 
58
43
  ProxyFetcher::Configuration.register_provider(:xroxy, XRoxy)
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ProxyFetcher
4
+ # This class validates a list of proxies.
5
+ # Each proxy is validated using <code>ProxyFetcher::ProxyValidator</code>.
6
+ class ProxyListValidator
7
+ # @!attribute [r] proxies
8
+ # @return [Array<ProxyFetcher::Proxy>] Source array of proxies
9
+ attr_reader :proxies
10
+ # @!attribute [r] valid_proxies
11
+ # @return [Array<ProxyFetcher::Proxy>] Array of valid proxies after validation
12
+ attr_reader :valid_proxies
13
+
14
+ # @param [Array<ProxyFetcher::Proxy>] *proxies
15
+ # Any number of <code>ProxyFetcher::Proxy</code> objects to validate
16
+ def initialize(*proxies)
17
+ @proxies = proxies.flatten
18
+ end
19
+
20
+ # Performs validation
21
+ #
22
+ # @return [Array<ProxyFetcher::Proxy>]
23
+ # list of valid proxies
24
+ def validate
25
+ target_proxies = @proxies.dup
26
+ target_proxies_lock = Mutex.new
27
+ connectable_proxies = []
28
+ connectable_proxies_lock = Mutex.new
29
+ threads = []
30
+
31
+ ProxyFetcher.config.pool_size.times do
32
+ threads << Thread.new do
33
+ loop do
34
+ proxy = target_proxies_lock.synchronize { target_proxies.shift }
35
+ break unless proxy
36
+
37
+ connectable_proxies_lock.synchronize { connectable_proxies << proxy } if proxy.connectable?
38
+ end
39
+ end
40
+ end
41
+
42
+ threads.each(&:join)
43
+
44
+ @valid_proxies = connectable_proxies
45
+ end
46
+ end
47
+ end
@@ -13,9 +13,9 @@ module ProxyFetcher
13
13
  # Major version number
14
14
  MAJOR = 0
15
15
  # Minor version number
16
- MINOR = 7
16
+ MINOR = 8
17
17
  # Smallest version number
18
- TINY = 1
18
+ TINY = 0
19
19
 
20
20
  # Full version number
21
21
  STRING = [MAJOR, MINOR, TINY].compact.join('.')
@@ -35,7 +35,9 @@ RSpec.shared_examples 'a manager' do
35
35
 
36
36
  manager = ProxyFetcher::Manager.new
37
37
 
38
- expect { manager.cleanup! }.to change { manager.proxies }.to([])
38
+ expect do
39
+ manager.cleanup!
40
+ end.to change { manager.proxies }.to([])
39
41
  end
40
42
 
41
43
  it "doesn't pollute the output with array of proxies" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxy_fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Bulai
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-13 00:00:00.000000000 Z
11
+ date: 2018-11-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: http
@@ -80,6 +80,7 @@ files:
80
80
  - lib/proxy_fetcher/providers/xroxy.rb
81
81
  - lib/proxy_fetcher/proxy.rb
82
82
  - lib/proxy_fetcher/utils/http_client.rb
83
+ - lib/proxy_fetcher/utils/proxy_list_validator.rb
83
84
  - lib/proxy_fetcher/utils/proxy_validator.rb
84
85
  - lib/proxy_fetcher/version.rb
85
86
  - proxy_fetcher.gemspec