proxy_fetcher 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 929248449265daefef4079decba21be77d36c610
4
- data.tar.gz: 1efc0967583b3a1cc3164b5a524d1b9cfa6e4a1e
3
+ metadata.gz: dac8f3b98762e29d5067e32689cef73be6905d4b
4
+ data.tar.gz: 5b488bb4180524c2cf64640aa031c9b2604c81cc
5
5
  SHA512:
6
- metadata.gz: 9c12cd3940d6f81a7a9ee462d8b9e4a8e364013a735172c1ca9444a2d69741f96642de532384cd1de8d6a4f1e77daa038048b6a1140c9e835e2a9f70d28d3a8c
7
- data.tar.gz: 3e01791ebbdff7608f206a85a86cc7a7701b830aad425475f47fb1d14b4bd11c32225a279a72a131b40b1dbd441973da0b2f5fb4e9381da0c09caa9cfeeb129e
6
+ metadata.gz: aa4ff379c85314d81e660c84e65f9772a72b5a0d36110fcda2cff37d4ced8745cc82378bc98d290ab78c4c51868b201504a1067e2741ca3385d285a36bc525a8
7
+ data.tar.gz: 2fbc3811ca622aadda97e235b650a8603367fd4851d5a5319e71f994cc4b7520e2e88cbc45d4299c3fd74b58a569d0bf762e35c16eba1aaa9da01b1553759c3d
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  # Ruby lib for managing proxies
2
+ [![Gem Version](https://badge.fury.io/rb/proxy_fetcher.svg)](http://badge.fury.io/rb/proxy_fetcher)
2
3
  [![Build Status](https://travis-ci.org/nbulaj/proxy_fetcher.svg?branch=master)](https://travis-ci.org/nbulaj/proxy_fetcher)
3
4
  [![Coverage Status](https://coveralls.io/repos/github/nbulaj/proxy_fetcher/badge.svg)](https://coveralls.io/github/nbulaj/proxy_fetcher)
4
5
  [![License](http://img.shields.io/badge/license-MIT-brightgreen.svg)](#license)
@@ -38,6 +39,15 @@ manager.proxies
38
39
  # @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
39
40
  ```
40
41
 
42
+ You can initialize the proxy manager without loading the proxy list from the remote server by passing `refresh: false` on initialization:
43
+
44
+ ```ruby
45
+ manager = ProxyFetcher::Manager.new(refresh: false) # just initialize class instance
46
+ manager.proxies
47
+
48
+ #=> []
49
+ ```
50
+
41
51
  Get raw proxy URLs:
42
52
 
43
53
  ```ruby
@@ -51,7 +61,7 @@ manager.raw_proxies
51
61
  If `ProxyFetcher::Manager` was already initialized somewhere, you can refresh the proxy list by calling `#refresh_list!` method:
52
62
 
53
63
  ```ruby
54
- manager.refresh_list!
64
+ manager.refresh_list! # or manager.fetch!
55
65
 
56
66
  #=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
57
67
  # @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
@@ -79,7 +89,7 @@ Also you can call next instance method for every Proxy object:
79
89
  If you want to remove dead servers from the current proxy manager list, just call the `cleanup!` method:
80
90
 
81
91
  ```ruby
82
- manager.cleanup!
92
+ manager.cleanup! # or manager.validate!
83
93
  ```
84
94
 
85
95
  To change the open/read timeout for the `cleanup!` and `connectable?` methods, you need to change the `ProxyFetcher::Manager` config:
@@ -92,6 +102,10 @@ manager = ProxyFetcher::Manager.new
92
102
  manager.cleanup!
93
103
  ```
94
104
 
105
+ ## TODO
106
+
107
+ - Proxy filters
108
+
95
109
  ## Contributing
96
110
 
97
111
  You are very welcome to help improve ProxyFetcher if you have suggestions for features that other people can use.
data/lib/proxy_fetcher.rb CHANGED
@@ -37,7 +37,7 @@ module ProxyFetcher
37
37
 
38
38
  alias_method :fetch!, :refresh_list!
39
39
 
40
- # Clean current proxies list from dead proxies
40
+ # Clean current proxy list from dead proxies (those that don't respond within the timeout)
41
41
  def cleanup!
42
42
  proxies.keep_if(&:connectable?)
43
43
  end
@@ -56,6 +56,7 @@ module ProxyFetcher
56
56
 
57
57
  private
58
58
 
59
+ # Get HTML from the requested URL
59
60
  def load_html(url)
60
61
  uri = URI.parse(url)
61
62
  http = Net::HTTP.new(uri.host, uri.port)
@@ -11,6 +11,7 @@ module ProxyFetcher
11
11
 
12
12
  def connectable?
13
13
  connection = Net::HTTP.new(addr, port)
14
+ connection.use_ssl = true if https?
14
15
  connection.open_timeout = ProxyFetcher::Manager.config.open_timeout
15
16
  connection.read_timeout = ProxyFetcher::Manager.config.read_timeout
16
17
 
@@ -21,12 +22,14 @@ module ProxyFetcher
21
22
  false
22
23
  end
23
24
 
25
+ alias_method :valid?, :connectable?
26
+
24
27
  def http?
25
28
  type.casecmp('http').zero?
26
29
  end
27
30
 
28
31
  def https?
29
- !http?
32
+ type.casecmp('https').zero?
30
33
  end
31
34
 
32
35
  def uri
@@ -39,6 +42,7 @@ module ProxyFetcher
39
42
 
40
43
  private
41
44
 
45
+ # HideMyAss proxy list rows parsing by columns
42
46
  def parse_row!(html)
43
47
  html.xpath('td').each_with_index do |td, index|
44
48
  case index
@@ -67,11 +71,11 @@ module ProxyFetcher
67
71
  good = []
68
72
  bytes = []
69
73
  css = html.at_xpath('span/style/text()').to_s
70
- css.split.each { |l| good << $1 if l.match(/\.(.+?)\{.*inline/) }
74
+ css.split.each { |l| good << Regexp.last_match(1) if l =~ /\.(.+?)\{.*inline/ }
71
75
 
72
76
  html.xpath('span/span | span | span/text()').each do |span|
73
77
  if span.is_a?(Nokogiri::XML::Text)
74
- bytes << $1 if span.content.strip.match(/\.{0,1}(.+)\.{0,1}/)
78
+ bytes << Regexp.last_match(1) if span.content.strip =~ /\.{0,1}(.+)\.{0,1}/
75
79
  elsif (span['style'] && span['style'] =~ /inline/) ||
76
80
  (span['class'] && good.include?(span['class'])) ||
77
81
  (span['class'] =~ /^[0-9]/)
@@ -9,7 +9,7 @@ module ProxyFetcher
9
9
  # Minor version number
10
10
  MINOR = 1
11
11
  # Smallest version number
12
- TINY = 1
12
+ TINY = 2
13
13
 
14
14
  # Full version number
15
15
  STRING = [MAJOR, MINOR, TINY].compact.join('.')
@@ -28,4 +28,9 @@ describe ProxyFetcher::Manager do
28
28
 
29
29
  expect { manager.cleanup! }.to change { manager.proxies }.to([])
30
30
  end
31
+
32
+ it "doesn't pollute the output with array of proxies" do
33
+ manager = described_class.new(refresh: false)
34
+ expect(manager.inspect).to eq(manager.to_s)
35
+ end
31
36
  end
@@ -12,11 +12,16 @@ describe ProxyFetcher::Proxy do
12
12
  expect(proxy.https?).to be_falsey.or(be_truthy)
13
13
  end
14
14
 
15
- it 'checks connection status' do
15
+ it 'not connectable if IP addr is wrong' do
16
16
  allow_any_instance_of(ProxyFetcher::Proxy).to receive(:addr).and_return('192.168.1.1')
17
17
  expect(proxy.connectable?).to be_falsey
18
18
  end
19
19
 
20
+ it "not connectable if server doesn't respond to head" do
21
+ allow_any_instance_of(Net::HTTP).to receive(:request_head).and_return(false)
22
+ expect(proxy.connectable?).to be_falsey
23
+ end
24
+
20
25
  it 'returns URI::Generic' do
21
26
  expect(proxy.uri).to be_a(URI::Generic)
22
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: proxy_fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nikita Bulai