proxy_fetcher 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -2
- data/lib/proxy_fetcher.rb +2 -1
- data/lib/proxy_fetcher/proxy.rb +7 -3
- data/lib/proxy_fetcher/version.rb +1 -1
- data/spec/proxy_fetcher/manager_spec.rb +5 -0
- data/spec/proxy_fetcher/proxy_spec.rb +6 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dac8f3b98762e29d5067e32689cef73be6905d4b
|
4
|
+
data.tar.gz: 5b488bb4180524c2cf64640aa031c9b2604c81cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa4ff379c85314d81e660c84e65f9772a72b5a0d36110fcda2cff37d4ced8745cc82378bc98d290ab78c4c51868b201504a1067e2741ca3385d285a36bc525a8
|
7
|
+
data.tar.gz: 2fbc3811ca622aadda97e235b650a8603367fd4851d5a5319e71f994cc4b7520e2e88cbc45d4299c3fd74b58a569d0bf762e35c16eba1aaa9da01b1553759c3d
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# Ruby lib for managing proxies
|
2
|
+
[![Gem Version](https://badge.fury.io/rb/proxy_fetcher.svg)](http://badge.fury.io/rb/proxy_fetcher)
|
2
3
|
[![Build Status](https://travis-ci.org/nbulaj/proxy_fetcher.svg?branch=master)](https://travis-ci.org/nbulaj/proxy_fetcher)
|
3
4
|
[![Coverage Status](https://coveralls.io/repos/github/nbulaj/proxy_fetcher/badge.svg)](https://coveralls.io/github/nbulaj/proxy_fetcher)
|
4
5
|
[![License](http://img.shields.io/badge/license-MIT-brightgreen.svg)](#license)
|
@@ -38,6 +39,15 @@ manager.proxies
|
|
38
39
|
# @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
|
39
40
|
```
|
40
41
|
|
42
|
+
You can initialize the proxy manager without loading the proxy list from the remote server by passing `refresh: false` on initialization:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
manager = ProxyFetcher::Manager.new(refresh: false) # just initialize class instance
|
46
|
+
manager.proxies
|
47
|
+
|
48
|
+
#=> []
|
49
|
+
```
|
50
|
+
|
41
51
|
Get raw proxy URLs:
|
42
52
|
|
43
53
|
```ruby
|
@@ -51,7 +61,7 @@ manager.raw_proxies
|
|
51
61
|
If `ProxyFetcher::Manager` was already initialized somewhere, you can refresh the proxy list by calling `#refresh_list!` method:
|
52
62
|
|
53
63
|
```ruby
|
54
|
-
manager.refresh_list!
|
64
|
+
manager.refresh_list! # or manager.fetch!
|
55
65
|
|
56
66
|
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
57
67
|
# @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
|
@@ -79,7 +89,7 @@ Also you can call next instance method for every Proxy object:
|
|
79
89
|
If you want to remove dead servers from the current proxy manager list, you can just call the `cleanup!` method:
|
80
90
|
|
81
91
|
```ruby
|
82
|
-
manager.cleanup!
|
92
|
+
manager.cleanup! # or manager.validate!
|
83
93
|
```
|
84
94
|
|
85
95
|
To change the open/read timeout for the `cleanup!` and `connectable?` methods, you need to change the ProxyFetcher::Manager config:
|
@@ -92,6 +102,10 @@ manager = ProxyFetcher::Manager.new
|
|
92
102
|
manager.cleanup!
|
93
103
|
```
|
94
104
|
|
105
|
+
## TODO
|
106
|
+
|
107
|
+
# Proxy filters
|
108
|
+
|
95
109
|
## Contributing
|
96
110
|
|
97
111
|
You are very welcome to help improve ProxyFetcher if you have suggestions for features that other people can use.
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -37,7 +37,7 @@ module ProxyFetcher
|
|
37
37
|
|
38
38
|
alias_method :fetch!, :refresh_list!
|
39
39
|
|
40
|
-
# Clean current
|
40
|
+
# Clean current proxy list from dead proxies (those that don't respond within the timeout)
|
41
41
|
def cleanup!
|
42
42
|
proxies.keep_if(&:connectable?)
|
43
43
|
end
|
@@ -56,6 +56,7 @@ module ProxyFetcher
|
|
56
56
|
|
57
57
|
private
|
58
58
|
|
59
|
+
# Get HTML from the requested URL
|
59
60
|
def load_html(url)
|
60
61
|
uri = URI.parse(url)
|
61
62
|
http = Net::HTTP.new(uri.host, uri.port)
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -11,6 +11,7 @@ module ProxyFetcher
|
|
11
11
|
|
12
12
|
def connectable?
|
13
13
|
connection = Net::HTTP.new(addr, port)
|
14
|
+
connection.use_ssl = true if https?
|
14
15
|
connection.open_timeout = ProxyFetcher::Manager.config.open_timeout
|
15
16
|
connection.read_timeout = ProxyFetcher::Manager.config.read_timeout
|
16
17
|
|
@@ -21,12 +22,14 @@ module ProxyFetcher
|
|
21
22
|
false
|
22
23
|
end
|
23
24
|
|
25
|
+
alias_method :valid?, :connectable?
|
26
|
+
|
24
27
|
def http?
|
25
28
|
type.casecmp('http').zero?
|
26
29
|
end
|
27
30
|
|
28
31
|
def https?
|
29
|
-
|
32
|
+
type.casecmp('https').zero?
|
30
33
|
end
|
31
34
|
|
32
35
|
def uri
|
@@ -39,6 +42,7 @@ module ProxyFetcher
|
|
39
42
|
|
40
43
|
private
|
41
44
|
|
45
|
+
# HideMyAss proxy list rows parsing by columns
|
42
46
|
def parse_row!(html)
|
43
47
|
html.xpath('td').each_with_index do |td, index|
|
44
48
|
case index
|
@@ -67,11 +71,11 @@ module ProxyFetcher
|
|
67
71
|
good = []
|
68
72
|
bytes = []
|
69
73
|
css = html.at_xpath('span/style/text()').to_s
|
70
|
-
css.split.each { |l| good <<
|
74
|
+
css.split.each { |l| good << Regexp.last_match(1) if l =~ /\.(.+?)\{.*inline/ }
|
71
75
|
|
72
76
|
html.xpath('span/span | span | span/text()').each do |span|
|
73
77
|
if span.is_a?(Nokogiri::XML::Text)
|
74
|
-
bytes <<
|
78
|
+
bytes << Regexp.last_match(1) if span.content.strip =~ /\.{0,1}(.+)\.{0,1}/
|
75
79
|
elsif (span['style'] && span['style'] =~ /inline/) ||
|
76
80
|
(span['class'] && good.include?(span['class'])) ||
|
77
81
|
(span['class'] =~ /^[0-9]/)
|
@@ -28,4 +28,9 @@ describe ProxyFetcher::Manager do
|
|
28
28
|
|
29
29
|
expect { manager.cleanup! }.to change { manager.proxies }.to([])
|
30
30
|
end
|
31
|
+
|
32
|
+
it "doesn't pollute the output with array of proxies" do
|
33
|
+
manager = described_class.new(refresh: false)
|
34
|
+
expect(manager.inspect).to eq(manager.to_s)
|
35
|
+
end
|
31
36
|
end
|
@@ -12,11 +12,16 @@ describe ProxyFetcher::Proxy do
|
|
12
12
|
expect(proxy.https?).to be_falsey.or(be_truthy)
|
13
13
|
end
|
14
14
|
|
15
|
-
it '
|
15
|
+
it 'not connectable if IP addr is wrong' do
|
16
16
|
allow_any_instance_of(ProxyFetcher::Proxy).to receive(:addr).and_return('192.168.1.1')
|
17
17
|
expect(proxy.connectable?).to be_falsey
|
18
18
|
end
|
19
19
|
|
20
|
+
it "not connectable if server doesn't respond to head" do
|
21
|
+
allow_any_instance_of(Net::HTTP).to receive(:request_head).and_return(false)
|
22
|
+
expect(proxy.connectable?).to be_falsey
|
23
|
+
end
|
24
|
+
|
20
25
|
it 'returns URI::Generic' do
|
21
26
|
expect(proxy.uri).to be_a(URI::Generic)
|
22
27
|
end
|