proxy_fetcher 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -2
- data/lib/proxy_fetcher.rb +2 -1
- data/lib/proxy_fetcher/proxy.rb +7 -3
- data/lib/proxy_fetcher/version.rb +1 -1
- data/spec/proxy_fetcher/manager_spec.rb +5 -0
- data/spec/proxy_fetcher/proxy_spec.rb +6 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dac8f3b98762e29d5067e32689cef73be6905d4b
|
4
|
+
data.tar.gz: 5b488bb4180524c2cf64640aa031c9b2604c81cc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa4ff379c85314d81e660c84e65f9772a72b5a0d36110fcda2cff37d4ced8745cc82378bc98d290ab78c4c51868b201504a1067e2741ca3385d285a36bc525a8
|
7
|
+
data.tar.gz: 2fbc3811ca622aadda97e235b650a8603367fd4851d5a5319e71f994cc4b7520e2e88cbc45d4299c3fd74b58a569d0bf762e35c16eba1aaa9da01b1553759c3d
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# Ruby lib for managing proxies
|
2
|
+
[](http://badge.fury.io/rb/proxy_fetcher)
|
2
3
|
[](https://travis-ci.org/nbulaj/proxy_fetcher)
|
3
4
|
[](https://coveralls.io/github/nbulaj/proxy_fetcher)
|
4
5
|
[](#license)
|
@@ -38,6 +39,15 @@ manager.proxies
|
|
38
39
|
# @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
|
39
40
|
```
|
40
41
|
|
42
|
+
You can initialize the proxy manager without loading the proxy list from the remote server by passing `refresh: false` on initialization:
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
manager = ProxyFetcher::Manager.new(refresh: false) # just initialize class instance
|
46
|
+
manager.proxies
|
47
|
+
|
48
|
+
#=> []
|
49
|
+
```
|
50
|
+
|
41
51
|
Get raw proxy URLs:
|
42
52
|
|
43
53
|
```ruby
|
@@ -51,7 +61,7 @@ manager.raw_proxies
|
|
51
61
|
If `ProxyFetcher::Manager` was already initialized somewhere, you can refresh the proxy list by calling `#refresh_list!` method:
|
52
62
|
|
53
63
|
```ruby
|
54
|
-
manager.refresh_list!
|
64
|
+
manager.refresh_list! # or manager.fetch!
|
55
65
|
|
56
66
|
#=> [#<ProxyFetcher::Proxy:0x00000002879680 @addr="97.77.104.22", @port=3128, @country="USA",
|
57
67
|
# @response_time=5217, @speed=48, @connection_time=100, @type="HTTP", @anonymity="High">, ... ]
|
@@ -79,7 +89,7 @@ Also you can call next instance method for every Proxy object:
|
|
79
89
|
If you want to remove dead servers from the current proxy manager list, you can just call the `cleanup!` method:
|
80
90
|
|
81
91
|
```ruby
|
82
|
-
manager.cleanup!
|
92
|
+
manager.cleanup! # or manager.validate!
|
83
93
|
```
|
84
94
|
|
85
95
|
To change the open/read timeout for the `cleanup!` and `connectable?` methods, you need to change the ProxyFetcher::Manager config:
|
@@ -92,6 +102,10 @@ manager = ProxyFetcher::Manager.new
|
|
92
102
|
manager.cleanup!
|
93
103
|
```
|
94
104
|
|
105
|
+
## TODO
|
106
|
+
|
107
|
+
# Proxy filters
|
108
|
+
|
95
109
|
## Contributing
|
96
110
|
|
97
111
|
You are very welcome to help improve ProxyFetcher if you have suggestions for features that other people can use.
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -37,7 +37,7 @@ module ProxyFetcher
|
|
37
37
|
|
38
38
|
alias_method :fetch!, :refresh_list!
|
39
39
|
|
40
|
-
# Clean current
|
40
|
+
# Remove dead proxies (those that don't respond within the timeout) from the current proxy list
|
41
41
|
def cleanup!
|
42
42
|
proxies.keep_if(&:connectable?)
|
43
43
|
end
|
@@ -56,6 +56,7 @@ module ProxyFetcher
|
|
56
56
|
|
57
57
|
private
|
58
58
|
|
59
|
+
# Get HTML from the requested URL
|
59
60
|
def load_html(url)
|
60
61
|
uri = URI.parse(url)
|
61
62
|
http = Net::HTTP.new(uri.host, uri.port)
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -11,6 +11,7 @@ module ProxyFetcher
|
|
11
11
|
|
12
12
|
def connectable?
|
13
13
|
connection = Net::HTTP.new(addr, port)
|
14
|
+
connection.use_ssl = true if https?
|
14
15
|
connection.open_timeout = ProxyFetcher::Manager.config.open_timeout
|
15
16
|
connection.read_timeout = ProxyFetcher::Manager.config.read_timeout
|
16
17
|
|
@@ -21,12 +22,14 @@ module ProxyFetcher
|
|
21
22
|
false
|
22
23
|
end
|
23
24
|
|
25
|
+
alias_method :valid?, :connectable?
|
26
|
+
|
24
27
|
def http?
|
25
28
|
type.casecmp('http').zero?
|
26
29
|
end
|
27
30
|
|
28
31
|
def https?
|
29
|
-
|
32
|
+
type.casecmp('https').zero?
|
30
33
|
end
|
31
34
|
|
32
35
|
def uri
|
@@ -39,6 +42,7 @@ module ProxyFetcher
|
|
39
42
|
|
40
43
|
private
|
41
44
|
|
45
|
+
# HideMyAss proxy list rows parsing by columns
|
42
46
|
def parse_row!(html)
|
43
47
|
html.xpath('td').each_with_index do |td, index|
|
44
48
|
case index
|
@@ -67,11 +71,11 @@ module ProxyFetcher
|
|
67
71
|
good = []
|
68
72
|
bytes = []
|
69
73
|
css = html.at_xpath('span/style/text()').to_s
|
70
|
-
css.split.each { |l| good <<
|
74
|
+
css.split.each { |l| good << Regexp.last_match(1) if l =~ /\.(.+?)\{.*inline/ }
|
71
75
|
|
72
76
|
html.xpath('span/span | span | span/text()').each do |span|
|
73
77
|
if span.is_a?(Nokogiri::XML::Text)
|
74
|
-
bytes <<
|
78
|
+
bytes << Regexp.last_match(1) if span.content.strip =~ /\.{0,1}(.+)\.{0,1}/
|
75
79
|
elsif (span['style'] && span['style'] =~ /inline/) ||
|
76
80
|
(span['class'] && good.include?(span['class'])) ||
|
77
81
|
(span['class'] =~ /^[0-9]/)
|
@@ -28,4 +28,9 @@ describe ProxyFetcher::Manager do
|
|
28
28
|
|
29
29
|
expect { manager.cleanup! }.to change { manager.proxies }.to([])
|
30
30
|
end
|
31
|
+
|
32
|
+
it "doesn't pollute the output with array of proxies" do
|
33
|
+
manager = described_class.new(refresh: false)
|
34
|
+
expect(manager.inspect).to eq(manager.to_s)
|
35
|
+
end
|
31
36
|
end
|
@@ -12,11 +12,16 @@ describe ProxyFetcher::Proxy do
|
|
12
12
|
expect(proxy.https?).to be_falsey.or(be_truthy)
|
13
13
|
end
|
14
14
|
|
15
|
-
it '
|
15
|
+
it 'not connectable if IP addr is wrong' do
|
16
16
|
allow_any_instance_of(ProxyFetcher::Proxy).to receive(:addr).and_return('192.168.1.1')
|
17
17
|
expect(proxy.connectable?).to be_falsey
|
18
18
|
end
|
19
19
|
|
20
|
+
it "not connectable if server doesn't respond to head" do
|
21
|
+
allow_any_instance_of(Net::HTTP).to receive(:request_head).and_return(false)
|
22
|
+
expect(proxy.connectable?).to be_falsey
|
23
|
+
end
|
24
|
+
|
20
25
|
it 'returns URI::Generic' do
|
21
26
|
expect(proxy.uri).to be_a(URI::Generic)
|
22
27
|
end
|