proxy_fetcher 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -3
- data/lib/proxy_fetcher.rb +1 -86
- data/lib/proxy_fetcher/manager.rb +88 -0
- data/lib/proxy_fetcher/proxy.rb +13 -1
- data/lib/proxy_fetcher/version.rb +1 -1
- data/proxy_fetcher.gemspec +1 -1
- data/spec/proxy_fetcher/proxy_spec.rb +35 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04cf23c5f1bb6abfd29e5d6180a5a9e167a2d147
|
4
|
+
data.tar.gz: 5590d391eee582e511027a8282ef38917aa653a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccd0ef56339916919c8bf1b70dd3332d9972d6db1cc5b676b28b16691161955cffafbfa25bb310ba5b1d78a7b02dc7e8e440bb0a019736442b8bcbcfa3f8a004
|
7
|
+
data.tar.gz: e72892832780fddb424adc29dcb3b500ede2998d452c333aeffcb2a4766483a079288ec3e52e1eeca4ff61f7801ddae3ba80cddac2d527eaeabed09cbd8f6736
|
data/README.md
CHANGED
@@ -15,6 +15,12 @@ If using bundler, first add 'proxy_fetcher' to your Gemfile:
|
|
15
15
|
gem 'proxy_fetcher', '~> 0.1'
|
16
16
|
```
|
17
17
|
|
18
|
+
or if you want to use the latest version (from `master` branch), then:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
gem 'proxy_fetcher', git: 'https://github.com/nbulaj/proxy_fetcher.git'
|
22
|
+
```
|
23
|
+
|
18
24
|
And run:
|
19
25
|
|
20
26
|
```sh
|
@@ -88,8 +94,8 @@ Also you can call next instance method for every Proxy object:
|
|
88
94
|
|
89
95
|
You can use two methods to get the first proxy from the list:
|
90
96
|
|
91
|
-
* `get` (will return first proxy and move it to the end of the list)
|
92
|
-
* `get!` (will return first **connectable** proxy and move it to the end of the list; all the proxies till the working one will be removed)
|
97
|
+
* `get` or aliased `pop` (will return first proxy and move it to the end of the list)
|
98
|
+
* `get!` or aliased `pop!` (will return first **connectable** proxy and move it to the end of the list; all the proxies till the working one will be removed)
|
93
99
|
|
94
100
|
If you wanna clear current proxy manager list from dead servers, you can just call `cleanup!` method:
|
95
101
|
|
@@ -97,6 +103,12 @@ If you wanna clear current proxy manager list from dead servers, you can just ca
|
|
97
103
|
manager.cleanup! # or manager.validate!
|
98
104
|
```
|
99
105
|
|
106
|
+
You can sort or find any proxy by speed using the following 3 instance methods:
|
107
|
+
|
108
|
+
* `fast?`
|
109
|
+
* `medium?`
|
110
|
+
* `slow?`
|
111
|
+
|
100
112
|
To change open/read timeout for `cleanup!` and `connectable?` methods you need to change ProxyFetcher::Manager config:
|
101
113
|
|
102
114
|
```ruby
|
@@ -123,7 +135,7 @@ To contribute:
|
|
123
135
|
4. Add documentation for your feature or bug fix.
|
124
136
|
5. Run <tt>rake doc:yard</tt>. If your changes are not 100% documented, go back to step 4.
|
125
137
|
6. Add tests for your feature or bug fix.
|
126
|
-
7. Run `rake` to make sure all tests pass.
|
138
|
+
7. Run `rake spec` to make sure all tests pass.
|
127
139
|
8. Commit your changes (`git commit -am 'Add new feature'`).
|
128
140
|
9. Push to the branch (`git push origin my-new-feature`).
|
129
141
|
10. Create new pull request.
|
@@ -135,3 +147,5 @@ Thanks.
|
|
135
147
|
proxy_fetcher gem is released under the [MIT License](http://www.opensource.org/licenses/MIT).
|
136
148
|
|
137
149
|
Copyright (c) 2017 Nikita Bulai (bulajnikita@gmail.com).
|
150
|
+
|
151
|
+
Some parser code (c) [pifleo](https://gist.github.com/pifleo/3889803)
|
data/lib/proxy_fetcher.rb
CHANGED
@@ -4,92 +4,7 @@ require 'nokogiri'
|
|
4
4
|
|
5
5
|
require 'proxy_fetcher/configuration'
|
6
6
|
require 'proxy_fetcher/proxy'
|
7
|
+
require 'proxy_fetcher/manager'
|
7
8
|
|
8
9
|
module ProxyFetcher
|
9
|
-
class Manager
|
10
|
-
PROXY_PROVIDER_URL = 'http://proxylist.hidemyass.com/'.freeze
|
11
|
-
|
12
|
-
class << self
|
13
|
-
def config
|
14
|
-
@config ||= ProxyFetcher::Configuration.new
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
attr_reader :proxies
|
19
|
-
|
20
|
-
# refresh: true - load proxy list from the remote server on initialization
|
21
|
-
# refresh: false - just initialize the class, proxy list will be empty ([])
|
22
|
-
def initialize(refresh: true)
|
23
|
-
if refresh
|
24
|
-
refresh_list!
|
25
|
-
else
|
26
|
-
@proxies = []
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
# Update current proxy list from the provider
|
31
|
-
def refresh_list!
|
32
|
-
doc = Nokogiri::HTML(load_html(PROXY_PROVIDER_URL))
|
33
|
-
rows = doc.xpath('//table[@id="listable"]/tbody/tr')
|
34
|
-
|
35
|
-
@proxies = rows.map { |row| Proxy.new(row) }
|
36
|
-
end
|
37
|
-
|
38
|
-
alias_method :fetch!, :refresh_list!
|
39
|
-
|
40
|
-
# Pop just first proxy (and back it to the end of the proxy list)
|
41
|
-
def get
|
42
|
-
return if @proxies.empty?
|
43
|
-
|
44
|
-
first_proxy = @proxies.shift
|
45
|
-
@proxies << first_proxy
|
46
|
-
|
47
|
-
first_proxy
|
48
|
-
end
|
49
|
-
|
50
|
-
alias_method :pop, :get
|
51
|
-
|
52
|
-
# Pop first valid proxy (and back it to the end of the proxy list)
|
53
|
-
# Invalid proxies will be removed from the list
|
54
|
-
def get!
|
55
|
-
index = @proxies.find_index(&:connectable?)
|
56
|
-
return if index.nil?
|
57
|
-
|
58
|
-
proxy = @proxies.delete_at(index)
|
59
|
-
tail = @proxies[index..-1]
|
60
|
-
|
61
|
-
@proxies = tail << proxy
|
62
|
-
|
63
|
-
proxy
|
64
|
-
end
|
65
|
-
|
66
|
-
alias_method :pop!, :get!
|
67
|
-
|
68
|
-
# Clean current proxy list from dead proxies (doesn't respond by timeout)
|
69
|
-
def cleanup!
|
70
|
-
proxies.keep_if(&:connectable?)
|
71
|
-
end
|
72
|
-
|
73
|
-
alias_method :validate!, :cleanup!
|
74
|
-
|
75
|
-
# Just schema + host + port
|
76
|
-
def raw_proxies
|
77
|
-
proxies.map(&:url)
|
78
|
-
end
|
79
|
-
|
80
|
-
# No need to put all the attr_readers
|
81
|
-
def inspect
|
82
|
-
to_s
|
83
|
-
end
|
84
|
-
|
85
|
-
private
|
86
|
-
|
87
|
-
# Get HTML from the requested URL
|
88
|
-
def load_html(url)
|
89
|
-
uri = URI.parse(url)
|
90
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
91
|
-
response = http.get(uri.request_uri)
|
92
|
-
response.body
|
93
|
-
end
|
94
|
-
end
|
95
10
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module ProxyFetcher
|
2
|
+
class Manager
|
3
|
+
PROXY_PROVIDER_URL = 'http://proxylist.hidemyass.com/'.freeze
|
4
|
+
|
5
|
+
class << self
|
6
|
+
def config
|
7
|
+
@config ||= ProxyFetcher::Configuration.new
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
attr_reader :proxies
|
12
|
+
|
13
|
+
# refresh: true - load proxy list from the remote server on initialization
|
14
|
+
# refresh: false - just initialize the class, proxy list will be empty ([])
|
15
|
+
def initialize(refresh: true)
|
16
|
+
if refresh
|
17
|
+
refresh_list!
|
18
|
+
else
|
19
|
+
@proxies = []
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Update current proxy list from the provider
|
24
|
+
def refresh_list!
|
25
|
+
doc = Nokogiri::HTML(load_html(PROXY_PROVIDER_URL))
|
26
|
+
rows = doc.xpath('//table[@id="listable"]/tbody/tr')
|
27
|
+
|
28
|
+
@proxies = rows.map { |row| Proxy.new(row) }
|
29
|
+
end
|
30
|
+
|
31
|
+
alias fetch! refresh_list!
|
32
|
+
|
33
|
+
# Pop just first proxy (and back it to the end of the proxy list)
|
34
|
+
def get
|
35
|
+
return if @proxies.empty?
|
36
|
+
|
37
|
+
first_proxy = @proxies.shift
|
38
|
+
@proxies << first_proxy
|
39
|
+
|
40
|
+
first_proxy
|
41
|
+
end
|
42
|
+
|
43
|
+
alias pop get
|
44
|
+
|
45
|
+
# Pop first valid proxy (and back it to the end of the proxy list)
|
46
|
+
# Invalid proxies will be removed from the list
|
47
|
+
def get!
|
48
|
+
index = @proxies.find_index(&:connectable?)
|
49
|
+
return if index.nil?
|
50
|
+
|
51
|
+
proxy = @proxies.delete_at(index)
|
52
|
+
tail = @proxies[index..-1]
|
53
|
+
|
54
|
+
@proxies = tail << proxy
|
55
|
+
|
56
|
+
proxy
|
57
|
+
end
|
58
|
+
|
59
|
+
alias pop! get!
|
60
|
+
|
61
|
+
# Clean current proxy list from dead proxies (doesn't respond by timeout)
|
62
|
+
def cleanup!
|
63
|
+
proxies.keep_if(&:connectable?)
|
64
|
+
end
|
65
|
+
|
66
|
+
alias validate! cleanup!
|
67
|
+
|
68
|
+
# Just schema + host + port
|
69
|
+
def raw_proxies
|
70
|
+
proxies.map(&:url)
|
71
|
+
end
|
72
|
+
|
73
|
+
# No need to put all the attr_readers
|
74
|
+
def inspect
|
75
|
+
to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
# Get HTML from the requested URL
|
81
|
+
def load_html(url)
|
82
|
+
uri = URI.parse(url)
|
83
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
84
|
+
response = http.get(uri.request_uri)
|
85
|
+
response.body
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/proxy_fetcher/proxy.rb
CHANGED
@@ -22,7 +22,7 @@ module ProxyFetcher
|
|
22
22
|
false
|
23
23
|
end
|
24
24
|
|
25
|
-
|
25
|
+
alias valid? connectable?
|
26
26
|
|
27
27
|
def http?
|
28
28
|
type.casecmp('http').zero?
|
@@ -32,6 +32,18 @@ module ProxyFetcher
|
|
32
32
|
type.casecmp('https').zero?
|
33
33
|
end
|
34
34
|
|
35
|
+
def fast?
|
36
|
+
speed >= 66
|
37
|
+
end
|
38
|
+
|
39
|
+
def medium?
|
40
|
+
speed >= 33 && speed < 66
|
41
|
+
end
|
42
|
+
|
43
|
+
def slow?
|
44
|
+
speed < 33
|
45
|
+
end
|
46
|
+
|
35
47
|
def uri
|
36
48
|
URI::Generic.build(host: addr, port: port, scheme: type)
|
37
49
|
end
|
data/proxy_fetcher.gemspec
CHANGED
@@ -5,7 +5,7 @@ require 'proxy_fetcher/version'
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
6
|
gem.name = 'proxy_fetcher'
|
7
7
|
gem.version = ProxyFetcher.gem_version
|
8
|
-
gem.date = '2017-05-
|
8
|
+
gem.date = '2017-05-31'
|
9
9
|
gem.summary = 'Ruby gem for dealing with proxy lists '
|
10
10
|
gem.description = 'This gem can help your Ruby application to make HTTP(S) requests ' \
|
11
11
|
'from proxy server, fetching and validating current proxy lists from the HideMyAss service.'
|
@@ -8,8 +8,13 @@ describe ProxyFetcher::Proxy do
|
|
8
8
|
let(:proxy) { @manager.proxies.first }
|
9
9
|
|
10
10
|
it 'checks schema' do
|
11
|
-
|
12
|
-
expect(proxy.
|
11
|
+
proxy.instance_variable_set(:@type, 'HTTP')
|
12
|
+
expect(proxy.http?).to be_truthy
|
13
|
+
expect(proxy.https?).to be_falsey
|
14
|
+
|
15
|
+
proxy.instance_variable_set(:@type, 'HTTPS')
|
16
|
+
expect(proxy.https?).to be_truthy
|
17
|
+
expect(proxy.http?).to be_falsey
|
13
18
|
end
|
14
19
|
|
15
20
|
it 'not connectable if IP addr is wrong' do
|
@@ -20,6 +25,7 @@ describe ProxyFetcher::Proxy do
|
|
20
25
|
it "not connectable if server doesn't respond to head" do
|
21
26
|
allow_any_instance_of(Net::HTTP).to receive(:start).and_return(false)
|
22
27
|
expect(proxy.connectable?).to be_falsey
|
28
|
+
expect(proxy.valid?).to be_falsey
|
23
29
|
end
|
24
30
|
|
25
31
|
it 'returns URI::Generic' do
|
@@ -29,4 +35,31 @@ describe ProxyFetcher::Proxy do
|
|
29
35
|
it 'returns URL' do
|
30
36
|
expect(proxy.url).to be_a(String)
|
31
37
|
end
|
38
|
+
|
39
|
+
it 'must be slow if speed < 33' do
|
40
|
+
proxy.instance_variable_set(:@speed, 32)
|
41
|
+
expect(proxy.slow?).to be_truthy
|
42
|
+
|
43
|
+
proxy.instance_variable_set(:@speed, 33)
|
44
|
+
expect(proxy.slow?).to be_falsey
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'must be medium if speed >= 33 and < 66' do
|
48
|
+
proxy.instance_variable_set(:@speed, 32)
|
49
|
+
expect(proxy.medium?).to be_falsey
|
50
|
+
|
51
|
+
proxy.instance_variable_set(:@speed, 33)
|
52
|
+
expect(proxy.medium?).to be_truthy
|
53
|
+
|
54
|
+
proxy.instance_variable_set(:@speed, 65)
|
55
|
+
expect(proxy.medium?).to be_truthy
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'must be fast if speed >= 66' do
|
59
|
+
proxy.instance_variable_set(:@speed, 65)
|
60
|
+
expect(proxy.fast?).to be_falsey
|
61
|
+
|
62
|
+
proxy.instance_variable_set(:@speed, 66)
|
63
|
+
expect(proxy.fast?).to be_truthy
|
64
|
+
end
|
32
65
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: proxy_fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nikita Bulai
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -59,6 +59,7 @@ files:
|
|
59
59
|
- Rakefile
|
60
60
|
- lib/proxy_fetcher.rb
|
61
61
|
- lib/proxy_fetcher/configuration.rb
|
62
|
+
- lib/proxy_fetcher/manager.rb
|
62
63
|
- lib/proxy_fetcher/proxy.rb
|
63
64
|
- lib/proxy_fetcher/version.rb
|
64
65
|
- proxy_fetcher.gemspec
|