proxy_rotater 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d1fd8349487bfac47f83d0d6b227b76453a6b3de
4
+ data.tar.gz: dad5a184ac81cba814338ccaf7d2ec702d5424f5
5
+ SHA512:
6
+ metadata.gz: e9484518601163b5275c887c3df3eb5afc52e275bbac87dc3d906ebda0762d3a9ab4500fe22cf923ab7b5ff11b3b0eb3644f6ba9ce65f377518fe8588f126fd6
7
+ data.tar.gz: b4bf49e0a8e22d03d7d74ce2729fd6c53add00e21ae2eb470a9caf6a5966b23718a363ff87d2d3e837c8a18f65b0c9654d6cbda8dc4b18eaa4fb5b82ef37a120
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in proxy_rotater.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 shim0mura
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # ProxyRotater
2
+
3
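+ ProxyRotater downloads a list of public HTTP proxies, benchmarks them, and spreads GET requests across the proxies that respond, resting proxies that are requested too often.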
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'proxy_rotater'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install proxy_rotater
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'proxy_rotater'
@@ -0,0 +1,159 @@
1
+ require "nokogiri"
2
+ require "httparty"
3
+ require "parallel"
4
+
5
+ require "proxy_rotater/version"
6
+ require "proxy_rotater/proxy"
7
+ require "proxy_rotater/public_proxy/getproxy"
8
+
9
+ class ProxyRotater
10
+ CONCURRENT_PROCESS = 10
11
+ PROXY_UPDATE_WAIT = 60 * 15
12
+
13
+ def initialize(domain, options = {})
14
+ @domain = domain
15
+ @interval_sec = 1
16
+ @req_limit_per_hour = 500
17
+ @request_workers = CONCURRENT_PROCESS
18
+
19
+ @available = []
20
+ @over_heated = []
21
+ @failed = []
22
+
23
+ @custom_fail = []
24
+
25
+ get_proxies
26
+ end
27
+
28
+ def get(urls, do_retry = true)
29
+ urls = [urls] unless urls.kind_of?(Array)
30
+ retry_url = []
31
+
32
+ concurrency = urls.size > @request_workers ? @request_workers : urls.size
33
+ results = urls.each_slice(concurrency).map do |round_url|
34
+ round_result = Parallel.map_with_index(round_url, in_processes: concurrency) do |url, i|
35
+ res = @available[i].get_url(url)
36
+ if res.nil?
37
+ next
38
+ end
39
+
40
+ if @custom_fail.any?{|error_proc| error_proc.call(res)}
41
+ @available.timeout = true
42
+ next
43
+ end
44
+ {
45
+ body: res.body,
46
+ response: res.response
47
+ }
48
+ end
49
+ check
50
+ round_result.each_with_index.inject({}) do |hash, value|
51
+ retry_url << round_url[value[1]] if value[0].nil?
52
+ key = round_url[value[1]]
53
+ hash[key] = value[0]
54
+ hash
55
+ end
56
+ end
57
+
58
+ r = {}.tap do |hash|
59
+ results.each{|r|hash.merge!(r)}
60
+ end
61
+
62
+ unless retry_url.empty?
63
+ if do_retry
64
+ retried = get(retry_url, false)
65
+ r.merge!(retried)
66
+ end
67
+ end
68
+ r
69
+ end
70
+
71
+ def check
72
+ disable_timeouted
73
+ check_req_limit
74
+ revival
75
+ get_proxies if @available.size < @request_workers
76
+ sleep PROXY_UPDATE_WAIT if @available.empty?
77
+ sort
78
+ end
79
+
80
+ def add_custom_fail(&block)
81
+ @custom_fail << block
82
+ end
83
+
84
+ private
85
+ def get_proxies
86
+ values = get_proxylist_from_html
87
+
88
+ proxies = Parallel.map(values, in_processes: values.size) do |value|
89
+ Proxy.new(value)
90
+ end
91
+ proxies = proxies.group_by{|proxy|proxy.timeout}
92
+
93
+ @available = proxies[false]
94
+ @failed = proxies[true]
95
+ end
96
+
97
+ def disable_timeouted
98
+ proxies = @available.group_by{|proxy|proxy.timeout}
99
+ @available = proxies[false]
100
+ @failed.concat(proxies[true]) unless proxies[true].nil?
101
+ end
102
+
103
+ def check_req_limit
104
+ proxies = @available.group_by do |proxy|
105
+ if is_per_sec_limit_over?(proxy.get_request_intervals)
106
+ proxy.return_at = Time.now.to_f + @interval_sec
107
+ :disabled
108
+ elsif is_per_hour_limit_over?(proxy.requested_at)
109
+ proxy.return_at = Time.now.to_f + 60 * 30
110
+ :disabled
111
+ else
112
+ :available
113
+ end
114
+ end
115
+
116
+ @available = proxies[:available] if proxies[:available]
117
+ @over_heated.concat(proxies[:disabled]) if proxies[:disabled]
118
+ end
119
+
120
+ def is_per_sec_limit_over?(req_intervals)
121
+ size = req_intervals.size
122
+ return false if size == 0
123
+ req_time_ave = req_intervals
124
+ .slice(-1 * size, 10)
125
+ .inject(0){|sum, i|sum += i}/size
126
+ req_time_ave > @interval_sec
127
+ end
128
+
129
+ def is_per_hour_limit_over?(req_times)
130
+ now = Time.now.to_f
131
+ in_hour_index = req_times.find_index do |timestamp|
132
+ timestamp > now - (60 * 60)
133
+ end
134
+ return false if in_hour_index.nil?
135
+ req_times.slice(in_hour_index..-1).size > @req_limit_per_hour
136
+ end
137
+
138
+ def revival
139
+ proxies = @over_heated.group_by do |proxy|
140
+ proxy.return_at < Time.now.to_f
141
+ end
142
+ return if proxies.empty?
143
+ @over_heated = proxies[false]
144
+ @available.concat(proxies[true]) if proxies[true]
145
+ end
146
+
147
+ def sort
148
+ @available.sort_by{|proxy| proxy.last_response_time}
149
+ end
150
+
151
+ def get_proxylist_from_html
152
+ list = GetProxy.get
153
+ existing_proxies = [].concat([@available, @failed, @over_heated]).flatten!
154
+ list.delete_if do |line|
155
+ existing_proxies.any?{|proxy| proxy.ip_address == line[:ip_address]}
156
+ end
157
+ end
158
+
159
+ end
@@ -0,0 +1,81 @@
1
# One HTTP proxy endpoint together with the request statistics used to
# schedule it (request timestamps, response times, timeout flag).
class Proxy
  include HTTParty
  default_timeout 4

  # Decimal places kept when rounding measured response times.
  ROUND_NUM = 2
  # URL requested by #benchmark.
  TEST_REQUEST_URL = "http://www.google.co.jp"
  # Number of concurrent requests issued by #benchmark.
  BENCHMARK_TIMES = 3

  # Network failures that mark the proxy as timed out instead of propagating.
  CONNECTION_ERRORS = [
    Errno::ETIMEDOUT, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::EHOSTUNREACH,
    EOFError, SocketError, Net::ReadTimeout, Net::OpenTimeout
  ].freeze

  attr_reader :ip_address, :port, :requested_at, :last_response_time,
              :response_time, :checked_at
  # timeout:   true once a request through this proxy failed.
  # return_at: epoch seconds after which a resting proxy may be used again.
  attr_accessor :timeout, :return_at

  # Stores every key of +options+ as an instance variable (for example
  # :ip_address, :port, :country), resets the statistics, and benchmarks the
  # proxy, which performs network requests.
  #
  # @param options [Hash] proxy attributes; the hash itself is not modified
  def initialize(options)
    state = options.merge(
      requested_at: [],
      response_time: nil,
      last_response_time: nil,
      checked_at: nil,
      timeout: false,
      return_at: nil
    )
    state.each do |key, value|
      instance_variable_set("@#{key}", value)
    end

    benchmark
  end

  # Requests +url+ through this proxy.
  #
  # The proxy address is passed per request: a class-level http_proxy setting
  # would be shared by every Proxy instance.
  #
  # @return the HTTParty response, or nil when the connection failed (in which
  #   case #timeout becomes true)
  def get_url(url)
    start = Time.now
    # TODO: handle "redirect too deep" errors
    begin
      res = self.class.get(url, http_proxyaddr: @ip_address, http_proxyport: @port)
    rescue *CONNECTION_ERRORS
      @timeout = true
      return nil
    end
    finish = Time.now
    @requested_at << finish.to_f
    @last_response_time = (finish.to_f - start.to_f).round(ROUND_NUM)
    res
  end

  # Measures the proxy by requesting TEST_REQUEST_URL BENCHMARK_TIMES times in
  # parallel threads. On success stores the mean response time in
  # @response_time and the time of the check in @checked_at; when any request
  # fails both are left untouched.
  def benchmark
    recorded_before = @requested_at.size
    urls = Array.new(BENCHMARK_TIMES, TEST_REQUEST_URL)
    response_times = Parallel.map(urls, in_threads: BENCHMARK_TIMES) do |url|
      start = Time.now.to_f
      res = get_url(url)
      finish = Time.now.to_f
      res.nil? ? nil : (finish - start).round(ROUND_NUM)
    end

    # Benchmark requests must not count toward requested_at (and therefore the
    # rate limits): drop only the timestamps added above, keeping any history
    # recorded before the benchmark.
    @requested_at.slice!(recorded_before..-1)
    return if response_times.include?(nil)

    total = response_times.inject(0) { |sum, seconds| sum + seconds }
    @response_time = (total / response_times.size).round(ROUND_NUM)
    @checked_at = Time.now
  end

  # @return [Array<Float>] seconds between consecutive recorded requests,
  #   oldest first; empty when fewer than two requests were recorded
  def get_request_intervals
    @requested_at.each_cons(2).map { |earlier, later| later - earlier }
  end
end
@@ -0,0 +1,22 @@
1
# Scrapes the public proxy list published at getproxy.jp.
module GetProxy

  URL = "http://www.getproxy.jp/"

  # "a.b.c.d:port" with 1-3 digits per octet and a 1-5 digit port, anchored to
  # the whole string.
  ADDRESS_PATTERN = /\A(\d{1,3}(?:\.\d{1,3}){3}):(\d{1,5})\z/

  # Downloads URL and parses the rows of the "#mytable" table, skipping the
  # first (header) row and any row that does not contain a parsable address.
  #
  # @return [Array<Hash>] one { ip_address:, port:, country: } hash per proxy,
  #   with ip_address and port as strings; empty when the table has no rows
  def self.get
    page = HTTParty.get(URL)
    html = Nokogiri::HTML(page.body)

    rows = html.css("#mytable tr").to_a.drop(1)
    rows.map { |row| parse_row(row) }.compact
  end

  # Converts one table row into a proxy hash, or nil when the first cell has
  # no <strong> element or its text is not an "ip:port" address.
  def self.parse_row(row)
    cells = row.css("td")
    address_node = cells[0] && cells[0].css("strong").first
    return nil if address_node.nil?

    matched_address = ADDRESS_PATTERN.match(address_node.content.strip)
    return nil if matched_address.nil?

    {
      ip_address: matched_address[1],
      port: matched_address[2],
      country: cells[1] && cells[1].content
    }
  end
  private_class_method :parse_row

end
@@ -0,0 +1,3 @@
1
class ProxyRotater
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Gem specification for proxy_rotater.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
# Only the version constant is needed here. Requiring the whole library would
# load nokogiri, httparty and parallel while the gemspec is being evaluated,
# which fails when those gems are not installed yet.
require 'proxy_rotater/version'

Gem::Specification.new do |spec|
  spec.name          = "proxy_rotater"
  spec.version       = ProxyRotater::VERSION
  spec.authors       = ["shim0mura"]
  spec.email         = ["tatshimomura@gmail.com"]
  spec.description   = %q{rotate proxy}
  spec.summary       = %q{rotate proxy}
  spec.homepage      = ""
  spec.license       = "MIT"

  # Package every file tracked by git; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"

  spec.add_runtime_dependency "httparty"
  spec.add_runtime_dependency "parallel"
  spec.add_runtime_dependency "nokogiri"
end
@@ -0,0 +1,11 @@
1
require 'spec_helper'

# Smoke specs for the ProxyRotater gem.
describe ProxyRotater do
  it 'should have a version number' do
    ProxyRotater::VERSION.should_not be_nil
  end

  # Declared without a block so RSpec reports it as pending; the scaffold's
  # placeholder assertion (false.should be_true) always failed and broke the
  # default rake task.
  it 'should do something useful'
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'proxy_rotater'
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: proxy_rotater
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - shim0mura
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: httparty
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: parallel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: rotate proxy
98
+ email:
99
+ - tatshimomura@gmail.com
100
+ executables:
101
+ - proxy_rotater
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - .gitignore
106
+ - .rspec
107
+ - .travis.yml
108
+ - Gemfile
109
+ - LICENSE.txt
110
+ - README.md
111
+ - Rakefile
112
+ - bin/proxy_rotater
113
+ - lib/proxy_rotater.rb
114
+ - lib/proxy_rotater/proxy.rb
115
+ - lib/proxy_rotater/public_proxy/getproxy.rb
116
+ - lib/proxy_rotater/version.rb
117
+ - proxy_rotater.gemspec
118
+ - spec/proxy_rotater_spec.rb
119
+ - spec/spec_helper.rb
120
+ homepage: ''
121
+ licenses:
122
+ - MIT
123
+ metadata: {}
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - '>='
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubyforge_project:
140
+ rubygems_version: 2.0.3
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: rotate proxy
144
+ test_files:
145
+ - spec/proxy_rotater_spec.rb
146
+ - spec/spec_helper.rb