http_utilities 1.1.2 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -9
- data/http_utilities.gemspec +10 -17
- data/lib/http_utilities/http/client.rb +62 -8
- data/lib/http_utilities/http/mechanize/client.rb +5 -1
- data/lib/http_utilities/http/proxy_support.rb +10 -0
- data/lib/http_utilities/http/request.rb +2 -3
- data/lib/http_utilities/http/response.rb +15 -4
- data/lib/http_utilities/http/user_agent.rb +1 -1
- data/lib/http_utilities/proxies/mongo/proxy_module.rb +97 -0
- data/lib/http_utilities/proxies/mysql/proxy_module.rb +93 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +34 -66
- data/lib/http_utilities/proxies/proxy_seeder.rb +13 -11
- data/lib/http_utilities.rb +9 -17
- data/spec/http_utilities/client_spec.rb +5 -107
- data/spec/spec_helper.rb +3 -5
- metadata +21 -41
- data/lib/http_utilities/http/adapters/curb.rb +0 -107
- data/lib/http_utilities/http/adapters/net_http.rb +0 -135
- data/lib/http_utilities/http/adapters/open_uri.rb +0 -46
- data/lib/http_utilities/http/cookies.rb +0 -49
- data/lib/http_utilities/http/format.rb +0 -26
- data/lib/http_utilities/http/get.rb +0 -67
- data/lib/http_utilities/http/post.rb +0 -32
- data/lib/http_utilities/proxies/proxy_module.rb +0 -89
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08cfb5e367ca9f757c3eb75ff8958a55af204bf6
|
4
|
+
data.tar.gz: ec280ce2d9e5bc1f0e1ad7fbd724a6abb438409c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0395ba295966ce1622021940273827ff880d691c78c3f8ed71f45edeebf418932e602bf9219e2d3ad6b245e3830de4a59aac73d363b394c9c73a631bab8f531d
|
7
|
+
data.tar.gz: 057783241fe6465bff927446dc03c31c10faa3a20784a34caff5edd6bdebc4617c6cb584d15ad0783d4c4babc4c4e2e39efa8566e0a6b83e90e08e2bc1613f81
|
data/Gemfile
CHANGED
@@ -1,16 +1,11 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
gem
|
4
|
-
gem
|
5
|
-
gem
|
6
|
-
gem "net-ssh", ">= 2.8"
|
3
|
+
gem 'nokogiri'
|
4
|
+
gem 'mechanize'
|
5
|
+
gem 'net-ssh'
|
7
6
|
|
8
7
|
gem "activerecord-import", :require => false
|
9
8
|
|
10
|
-
platforms :ruby do
|
11
|
-
gem 'curb'
|
12
|
-
end
|
13
|
-
|
14
9
|
group :development, :test do
|
15
10
|
gem 'rails'
|
16
11
|
gem 'jeweler'
|
@@ -18,6 +13,6 @@ group :development, :test do
|
|
18
13
|
gem 'sqlite3'
|
19
14
|
|
20
15
|
platforms :ruby do
|
21
|
-
gem "mysql2"
|
16
|
+
gem "mysql2"
|
22
17
|
end
|
23
18
|
end
|
data/http_utilities.gemspec
CHANGED
@@ -3,24 +3,23 @@ Gem::Specification.new do |s|
|
|
3
3
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=
|
4
4
|
|
5
5
|
s.name = "http_utilities"
|
6
|
-
s.version = "1.1.2"
|
6
|
+
s.version = "1.1.5"
|
7
7
|
|
8
8
|
s.authors = ["Sebastian Johnsson"]
|
9
|
-
s.description = "Wrapper for
|
9
|
+
s.description = "Wrapper for Faraday with additional functionality"
|
10
10
|
|
11
11
|
s.homepage = "http://github.com/Agiley/http_utilities"
|
12
|
-
s.summary = "Wrapper for
|
12
|
+
s.summary = "Wrapper for Faraday with additional functionality"
|
13
13
|
|
14
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
15
|
-
s.add_dependency(%q<mechanize>, [">= 2.
|
16
|
-
s.add_dependency(%q<
|
17
|
-
s.add_dependency(%q<net-ssh>, [">= 2.8"])
|
18
|
-
s.add_dependency(%q<activerecord-import>, [">= 0"])
|
14
|
+
s.add_dependency(%q<nokogiri>, [">= 1.6"])
|
15
|
+
s.add_dependency(%q<mechanize>, [">= 2.7"])
|
16
|
+
s.add_dependency(%q<net-ssh>, [">= 2.9"])
|
19
17
|
|
20
18
|
s.add_development_dependency(%q<rails>, [">= 0"])
|
21
19
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
22
20
|
s.add_development_dependency(%q<sqlite3>, [">= 0"])
|
23
|
-
s.add_development_dependency(%q<mysql2>, [">= 0
|
21
|
+
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
22
|
+
s.add_development_dependency(%q<activerecord-import>, [">= 0"])
|
24
23
|
|
25
24
|
# = MANIFEST =
|
26
25
|
s.files = %w[
|
@@ -38,16 +37,9 @@ Gem::Specification.new do |s|
|
|
38
37
|
lib/generators/templates/http_utilities.rb
|
39
38
|
lib/generators/templates/user_agents.yml
|
40
39
|
lib/http_utilities.rb
|
41
|
-
lib/http_utilities/http/adapters/curb.rb
|
42
|
-
lib/http_utilities/http/adapters/net_http.rb
|
43
|
-
lib/http_utilities/http/adapters/open_uri.rb
|
44
40
|
lib/http_utilities/http/client.rb
|
45
|
-
lib/http_utilities/http/cookies.rb
|
46
|
-
lib/http_utilities/http/format.rb
|
47
|
-
lib/http_utilities/http/get.rb
|
48
41
|
lib/http_utilities/http/logger.rb
|
49
42
|
lib/http_utilities/http/mechanize/client.rb
|
50
|
-
lib/http_utilities/http/post.rb
|
51
43
|
lib/http_utilities/http/proxy_support.rb
|
52
44
|
lib/http_utilities/http/request.rb
|
53
45
|
lib/http_utilities/http/response.rb
|
@@ -57,8 +49,9 @@ Gem::Specification.new do |s|
|
|
57
49
|
lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
|
58
50
|
lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
|
59
51
|
lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
|
52
|
+
lib/http_utilities/proxies/mongo/proxy_module.rb
|
53
|
+
lib/http_utilities/proxies/mysql/proxy_module.rb
|
60
54
|
lib/http_utilities/proxies/proxy_checker.rb
|
61
|
-
lib/http_utilities/proxies/proxy_module.rb
|
62
55
|
lib/http_utilities/proxies/proxy_seeder.rb
|
63
56
|
lib/http_utilities/railtie.rb
|
64
57
|
lib/tasks/http_utilities_tasks.rake
|
data/lib/http_utilities/http/client.rb
CHANGED
@@ -1,21 +1,75 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require 'open-uri'
|
3
|
-
require 'net/http'
|
4
3
|
require 'uri'
|
5
4
|
require 'cgi'
|
6
5
|
|
7
6
|
module HttpUtilities
|
8
7
|
module Http
|
9
8
|
class Client
|
10
|
-
include HttpUtilities::Http::Cookies
|
11
|
-
include HttpUtilities::Http::Url
|
12
|
-
include HttpUtilities::Http::Get
|
13
|
-
include HttpUtilities::Http::Post
|
14
9
|
include HttpUtilities::Http::Logger
|
10
|
+
|
11
|
+
def get(url, arguments: {}, options: {}, retries: 3)
|
12
|
+
response = nil
|
13
|
+
request = build_request(options)
|
14
|
+
|
15
|
+
begin
|
16
|
+
response = request.interface.get(url, arguments)
|
17
|
+
response = HttpUtilities::Http::Response.new(response, request, options)
|
18
|
+
|
19
|
+
rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
|
20
|
+
log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
|
21
|
+
retries -= 1
|
22
|
+
retry if retries > 0
|
23
|
+
end
|
15
24
|
|
16
|
-
|
17
|
-
|
18
|
-
|
25
|
+
return response
|
26
|
+
end
|
27
|
+
|
28
|
+
def post(url, data: nil, options: {}, retries: 3)
|
29
|
+
response = nil
|
30
|
+
request = build_request(options)
|
31
|
+
|
32
|
+
begin
|
33
|
+
response = request.interface.post(url, data)
|
34
|
+
response = HttpUtilities::Http::Response.new(response, request, options)
|
35
|
+
|
36
|
+
rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
|
37
|
+
log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
|
38
|
+
retries -= 1
|
39
|
+
retry if retries > 0
|
40
|
+
end
|
41
|
+
|
42
|
+
return response
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
def build_request(options = {}, faraday_options = {})
|
47
|
+
options = options.dup
|
48
|
+
options = options.merge(ssl: {:verify => false})
|
49
|
+
|
50
|
+
adapter = options.delete(:adapter) { |opt| Faraday.default_adapter }
|
51
|
+
timeout = options.delete(:timeout) { |opt| 60 }
|
52
|
+
open_timeout = options.delete(:open_timeout) { |opt| 60 }
|
53
|
+
|
54
|
+
request = HttpUtilities::Http::Request.new
|
55
|
+
request.set_proxy_options(options)
|
56
|
+
|
57
|
+
proxy_options = request.generate_proxy_options
|
58
|
+
|
59
|
+
connection = Faraday.new(faraday_options) do |builder|
|
60
|
+
builder.headers[:user_agent] = request.user_agent
|
61
|
+
builder.options[:timeout] = timeout
|
62
|
+
builder.options[:open_timeout] = open_timeout
|
63
|
+
#builder.response :logger
|
64
|
+
builder.proxy proxy_options unless proxy_options.empty?
|
65
|
+
builder.adapter adapter
|
66
|
+
end
|
67
|
+
|
68
|
+
request.interface = connection
|
69
|
+
|
70
|
+
return request
|
71
|
+
end
|
72
|
+
|
19
73
|
end
|
20
74
|
end
|
21
75
|
end
|
data/lib/http_utilities/http/mechanize/client.rb
CHANGED
@@ -30,7 +30,11 @@ module HttpUtilities
|
|
30
30
|
self.agent.log = ::Logger.new(logger) if (verbose)
|
31
31
|
|
32
32
|
self.set_proxy_options(options)
|
33
|
-
|
33
|
+
|
34
|
+
if (self.proxy[:host] && self.proxy[:port])
|
35
|
+
log(:info, "[HttpUtilities::Http::Mechanize::Client] - Will use proxy #{self.proxy[:host]}:#{self.proxy[:port]} for Mechanize.")
|
36
|
+
self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password])
|
37
|
+
end
|
34
38
|
|
35
39
|
self.set_user_agent
|
36
40
|
(self.user_agent) ? self.agent.user_agent = self.user_agent : self.agent.user_agent_alias = 'Mac Safari'
|
data/lib/http_utilities/http/proxy_support.rb
CHANGED
@@ -84,6 +84,16 @@ module HttpUtilities
|
|
84
84
|
def using_proxy?
|
85
85
|
return (self.proxy[:host] && self.proxy[:port] && self.proxy[:port] > 0)
|
86
86
|
end
|
87
|
+
|
88
|
+
def generate_proxy_options
|
89
|
+
proxy_options = {}
|
90
|
+
|
91
|
+
proxy_options[:uri] = "http://#{self.proxy[:host]}:#{self.proxy[:port]}"
|
92
|
+
proxy_options[:user] = self.proxy[:username] if self.proxy[:username] && self.proxy[:username].present?
|
93
|
+
proxy_options[:password] = self.proxy[:password] if self.proxy[:password] && self.proxy[:password].present?
|
94
|
+
|
95
|
+
return proxy_options
|
96
|
+
end
|
87
97
|
|
88
98
|
end
|
89
99
|
end
|
data/lib/http_utilities/http/request.rb
CHANGED
@@ -5,12 +5,11 @@ module HttpUtilities
|
|
5
5
|
include HttpUtilities::Http::ProxySupport
|
6
6
|
include HttpUtilities::Http::UserAgent
|
7
7
|
|
8
|
-
attr_accessor :interface, :proxy, :
|
8
|
+
attr_accessor :interface, :proxy, :user_agent
|
9
9
|
|
10
|
-
def initialize(interface = nil, proxy = {}
|
10
|
+
def initialize(interface = nil, proxy = {})
|
11
11
|
self.interface = interface
|
12
12
|
self.proxy = proxy
|
13
|
-
self.cookies = cookies
|
14
13
|
|
15
14
|
self.set_user_agent
|
16
15
|
end
|
data/lib/http_utilities/http/response.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
module HttpUtilities
|
2
2
|
module Http
|
3
3
|
class Response
|
4
|
-
include HttpUtilities::Http::Format
|
5
4
|
include HttpUtilities::Http::Logger
|
6
5
|
|
7
6
|
attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding
|
8
7
|
|
9
|
-
def initialize(
|
10
|
-
options = options.
|
8
|
+
def initialize(response = nil, request = nil, options = {})
|
9
|
+
options = options.dup
|
11
10
|
|
12
|
-
self.body = body
|
11
|
+
self.body = (response && response.body) ? response.body : nil
|
13
12
|
self.request = request
|
14
13
|
|
15
14
|
self.parsed_body = nil
|
@@ -34,6 +33,18 @@ module HttpUtilities
|
|
34
33
|
def parse_response
|
35
34
|
self.send("as_#{self.format}".to_sym) if (self.body && self.format)
|
36
35
|
end
|
36
|
+
|
37
|
+
def as_html
|
38
|
+
self.parsed_body = (self.body && self.body != "") ? Nokogiri::HTML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def as_xml
|
42
|
+
self.parsed_body = (self.body && self.body != "") ? Nokogiri::XML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def as_json
|
46
|
+
self.parsed_body = (self.body && self.body != "") ? self.body.to_s.force_encoding("utf-8").to_json : nil
|
47
|
+
end
|
37
48
|
|
38
49
|
def set_page(page)
|
39
50
|
self.page = page
|
data/lib/http_utilities/http/user_agent.rb
CHANGED
@@ -5,7 +5,7 @@ module HttpUtilities
|
|
5
5
|
module UserAgent
|
6
6
|
|
7
7
|
def set_user_agent
|
8
|
-
user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS
|
8
|
+
user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS.sample : nil
|
9
9
|
self.user_agent = user_agent if (user_agent && self.respond_to?(:user_agent=))
|
10
10
|
end
|
11
11
|
|
data/lib/http_utilities/proxies/mongo/proxy_module.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
module HttpUtilities
|
2
|
+
module Proxies
|
3
|
+
module Mongo
|
4
|
+
|
5
|
+
module ProxyModule
|
6
|
+
|
7
|
+
def self.included(base)
|
8
|
+
base.send :extend, ClassMethods
|
9
|
+
base.send :include, InstanceMethods
|
10
|
+
end
|
11
|
+
|
12
|
+
module ClassMethods
|
13
|
+
def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
|
14
|
+
proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
15
|
+
|
16
|
+
proxies = proxies.any_of(
|
17
|
+
{:last_checked_at.exists => false},
|
18
|
+
{:last_checked_at.ne => nil},
|
19
|
+
{:last_checked_at.exists => true, :last_checked_at.ne => nil, :last_checked_at.lt => date}
|
20
|
+
)
|
21
|
+
|
22
|
+
proxies = proxies.any_of(
|
23
|
+
{:failed_attempts.exists => false},
|
24
|
+
{:failed_attempts.in => ["", nil]},
|
25
|
+
{:failed_attempts.exists => true, :failed_attempts.nin => ["", nil], :failed_attempts.lte => maximum_failed_attempts}
|
26
|
+
)
|
27
|
+
|
28
|
+
proxies = proxies.order_by([[:valid_proxy, :asc], [:failed_attempts, :asc], [:last_checked_at, :asc]])
|
29
|
+
proxies = proxies.limit(limit)
|
30
|
+
|
31
|
+
return proxies
|
32
|
+
end
|
33
|
+
|
34
|
+
def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil, retries: 3)
|
35
|
+
proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
36
|
+
proxies = proxies.where(valid_proxy: true)
|
37
|
+
proxies = proxies.where(:failed_attempts.lte => maximum_failed_attempts) if maximum_failed_attempts
|
38
|
+
proxy = nil
|
39
|
+
|
40
|
+
begin
|
41
|
+
proxy = proxies.skip(rand(proxies.count)).first
|
42
|
+
|
43
|
+
rescue StandardError
|
44
|
+
retries -= 1
|
45
|
+
retry if retries > 0
|
46
|
+
end
|
47
|
+
|
48
|
+
return proxy
|
49
|
+
end
|
50
|
+
|
51
|
+
def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
52
|
+
proxies = ::Proxy.where(nil)
|
53
|
+
proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
|
54
|
+
proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
|
55
|
+
|
56
|
+
return proxies
|
57
|
+
end
|
58
|
+
|
59
|
+
def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
|
60
|
+
proxy_address = "#{proxy_host}:#{proxy_port}"
|
61
|
+
proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
|
62
|
+
|
63
|
+
return proxy_address
|
64
|
+
end
|
65
|
+
|
66
|
+
def format_proxy_credentials(username, password)
|
67
|
+
return "#{username}:#{password}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
module InstanceMethods
|
72
|
+
def proxy_address(include_http = false)
|
73
|
+
return ::Proxy.format_proxy_address(self.host, self.port, include_http)
|
74
|
+
end
|
75
|
+
|
76
|
+
def proxy_credentials
|
77
|
+
return ::Proxy.format_proxy_credentials(self.username, self.password)
|
78
|
+
end
|
79
|
+
|
80
|
+
def socks_proxy_credentials
|
81
|
+
credentials = {}
|
82
|
+
|
83
|
+
if (!self.username.empty? && !self.password.empty?)
|
84
|
+
credentials = {user: self.username, password: self.password}
|
85
|
+
elsif (!self.username.empty? && self.password.empty?)
|
86
|
+
credentials = {user: self.username}
|
87
|
+
end
|
88
|
+
|
89
|
+
return credentials
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/http_utilities/proxies/mysql/proxy_module.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
module HttpUtilities
|
2
|
+
module Proxies
|
3
|
+
module Mysql
|
4
|
+
|
5
|
+
module ProxyModule
|
6
|
+
|
7
|
+
def self.included(base)
|
8
|
+
base.send :extend, ClassMethods
|
9
|
+
base.send :include, InstanceMethods
|
10
|
+
end
|
11
|
+
|
12
|
+
module ClassMethods
|
13
|
+
def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
|
14
|
+
proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
15
|
+
proxies = proxies.where(["(last_checked_at IS NULL OR last_checked_at < ?)", date])
|
16
|
+
proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts])
|
17
|
+
proxies = proxies.order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC")
|
18
|
+
proxies = proxies.limit(limit)
|
19
|
+
|
20
|
+
return proxies
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
|
24
|
+
proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
25
|
+
proxies = proxies.where(["valid_proxy = ? AND last_checked_at IS NOT NULL", true])
|
26
|
+
proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts]) if maximum_failed_attempts
|
27
|
+
|
28
|
+
order_clause = case ActiveRecord::Base.connection.class.name
|
29
|
+
when "ActiveRecord::ConnectionAdapters::MysqlAdapter", "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
|
30
|
+
"RAND() DESC"
|
31
|
+
when "ActiveRecord::ConnectionAdapters::SQLite3Adapter"
|
32
|
+
"RANDOM() DESC"
|
33
|
+
else
|
34
|
+
"RAND() DESC"
|
35
|
+
end
|
36
|
+
|
37
|
+
proxies = proxies.order(order_clause)
|
38
|
+
|
39
|
+
proxy = nil
|
40
|
+
|
41
|
+
uncached do
|
42
|
+
proxy = proxies.limit(1).first
|
43
|
+
end
|
44
|
+
|
45
|
+
return proxy
|
46
|
+
end
|
47
|
+
|
48
|
+
def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
|
49
|
+
proxies = ::Proxy.where(nil)
|
50
|
+
proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
|
51
|
+
proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
|
52
|
+
|
53
|
+
return proxies
|
54
|
+
end
|
55
|
+
|
56
|
+
def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
|
57
|
+
proxy_address = "#{proxy_host}:#{proxy_port}"
|
58
|
+
proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
|
59
|
+
return proxy_address
|
60
|
+
end
|
61
|
+
|
62
|
+
def format_proxy_credentials(username, password)
|
63
|
+
return "#{username}:#{password}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
module InstanceMethods
|
68
|
+
def proxy_address(include_http = false)
|
69
|
+
return ::Proxy.format_proxy_address(self.host, self.port, include_http)
|
70
|
+
end
|
71
|
+
|
72
|
+
def proxy_credentials
|
73
|
+
return ::Proxy.format_proxy_credentials(self.username, self.password)
|
74
|
+
end
|
75
|
+
|
76
|
+
def socks_proxy_credentials
|
77
|
+
credentials = {}
|
78
|
+
|
79
|
+
if (!self.username.empty? && !self.password.empty?)
|
80
|
+
credentials = {user: self.username, password: self.password}
|
81
|
+
elsif (!self.username.empty? && self.password.empty?)
|
82
|
+
credentials = {user: self.username}
|
83
|
+
end
|
84
|
+
|
85
|
+
return credentials
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
data/lib/http_utilities/proxies/proxy_checker.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
require 'socket'
|
4
4
|
require 'net/ssh/proxy/socks5'
|
5
|
-
require 'activerecord-import'
|
6
5
|
|
7
6
|
module HttpUtilities
|
8
7
|
module Proxies
|
@@ -11,9 +10,8 @@ module HttpUtilities
|
|
11
10
|
attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts
|
12
11
|
|
13
12
|
def initialize
|
14
|
-
self.client = HttpUtilities::Http::
|
15
|
-
|
16
|
-
|
13
|
+
self.client = HttpUtilities::Http::Client.new
|
14
|
+
|
17
15
|
self.processed_proxies = []
|
18
16
|
|
19
17
|
self.limit = 1000
|
@@ -23,7 +21,6 @@ module HttpUtilities
|
|
23
21
|
|
24
22
|
def check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
|
25
23
|
check_proxies(protocol: protocol, proxy_type: proxy_type, mode: mode, maximum_failed_attempts: maximum_failed_attempts)
|
26
|
-
update_proxies
|
27
24
|
end
|
28
25
|
|
29
26
|
def check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
|
@@ -77,86 +74,57 @@ module HttpUtilities
|
|
77
74
|
valid_proxy = false
|
78
75
|
end
|
79
76
|
|
80
|
-
|
81
|
-
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
|
82
|
-
else
|
83
|
-
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
|
84
|
-
end
|
85
|
-
|
86
|
-
self.processed_proxies << {proxy: proxy, valid: valid_proxy}
|
77
|
+
update_proxy(proxy, valid_proxy)
|
87
78
|
end
|
88
79
|
|
89
|
-
def check_http_proxy(proxy, timeout
|
90
|
-
document = nil
|
91
|
-
valid_proxy = false
|
92
|
-
|
80
|
+
def check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10)
|
93
81
|
options = {
|
94
82
|
use_proxy: true,
|
95
|
-
proxy: proxy.
|
83
|
+
proxy: {host: proxy.host, port: proxy.port},
|
96
84
|
proxy_protocol: proxy.protocol,
|
97
|
-
timeout: timeout
|
85
|
+
timeout: timeout
|
98
86
|
}
|
99
|
-
|
100
|
-
Rails.logger.info "#{Time.now}: Fetching Google.com with proxy #{proxy.proxy_address}."
|
101
87
|
|
102
|
-
|
103
|
-
|
104
|
-
if (page)
|
105
|
-
parser = self.client.get_parser(page)
|
106
|
-
title = parser.at_css("head title")
|
107
|
-
|
108
|
-
if (title && title.content)
|
109
|
-
begin
|
110
|
-
title = title.content.encode("UTF-8").strip.downcase
|
111
|
-
body_content = page.body.to_s.encode("UTF-8").strip.downcase
|
112
|
-
|
113
|
-
valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)
|
114
|
-
|
115
|
-
Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"
|
116
|
-
|
117
|
-
rescue Exception => e
|
118
|
-
Rails.logger.error "Exception occured while trying to check proxy #{proxy.proxy_address}. Error Class: #{e.class}. Error Message: #{e.message}"
|
119
|
-
valid_proxy = false
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
88
|
+
options.merge!(proxy_username: proxy.username) if proxy.username && proxy.username.present?
|
89
|
+
options.merge!(proxy_password: proxy.password) if proxy.password && proxy.password.present?
|
123
90
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
end
|
91
|
+
Rails.logger.info "#{Time.now}: Fetching robots.txt for Google.com with proxy #{proxy.proxy_address}. Using authentication? #{options.has_key?(:proxy_username).to_s}"
|
92
|
+
|
93
|
+
response = self.client.get(test_url, options: options)
|
94
|
+
valid_proxy = (response && response.body && response.body =~ /Allow: \/search\/about/i)
|
129
95
|
|
130
|
-
|
96
|
+
update_proxy(proxy, valid_proxy)
|
131
97
|
end
|
132
|
-
|
98
|
+
|
133
99
|
def update_proxies
|
134
|
-
columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
|
135
|
-
values = []
|
136
|
-
|
137
100
|
Rails.logger.info "Updating/Importing #{self.processed_proxies.size} proxies"
|
138
101
|
|
139
102
|
if (self.processed_proxies && self.processed_proxies.any?)
|
140
103
|
self.processed_proxies.each do |value|
|
141
|
-
|
142
|
-
valid = value[:valid]
|
143
|
-
successful_attempts = proxy.successful_attempts
|
144
|
-
failed_attempts = proxy.failed_attempts
|
145
|
-
|
146
|
-
if (valid)
|
147
|
-
successful_attempts += 1
|
148
|
-
else
|
149
|
-
failed_attempts += 1
|
150
|
-
end
|
151
|
-
|
152
|
-
is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
|
153
|
-
value_arr = [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
|
154
|
-
values << value_arr
|
104
|
+
update_proxy(value[:proxy], value[:valid])
|
155
105
|
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def update_proxy(proxy, valid)
|
110
|
+
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is #{valid ? "working" : "not working"}!"
|
111
|
+
|
112
|
+
successful_attempts = proxy.successful_attempts || 0
|
113
|
+
failed_attempts = proxy.failed_attempts || 0
|
156
114
|
|
157
|
-
|
115
|
+
if (valid)
|
116
|
+
successful_attempts += 1
|
117
|
+
else
|
118
|
+
failed_attempts += 1
|
158
119
|
end
|
159
120
|
|
121
|
+
is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
|
122
|
+
|
123
|
+
proxy.valid_proxy = is_valid
|
124
|
+
proxy.successful_attempts = successful_attempts
|
125
|
+
proxy.failed_attempts = failed_attempts
|
126
|
+
proxy.last_checked_at = Time.now
|
127
|
+
proxy.save
|
160
128
|
end
|
161
129
|
|
162
130
|
end
|
data/lib/http_utilities/proxies/proxy_seeder.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
module HttpUtilities
|
2
2
|
module Proxies
|
3
3
|
class ProxySeeder
|
4
|
-
require 'activerecord-import'
|
5
4
|
attr_accessor :protocols, :proxy_types, :categories
|
6
5
|
|
7
6
|
def initialize
|
@@ -27,20 +26,23 @@ module HttpUtilities
|
|
27
26
|
end
|
28
27
|
|
29
28
|
def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
|
30
|
-
columns
|
31
|
-
category
|
29
|
+
columns = [:host, :port, :protocol, :proxy_type, :category]
|
30
|
+
category = (category && !category.eql?('unspecified')) ? category : nil
|
32
31
|
|
33
32
|
begin
|
34
|
-
values = []
|
35
|
-
|
36
33
|
proxy_list.slice!(0..1000).each do |proxy|
|
37
|
-
host
|
38
|
-
port
|
39
|
-
|
40
|
-
|
34
|
+
host = proxy[:host]
|
35
|
+
port = proxy[:port]
|
36
|
+
|
37
|
+
proxy = ::Proxy.where(host: host, port: port).first || ::Proxy.new
|
38
|
+
proxy.host = host
|
39
|
+
proxy.port = port
|
40
|
+
proxy.protocol = protocol
|
41
|
+
proxy.proxy_type = proxy_type
|
42
|
+
proxy.category = category
|
43
|
+
proxy.save
|
41
44
|
end
|
42
|
-
|
43
|
-
::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false) if (values && values.any?)
|
45
|
+
|
44
46
|
end while (proxy_list && proxy_list.any?)
|
45
47
|
end
|
46
48
|
|