http_utilities 1.1.2 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -9
- data/http_utilities.gemspec +10 -17
- data/lib/http_utilities/http/client.rb +62 -8
- data/lib/http_utilities/http/mechanize/client.rb +5 -1
- data/lib/http_utilities/http/proxy_support.rb +10 -0
- data/lib/http_utilities/http/request.rb +2 -3
- data/lib/http_utilities/http/response.rb +15 -4
- data/lib/http_utilities/http/user_agent.rb +1 -1
- data/lib/http_utilities/proxies/mongo/proxy_module.rb +97 -0
- data/lib/http_utilities/proxies/mysql/proxy_module.rb +93 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +34 -66
- data/lib/http_utilities/proxies/proxy_seeder.rb +13 -11
- data/lib/http_utilities.rb +9 -17
- data/spec/http_utilities/client_spec.rb +5 -107
- data/spec/spec_helper.rb +3 -5
- metadata +21 -41
- data/lib/http_utilities/http/adapters/curb.rb +0 -107
- data/lib/http_utilities/http/adapters/net_http.rb +0 -135
- data/lib/http_utilities/http/adapters/open_uri.rb +0 -46
- data/lib/http_utilities/http/cookies.rb +0 -49
- data/lib/http_utilities/http/format.rb +0 -26
- data/lib/http_utilities/http/get.rb +0 -67
- data/lib/http_utilities/http/post.rb +0 -32
- data/lib/http_utilities/proxies/proxy_module.rb +0 -89
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 08cfb5e367ca9f757c3eb75ff8958a55af204bf6
|
4
|
+
data.tar.gz: ec280ce2d9e5bc1f0e1ad7fbd724a6abb438409c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0395ba295966ce1622021940273827ff880d691c78c3f8ed71f45edeebf418932e602bf9219e2d3ad6b245e3830de4a59aac73d363b394c9c73a631bab8f531d
|
7
|
+
data.tar.gz: 057783241fe6465bff927446dc03c31c10faa3a20784a34caff5edd6bdebc4617c6cb584d15ad0783d4c4babc4c4e2e39efa8566e0a6b83e90e08e2bc1613f81
|
data/Gemfile
CHANGED
@@ -1,16 +1,11 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
gem
|
4
|
-
gem
|
5
|
-
gem
|
6
|
-
gem "net-ssh", ">= 2.8"
|
3
|
+
gem 'nokogiri'
|
4
|
+
gem 'mechanize'
|
5
|
+
gem 'net-ssh'
|
7
6
|
|
8
7
|
gem "activerecord-import", :require => false
|
9
8
|
|
10
|
-
platforms :ruby do
|
11
|
-
gem 'curb'
|
12
|
-
end
|
13
|
-
|
14
9
|
group :development, :test do
|
15
10
|
gem 'rails'
|
16
11
|
gem 'jeweler'
|
@@ -18,6 +13,6 @@ group :development, :test do
|
|
18
13
|
gem 'sqlite3'
|
19
14
|
|
20
15
|
platforms :ruby do
|
21
|
-
gem "mysql2"
|
16
|
+
gem "mysql2"
|
22
17
|
end
|
23
18
|
end
|
data/http_utilities.gemspec
CHANGED
@@ -3,24 +3,23 @@ Gem::Specification.new do |s|
|
|
3
3
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=
|
4
4
|
|
5
5
|
s.name = "http_utilities"
|
6
|
-
s.version = "1.1.2"
|
6
|
+
s.version = "1.1.5"
|
7
7
|
|
8
8
|
s.authors = ["Sebastian Johnsson"]
|
9
|
-
s.description = "Wrapper for
|
9
|
+
s.description = "Wrapper for Faraday with additional functionality"
|
10
10
|
|
11
11
|
s.homepage = "http://github.com/Agiley/http_utilities"
|
12
|
-
s.summary = "Wrapper for
|
12
|
+
s.summary = "Wrapper for Faraday with additional functionality"
|
13
13
|
|
14
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
15
|
-
s.add_dependency(%q<mechanize>, [">= 2.
|
16
|
-
s.add_dependency(%q<
|
17
|
-
s.add_dependency(%q<net-ssh>, [">= 2.8"])
|
18
|
-
s.add_dependency(%q<activerecord-import>, [">= 0"])
|
14
|
+
s.add_dependency(%q<nokogiri>, [">= 1.6"])
|
15
|
+
s.add_dependency(%q<mechanize>, [">= 2.7"])
|
16
|
+
s.add_dependency(%q<net-ssh>, [">= 2.9"])
|
19
17
|
|
20
18
|
s.add_development_dependency(%q<rails>, [">= 0"])
|
21
19
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
22
20
|
s.add_development_dependency(%q<sqlite3>, [">= 0"])
|
23
|
-
s.add_development_dependency(%q<mysql2>, [">= 0
|
21
|
+
s.add_development_dependency(%q<mysql2>, [">= 0"])
|
22
|
+
s.add_development_dependency(%q<activerecord-import>, [">= 0"])
|
24
23
|
|
25
24
|
# = MANIFEST =
|
26
25
|
s.files = %w[
|
@@ -38,16 +37,9 @@ Gem::Specification.new do |s|
|
|
38
37
|
lib/generators/templates/http_utilities.rb
|
39
38
|
lib/generators/templates/user_agents.yml
|
40
39
|
lib/http_utilities.rb
|
41
|
-
lib/http_utilities/http/adapters/curb.rb
|
42
|
-
lib/http_utilities/http/adapters/net_http.rb
|
43
|
-
lib/http_utilities/http/adapters/open_uri.rb
|
44
40
|
lib/http_utilities/http/client.rb
|
45
|
-
lib/http_utilities/http/cookies.rb
|
46
|
-
lib/http_utilities/http/format.rb
|
47
|
-
lib/http_utilities/http/get.rb
|
48
41
|
lib/http_utilities/http/logger.rb
|
49
42
|
lib/http_utilities/http/mechanize/client.rb
|
50
|
-
lib/http_utilities/http/post.rb
|
51
43
|
lib/http_utilities/http/proxy_support.rb
|
52
44
|
lib/http_utilities/http/request.rb
|
53
45
|
lib/http_utilities/http/response.rb
|
@@ -57,8 +49,9 @@ Gem::Specification.new do |s|
|
|
57
49
|
lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
|
58
50
|
lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
|
59
51
|
lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
|
52
|
+
lib/http_utilities/proxies/mongo/proxy_module.rb
|
53
|
+
lib/http_utilities/proxies/mysql/proxy_module.rb
|
60
54
|
lib/http_utilities/proxies/proxy_checker.rb
|
61
|
-
lib/http_utilities/proxies/proxy_module.rb
|
62
55
|
lib/http_utilities/proxies/proxy_seeder.rb
|
63
56
|
lib/http_utilities/railtie.rb
|
64
57
|
lib/tasks/http_utilities_tasks.rake
|
data/lib/http_utilities/http/client.rb
CHANGED
@@ -1,21 +1,75 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
require 'open-uri'
|
3
|
-
require 'net/http'
|
4
3
|
require 'uri'
|
5
4
|
require 'cgi'
|
6
5
|
|
7
6
|
module HttpUtilities
|
8
7
|
module Http
|
9
8
|
class Client
|
10
|
-
include HttpUtilities::Http::Cookies
|
11
|
-
include HttpUtilities::Http::Url
|
12
|
-
include HttpUtilities::Http::Get
|
13
|
-
include HttpUtilities::Http::Post
|
14
9
|
include HttpUtilities::Http::Logger
|
10
|
+
|
11
|
+
def get(url, arguments: {}, options: {}, retries: 3)
|
12
|
+
response = nil
|
13
|
+
request = build_request(options)
|
14
|
+
|
15
|
+
begin
|
16
|
+
response = request.interface.get(url, arguments)
|
17
|
+
response = HttpUtilities::Http::Response.new(response, request, options)
|
18
|
+
|
19
|
+
rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
|
20
|
+
log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
|
21
|
+
retries -= 1
|
22
|
+
retry if retries > 0
|
23
|
+
end
|
15
24
|
|
16
|
-
|
17
|
-
|
18
|
-
|
25
|
+
return response
|
26
|
+
end
|
27
|
+
|
28
|
+
def post(url, data: nil, options: {}, retries: 3)
|
29
|
+
response = nil
|
30
|
+
request = build_request(options)
|
31
|
+
|
32
|
+
begin
|
33
|
+
response = request.interface.post(url, data)
|
34
|
+
response = HttpUtilities::Http::Response.new(response, request, options)
|
35
|
+
|
36
|
+
rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
|
37
|
+
log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
|
38
|
+
retries -= 1
|
39
|
+
retry if retries > 0
|
40
|
+
end
|
41
|
+
|
42
|
+
return response
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
def build_request(options = {}, faraday_options = {})
|
47
|
+
options = options.dup
|
48
|
+
options = options.merge(ssl: {:verify => false})
|
49
|
+
|
50
|
+
adapter = options.delete(:adapter) { |opt| Faraday.default_adapter }
|
51
|
+
timeout = options.delete(:timeout) { |opt| 60 }
|
52
|
+
open_timeout = options.delete(:open_timeout) { |opt| 60 }
|
53
|
+
|
54
|
+
request = HttpUtilities::Http::Request.new
|
55
|
+
request.set_proxy_options(options)
|
56
|
+
|
57
|
+
proxy_options = request.generate_proxy_options
|
58
|
+
|
59
|
+
connection = Faraday.new(faraday_options) do |builder|
|
60
|
+
builder.headers[:user_agent] = request.user_agent
|
61
|
+
builder.options[:timeout] = timeout
|
62
|
+
builder.options[:open_timeout] = open_timeout
|
63
|
+
#builder.response :logger
|
64
|
+
builder.proxy proxy_options unless proxy_options.empty?
|
65
|
+
builder.adapter adapter
|
66
|
+
end
|
67
|
+
|
68
|
+
request.interface = connection
|
69
|
+
|
70
|
+
return request
|
71
|
+
end
|
72
|
+
|
19
73
|
end
|
20
74
|
end
|
21
75
|
end
|
data/lib/http_utilities/http/mechanize/client.rb
CHANGED
@@ -30,7 +30,11 @@ module HttpUtilities
|
|
30
30
|
self.agent.log = ::Logger.new(logger) if (verbose)
|
31
31
|
|
32
32
|
self.set_proxy_options(options)
|
33
|
-
|
33
|
+
|
34
|
+
if (self.proxy[:host] && self.proxy[:port])
|
35
|
+
log(:info, "[HttpUtilities::Http::Mechanize::Client] - Will use proxy #{self.proxy[:host]}:#{self.proxy[:port]} for Mechanize.")
|
36
|
+
self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password])
|
37
|
+
end
|
34
38
|
|
35
39
|
self.set_user_agent
|
36
40
|
(self.user_agent) ? self.agent.user_agent = self.user_agent : self.agent.user_agent_alias = 'Mac Safari'
|
data/lib/http_utilities/http/proxy_support.rb
CHANGED
@@ -84,6 +84,16 @@ module HttpUtilities
|
|
84
84
|
def using_proxy?
|
85
85
|
return (self.proxy[:host] && self.proxy[:port] && self.proxy[:port] > 0)
|
86
86
|
end
|
87
|
+
|
88
|
+
def generate_proxy_options
|
89
|
+
proxy_options = {}
|
90
|
+
|
91
|
+
proxy_options[:uri] = "http://#{self.proxy[:host]}:#{self.proxy[:port]}"
|
92
|
+
proxy_options[:user] = self.proxy[:username] if self.proxy[:username] && self.proxy[:username].present?
|
93
|
+
proxy_options[:password] = self.proxy[:password] if self.proxy[:password] && self.proxy[:password].present?
|
94
|
+
|
95
|
+
return proxy_options
|
96
|
+
end
|
87
97
|
|
88
98
|
end
|
89
99
|
end
|
data/lib/http_utilities/http/request.rb
CHANGED
@@ -5,12 +5,11 @@ module HttpUtilities
|
|
5
5
|
include HttpUtilities::Http::ProxySupport
|
6
6
|
include HttpUtilities::Http::UserAgent
|
7
7
|
|
8
|
-
attr_accessor :interface, :proxy, :
|
8
|
+
attr_accessor :interface, :proxy, :user_agent
|
9
9
|
|
10
|
-
def initialize(interface = nil, proxy = {}
|
10
|
+
def initialize(interface = nil, proxy = {})
|
11
11
|
self.interface = interface
|
12
12
|
self.proxy = proxy
|
13
|
-
self.cookies = cookies
|
14
13
|
|
15
14
|
self.set_user_agent
|
16
15
|
end
|
data/lib/http_utilities/http/response.rb
CHANGED
@@ -1,15 +1,14 @@
|
|
1
1
|
module HttpUtilities
|
2
2
|
module Http
|
3
3
|
class Response
|
4
|
-
include HttpUtilities::Http::Format
|
5
4
|
include HttpUtilities::Http::Logger
|
6
5
|
|
7
6
|
attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding
|
8
7
|
|
9
|
-
def initialize(
|
10
|
-
options = options.
|
8
|
+
def initialize(response = nil, request = nil, options = {})
|
9
|
+
options = options.dup
|
11
10
|
|
12
|
-
self.body = body
|
11
|
+
self.body = (response && response.body) ? response.body : nil
|
13
12
|
self.request = request
|
14
13
|
|
15
14
|
self.parsed_body = nil
|
@@ -34,6 +33,18 @@ module HttpUtilities
|
|
34
33
|
def parse_response
|
35
34
|
self.send("as_#{self.format}".to_sym) if (self.body && self.format)
|
36
35
|
end
|
36
|
+
|
37
|
+
def as_html
|
38
|
+
self.parsed_body = (self.body && self.body != "") ? Nokogiri::HTML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
|
39
|
+
end
|
40
|
+
|
41
|
+
def as_xml
|
42
|
+
self.parsed_body = (self.body && self.body != "") ? Nokogiri::XML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
|
43
|
+
end
|
44
|
+
|
45
|
+
def as_json
|
46
|
+
self.parsed_body = (self.body && self.body != "") ? self.body.to_s.force_encoding("utf-8").to_json : nil
|
47
|
+
end
|
37
48
|
|
38
49
|
def set_page(page)
|
39
50
|
self.page = page
|
data/lib/http_utilities/http/user_agent.rb
CHANGED
@@ -5,7 +5,7 @@ module HttpUtilities
|
|
5
5
|
module UserAgent
|
6
6
|
|
7
7
|
def set_user_agent
|
8
|
-
user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS
|
8
|
+
user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS.sample : nil
|
9
9
|
self.user_agent = user_agent if (user_agent && self.respond_to?(:user_agent=))
|
10
10
|
end
|
11
11
|
|
data/lib/http_utilities/proxies/mongo/proxy_module.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    module Mongo

      # Query and formatting helpers mixed into the application's ::Proxy model.
      # Including the module extends the model with ClassMethods and includes
      # InstanceMethods. The query helpers use document-store criteria calls
      # (any_of, order_by, skip and symbol operators such as :field.lt);
      # presumably the model is a Mongoid document -- confirm against the host app.
      module ProxyModule

        def self.included(base)
          base.send :extend, ClassMethods
          base.send :include, InstanceMethods
        end

        module ClassMethods
          # Builds the criteria for proxies that are due for a (re)check.
          #
          # protocol / proxy_type   - restrict the scope; :all disables the filter.
          # date                    - proxies last checked before this time are due.
          # limit                   - maximum number of proxies returned.
          # maximum_failed_attempts - proxies above this failure count are skipped.
          #
          # Returns the criteria object (not yet loaded).
          def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
            proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)

            # Due when never checked (field missing or nil) or checked before +date+.
            # The middle clause used to be `:last_checked_at.ne => nil`, which matched
            # every proxy that had ever been checked and made the date cut-off useless.
            proxies = proxies.any_of(
              {:last_checked_at.exists => false},
              {:last_checked_at => nil},
              {:last_checked_at.exists => true, :last_checked_at.ne => nil, :last_checked_at.lt => date}
            )

            # Proxies without a failure counter are eligible as well.
            proxies = proxies.any_of(
              {:failed_attempts.exists => false},
              {:failed_attempts.in => ["", nil]},
              {:failed_attempts.exists => true, :failed_attempts.nin => ["", nil], :failed_attempts.lte => maximum_failed_attempts}
            )

            proxies = proxies.order_by([[:valid_proxy, :asc], [:failed_attempts, :asc], [:last_checked_at, :asc]])
            proxies = proxies.limit(limit)

            return proxies
          end

          # Picks one valid proxy at random.
          #
          # maximum_failed_attempts - optional upper bound on failed_attempts.
          # retries                 - how many times the lookup is attempted when
          #                           it raises a StandardError.
          #
          # Returns a proxy, or nil when none matches or every attempt failed.
          def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil, retries: 3)
            proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            proxies = proxies.where(valid_proxy: true)
            proxies = proxies.where(:failed_attempts.lte => maximum_failed_attempts) if maximum_failed_attempts
            proxy = nil

            begin
              total = proxies.count
              # rand(0) returns a Float in [0, 1) instead of an offset, so only
              # skip when there is at least one candidate.
              proxy = proxies.skip(rand(total)).first if total > 0

            rescue StandardError
              retries -= 1
              retry if retries > 0
            end

            return proxy
          end

          # Base scope on ::Proxy, narrowed by protocol and/or proxy type unless
          # the respective argument is nil or :all (case-insensitive).
          def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            proxies = ::Proxy.where(nil)
            proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
            proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))

            return proxies
          end

          # Returns "host:port", prefixed with "http://" when include_http is true.
          def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
            proxy_address = "#{proxy_host}:#{proxy_port}"
            proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))

            return proxy_address
          end

          # Returns "username:password".
          def format_proxy_credentials(username, password)
            return "#{username}:#{password}"
          end
        end

        module InstanceMethods
          # "host:port" for this proxy, optionally prefixed with "http://".
          def proxy_address(include_http = false)
            return ::Proxy.format_proxy_address(self.host, self.port, include_http)
          end

          # "username:password" for this proxy.
          def proxy_credentials
            return ::Proxy.format_proxy_credentials(self.username, self.password)
          end

          # Credentials hash for a SOCKS connection: {} without a username,
          # {user:} with only a username, {user:, password:} with both.
          # nil values are treated like empty strings (the previous version
          # raised NoMethodError on nil).
          def socks_proxy_credentials
            credentials = {}

            unless self.username.to_s.empty?
              credentials[:user] = self.username
              credentials[:password] = self.password unless self.password.to_s.empty?
            end

            return credentials
          end
        end

      end

    end
  end
end
|
data/lib/http_utilities/proxies/mysql/proxy_module.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    module Mysql

      # Query and formatting helpers mixed into the application's ::Proxy model
      # when it is stored in a SQL database. Including the module extends the
      # model with ClassMethods and includes InstanceMethods. The helpers rely on
      # relation methods (where/order/limit), `uncached` and
      # ActiveRecord::Base.connection being available on the including class.
      module ProxyModule

        def self.included(base)
          base.send :extend, ClassMethods
          base.send :include, InstanceMethods
        end

        module ClassMethods
          # Builds the relation for proxies that are due for a (re)check.
          #
          # protocol / proxy_type   - restrict the scope; :all disables the filter.
          # date                    - proxies last checked before this time are due.
          # limit                   - maximum number of proxies returned.
          # maximum_failed_attempts - proxies above this failure count are skipped.
          #
          # Returns the relation (not yet loaded).
          def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
            proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            proxies = proxies.where(["(last_checked_at IS NULL OR last_checked_at < ?)", date])
            # A NULL counter means "no failures recorded yet"; a bare `<=` comparison
            # would silently exclude those rows, unlike the Mongo variant of this module.
            proxies = proxies.where(["(failed_attempts IS NULL OR failed_attempts <= ?)", maximum_failed_attempts])
            proxies = proxies.order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC")
            proxies = proxies.limit(limit)

            return proxies
          end

          # Picks one valid, previously checked proxy at random.
          #
          # maximum_failed_attempts - optional upper bound on failed_attempts.
          #
          # Returns a proxy or nil when none matches.
          def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
            proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            proxies = proxies.where(["valid_proxy = ? AND last_checked_at IS NOT NULL", true])
            proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts]) if maximum_failed_attempts

            # The random function is named RANDOM() in SQLite and PostgreSQL and
            # RAND() in MySQL; unknown adapters keep the MySQL spelling as before.
            order_clause = case ActiveRecord::Base.connection.class.name
              when "ActiveRecord::ConnectionAdapters::SQLite3Adapter", "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
                "RANDOM() DESC"
              else
                "RAND() DESC"
            end

            proxies = proxies.order(order_clause)

            proxy = nil

            # Run outside the query cache so repeated calls are not answered
            # with the same cached row.
            uncached do
              proxy = proxies.limit(1).first
            end

            return proxy
          end

          # Base scope on ::Proxy, narrowed by protocol and/or proxy type unless
          # the respective argument is nil or :all (case-insensitive).
          def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
            proxies = ::Proxy.where(nil)
            proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
            proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))

            return proxies
          end

          # Returns "host:port", prefixed with "http://" when include_http is true.
          def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
            proxy_address = "#{proxy_host}:#{proxy_port}"
            proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
            return proxy_address
          end

          # Returns "username:password".
          def format_proxy_credentials(username, password)
            return "#{username}:#{password}"
          end
        end

        module InstanceMethods
          # "host:port" for this proxy, optionally prefixed with "http://".
          def proxy_address(include_http = false)
            return ::Proxy.format_proxy_address(self.host, self.port, include_http)
          end

          # "username:password" for this proxy.
          def proxy_credentials
            return ::Proxy.format_proxy_credentials(self.username, self.password)
          end

          # Credentials hash for a SOCKS connection: {} without a username,
          # {user:} with only a username, {user:, password:} with both.
          # NULL columns (nil) are treated like empty strings (the previous
          # version raised NoMethodError on nil).
          def socks_proxy_credentials
            credentials = {}

            unless self.username.to_s.empty?
              credentials[:user] = self.username
              credentials[:password] = self.password unless self.password.to_s.empty?
            end

            return credentials
          end
        end

      end

    end
  end
end
|
data/lib/http_utilities/proxies/proxy_checker.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
require 'socket'
|
4
4
|
require 'net/ssh/proxy/socks5'
|
5
|
-
require 'activerecord-import'
|
6
5
|
|
7
6
|
module HttpUtilities
|
8
7
|
module Proxies
|
@@ -11,9 +10,8 @@ module HttpUtilities
|
|
11
10
|
attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts
|
12
11
|
|
13
12
|
def initialize
|
14
|
-
self.client = HttpUtilities::Http::
|
15
|
-
|
16
|
-
|
13
|
+
self.client = HttpUtilities::Http::Client.new
|
14
|
+
|
17
15
|
self.processed_proxies = []
|
18
16
|
|
19
17
|
self.limit = 1000
|
@@ -23,7 +21,6 @@ module HttpUtilities
|
|
23
21
|
|
24
22
|
def check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
|
25
23
|
check_proxies(protocol: protocol, proxy_type: proxy_type, mode: mode, maximum_failed_attempts: maximum_failed_attempts)
|
26
|
-
update_proxies
|
27
24
|
end
|
28
25
|
|
29
26
|
def check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
|
@@ -77,86 +74,57 @@ module HttpUtilities
|
|
77
74
|
valid_proxy = false
|
78
75
|
end
|
79
76
|
|
80
|
-
|
81
|
-
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
|
82
|
-
else
|
83
|
-
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
|
84
|
-
end
|
85
|
-
|
86
|
-
self.processed_proxies << {proxy: proxy, valid: valid_proxy}
|
77
|
+
update_proxy(proxy, valid_proxy)
|
87
78
|
end
|
88
79
|
|
89
|
-
def check_http_proxy(proxy, timeout
|
90
|
-
document = nil
|
91
|
-
valid_proxy = false
|
92
|
-
|
80
|
+
def check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10)
|
93
81
|
options = {
|
94
82
|
use_proxy: true,
|
95
|
-
proxy: proxy.
|
83
|
+
proxy: {host: proxy.host, port: proxy.port},
|
96
84
|
proxy_protocol: proxy.protocol,
|
97
|
-
timeout: timeout
|
85
|
+
timeout: timeout
|
98
86
|
}
|
99
|
-
|
100
|
-
Rails.logger.info "#{Time.now}: Fetching Google.com with proxy #{proxy.proxy_address}."
|
101
87
|
|
102
|
-
|
103
|
-
|
104
|
-
if (page)
|
105
|
-
parser = self.client.get_parser(page)
|
106
|
-
title = parser.at_css("head title")
|
107
|
-
|
108
|
-
if (title && title.content)
|
109
|
-
begin
|
110
|
-
title = title.content.encode("UTF-8").strip.downcase
|
111
|
-
body_content = page.body.to_s.encode("UTF-8").strip.downcase
|
112
|
-
|
113
|
-
valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)
|
114
|
-
|
115
|
-
Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"
|
116
|
-
|
117
|
-
rescue Exception => e
|
118
|
-
Rails.logger.error "Exception occured while trying to check proxy #{proxy.proxy_address}. Error Class: #{e.class}. Error Message: #{e.message}"
|
119
|
-
valid_proxy = false
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
88
|
+
options.merge!(proxy_username: proxy.username) if proxy.username && proxy.username.present?
|
89
|
+
options.merge!(proxy_password: proxy.password) if proxy.password && proxy.password.present?
|
123
90
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
end
|
91
|
+
Rails.logger.info "#{Time.now}: Fetching robots.txt for Google.com with proxy #{proxy.proxy_address}. Using authentication? #{options.has_key?(:proxy_username).to_s}"
|
92
|
+
|
93
|
+
response = self.client.get(test_url, options: options)
|
94
|
+
valid_proxy = (response && response.body && response.body =~ /Allow: \/search\/about/i)
|
129
95
|
|
130
|
-
|
96
|
+
update_proxy(proxy, valid_proxy)
|
131
97
|
end
|
132
|
-
|
98
|
+
|
133
99
|
def update_proxies
|
134
|
-
columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
|
135
|
-
values = []
|
136
|
-
|
137
100
|
Rails.logger.info "Updating/Importing #{self.processed_proxies.size} proxies"
|
138
101
|
|
139
102
|
if (self.processed_proxies && self.processed_proxies.any?)
|
140
103
|
self.processed_proxies.each do |value|
|
141
|
-
|
142
|
-
valid = value[:valid]
|
143
|
-
successful_attempts = proxy.successful_attempts
|
144
|
-
failed_attempts = proxy.failed_attempts
|
145
|
-
|
146
|
-
if (valid)
|
147
|
-
successful_attempts += 1
|
148
|
-
else
|
149
|
-
failed_attempts += 1
|
150
|
-
end
|
151
|
-
|
152
|
-
is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
|
153
|
-
value_arr = [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
|
154
|
-
values << value_arr
|
104
|
+
update_proxy(value[:proxy], value[:valid])
|
155
105
|
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def update_proxy(proxy, valid)
|
110
|
+
Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is #{valid ? "working" : "not working"}!"
|
111
|
+
|
112
|
+
successful_attempts = proxy.successful_attempts || 0
|
113
|
+
failed_attempts = proxy.failed_attempts || 0
|
156
114
|
|
157
|
-
|
115
|
+
if (valid)
|
116
|
+
successful_attempts += 1
|
117
|
+
else
|
118
|
+
failed_attempts += 1
|
158
119
|
end
|
159
120
|
|
121
|
+
is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
|
122
|
+
|
123
|
+
proxy.valid_proxy = is_valid
|
124
|
+
proxy.successful_attempts = successful_attempts
|
125
|
+
proxy.failed_attempts = failed_attempts
|
126
|
+
proxy.last_checked_at = Time.now
|
127
|
+
proxy.save
|
160
128
|
end
|
161
129
|
|
162
130
|
end
|
data/lib/http_utilities/proxies/proxy_seeder.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
module HttpUtilities
|
2
2
|
module Proxies
|
3
3
|
class ProxySeeder
|
4
|
-
require 'activerecord-import'
|
5
4
|
attr_accessor :protocols, :proxy_types, :categories
|
6
5
|
|
7
6
|
def initialize
|
@@ -27,20 +26,23 @@ module HttpUtilities
|
|
27
26
|
end
|
28
27
|
|
29
28
|
def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
|
30
|
-
columns
|
31
|
-
category
|
29
|
+
columns = [:host, :port, :protocol, :proxy_type, :category]
|
30
|
+
category = (category && !category.eql?('unspecified')) ? category : nil
|
32
31
|
|
33
32
|
begin
|
34
|
-
values = []
|
35
|
-
|
36
33
|
proxy_list.slice!(0..1000).each do |proxy|
|
37
|
-
host
|
38
|
-
port
|
39
|
-
|
40
|
-
|
34
|
+
host = proxy[:host]
|
35
|
+
port = proxy[:port]
|
36
|
+
|
37
|
+
proxy = ::Proxy.where(host: host, port: port).first || ::Proxy.new
|
38
|
+
proxy.host = host
|
39
|
+
proxy.port = port
|
40
|
+
proxy.protocol = protocol
|
41
|
+
proxy.proxy_type = proxy_type
|
42
|
+
proxy.category = category
|
43
|
+
proxy.save
|
41
44
|
end
|
42
|
-
|
43
|
-
::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false) if (values && values.any?)
|
45
|
+
|
44
46
|
end while (proxy_list && proxy_list.any?)
|
45
47
|
end
|
46
48
|
|