http_utilities 1.1.2 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a18294693d9cd95171156f4fccdf461a8c45127
4
- data.tar.gz: 6d32ede9f59c390b64ebcfcc47ec00901fe6e77f
3
+ metadata.gz: 08cfb5e367ca9f757c3eb75ff8958a55af204bf6
4
+ data.tar.gz: ec280ce2d9e5bc1f0e1ad7fbd724a6abb438409c
5
5
  SHA512:
6
- metadata.gz: 51c959c41979b8c1a253c1b5d43e019ac19e692b9fbb7f92b04da37be639099949bd540e2ed2b068f30d9943a2a3f46212401bc416e4a1a44730d1758e553f09
7
- data.tar.gz: 0fe6a191e9f0e802622d5eb24a2529e0b6c9d1c1cdc1525347570492c1d27aae74baa15e5650566dbc69701397e58d9df47ea4fb8e105002aeea8b69ca98c930
6
+ metadata.gz: 0395ba295966ce1622021940273827ff880d691c78c3f8ed71f45edeebf418932e602bf9219e2d3ad6b245e3830de4a59aac73d363b394c9c73a631bab8f531d
7
+ data.tar.gz: 057783241fe6465bff927446dc03c31c10faa3a20784a34caff5edd6bdebc4617c6cb584d15ad0783d4c4babc4c4e2e39efa8566e0a6b83e90e08e2bc1613f81
data/Gemfile CHANGED
@@ -1,16 +1,11 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem "nokogiri", ">= 1.5.5"
4
- gem "mechanize", ">= 2.5"
5
- gem "multi_xml", ">= 0.5"
6
- gem "net-ssh", ">= 2.8"
3
+ gem 'nokogiri'
4
+ gem 'mechanize'
5
+ gem 'net-ssh'
7
6
 
8
7
  gem "activerecord-import", :require => false
9
8
 
10
- platforms :ruby do
11
- gem 'curb'
12
- end
13
-
14
9
  group :development, :test do
15
10
  gem 'rails'
16
11
  gem 'jeweler'
@@ -18,6 +13,6 @@ group :development, :test do
18
13
  gem 'sqlite3'
19
14
 
20
15
  platforms :ruby do
21
- gem "mysql2", ">= 0.3.11"
16
+ gem "mysql2"
22
17
  end
23
18
  end
@@ -3,24 +3,23 @@ Gem::Specification.new do |s|
3
3
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=
4
4
 
5
5
  s.name = "http_utilities"
6
- s.version = "1.1.2"
6
+ s.version = "1.1.5"
7
7
 
8
8
  s.authors = ["Sebastian Johnsson"]
9
- s.description = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"
9
+ s.description = "Wrapper for Faraday with additional functionality"
10
10
 
11
11
  s.homepage = "http://github.com/Agiley/http_utilities"
12
- s.summary = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"
12
+ s.summary = "Wrapper for Faraday with additional functionality"
13
13
 
14
- s.add_dependency(%q<nokogiri>, [">= 1.5.5"])
15
- s.add_dependency(%q<mechanize>, [">= 2.5"])
16
- s.add_dependency(%q<multi_xml>, [">= 0.5"])
17
- s.add_dependency(%q<net-ssh>, [">= 2.8"])
18
- s.add_dependency(%q<activerecord-import>, [">= 0"])
14
+ s.add_dependency(%q<nokogiri>, [">= 1.6"])
15
+ s.add_dependency(%q<mechanize>, [">= 2.7"])
16
+ s.add_dependency(%q<net-ssh>, [">= 2.9"])
19
17
 
20
18
  s.add_development_dependency(%q<rails>, [">= 0"])
21
19
  s.add_development_dependency(%q<rspec>, [">= 0"])
22
20
  s.add_development_dependency(%q<sqlite3>, [">= 0"])
23
- s.add_development_dependency(%q<mysql2>, [">= 0.3.11"])
21
+ s.add_development_dependency(%q<mysql2>, [">= 0"])
22
+ s.add_development_dependency(%q<activerecord-import>, [">= 0"])
24
23
 
25
24
  # = MANIFEST =
26
25
  s.files = %w[
@@ -38,16 +37,9 @@ Gem::Specification.new do |s|
38
37
  lib/generators/templates/http_utilities.rb
39
38
  lib/generators/templates/user_agents.yml
40
39
  lib/http_utilities.rb
41
- lib/http_utilities/http/adapters/curb.rb
42
- lib/http_utilities/http/adapters/net_http.rb
43
- lib/http_utilities/http/adapters/open_uri.rb
44
40
  lib/http_utilities/http/client.rb
45
- lib/http_utilities/http/cookies.rb
46
- lib/http_utilities/http/format.rb
47
- lib/http_utilities/http/get.rb
48
41
  lib/http_utilities/http/logger.rb
49
42
  lib/http_utilities/http/mechanize/client.rb
50
- lib/http_utilities/http/post.rb
51
43
  lib/http_utilities/http/proxy_support.rb
52
44
  lib/http_utilities/http/request.rb
53
45
  lib/http_utilities/http/response.rb
@@ -57,8 +49,9 @@ Gem::Specification.new do |s|
57
49
  lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
58
50
  lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
59
51
  lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
52
+ lib/http_utilities/proxies/mongo/proxy_module.rb
53
+ lib/http_utilities/proxies/mysql/proxy_module.rb
60
54
  lib/http_utilities/proxies/proxy_checker.rb
61
- lib/http_utilities/proxies/proxy_module.rb
62
55
  lib/http_utilities/proxies/proxy_seeder.rb
63
56
  lib/http_utilities/railtie.rb
64
57
  lib/tasks/http_utilities_tasks.rake
@@ -1,21 +1,75 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  require 'open-uri'
3
- require 'net/http'
4
3
  require 'uri'
5
4
  require 'cgi'
6
5
 
7
6
  module HttpUtilities
8
7
  module Http
9
8
  class Client
10
- include HttpUtilities::Http::Cookies
11
- include HttpUtilities::Http::Url
12
- include HttpUtilities::Http::Get
13
- include HttpUtilities::Http::Post
14
9
  include HttpUtilities::Http::Logger
10
+
11
+ def get(url, arguments: {}, options: {}, retries: 3)
12
+ response = nil
13
+ request = build_request(options)
14
+
15
+ begin
16
+ response = request.interface.get(url, arguments)
17
+ response = HttpUtilities::Http::Response.new(response, request, options)
18
+
19
+ rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
20
+ log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
21
+ retries -= 1
22
+ retry if retries > 0
23
+ end
15
24
 
16
- include HttpUtilities::Http::Adapters::NetHttp
17
- include HttpUtilities::Http::Adapters::OpenUri
18
- include HttpUtilities::Http::Adapters::Curb
25
+ return response
26
+ end
27
+
28
+ def post(url, data: nil, options: {}, retries: 3)
29
+ response = nil
30
+ request = build_request(options)
31
+
32
+ begin
33
+ response = request.interface.post(url, data)
34
+ response = HttpUtilities::Http::Response.new(response, request, options)
35
+
36
+ rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
37
+ log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
38
+ retries -= 1
39
+ retry if retries > 0
40
+ end
41
+
42
+ return response
43
+ end
44
+
45
+ private
46
+ def build_request(options = {}, faraday_options = {})
47
+ options = options.dup
48
+ options = options.merge(ssl: {:verify => false})
49
+
50
+ adapter = options.delete(:adapter) { |opt| Faraday.default_adapter }
51
+ timeout = options.delete(:timeout) { |opt| 60 }
52
+ open_timeout = options.delete(:open_timeout) { |opt| 60 }
53
+
54
+ request = HttpUtilities::Http::Request.new
55
+ request.set_proxy_options(options)
56
+
57
+ proxy_options = request.generate_proxy_options
58
+
59
+ connection = Faraday.new(faraday_options) do |builder|
60
+ builder.headers[:user_agent] = request.user_agent
61
+ builder.options[:timeout] = timeout
62
+ builder.options[:open_timeout] = open_timeout
63
+ #builder.response :logger
64
+ builder.proxy proxy_options unless proxy_options.empty?
65
+ builder.adapter adapter
66
+ end
67
+
68
+ request.interface = connection
69
+
70
+ return request
71
+ end
72
+
19
73
  end
20
74
  end
21
75
  end
@@ -30,7 +30,11 @@ module HttpUtilities
30
30
  self.agent.log = ::Logger.new(logger) if (verbose)
31
31
 
32
32
  self.set_proxy_options(options)
33
- self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password]) if (self.proxy[:host] && self.proxy[:port])
33
+
34
+ if (self.proxy[:host] && self.proxy[:port])
35
+ log(:info, "[HttpUtilities::Http::Mechanize::Client] - Will use proxy #{self.proxy[:host]}:#{self.proxy[:port]} for Mechanize.")
36
+ self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password])
37
+ end
34
38
 
35
39
  self.set_user_agent
36
40
  (self.user_agent) ? self.agent.user_agent = self.user_agent : self.agent.user_agent_alias = 'Mac Safari'
@@ -84,6 +84,16 @@ module HttpUtilities
84
84
  def using_proxy?
85
85
  return (self.proxy[:host] && self.proxy[:port] && self.proxy[:port] > 0)
86
86
  end
87
+
88
+ def generate_proxy_options
89
+ proxy_options = {}
90
+
91
+ proxy_options[:uri] = "http://#{self.proxy[:host]}:#{self.proxy[:port]}"
92
+ proxy_options[:user] = self.proxy[:username] if self.proxy[:username] && self.proxy[:username].present?
93
+ proxy_options[:password] = self.proxy[:password] if self.proxy[:password] && self.proxy[:password].present?
94
+
95
+ return proxy_options
96
+ end
87
97
 
88
98
  end
89
99
  end
@@ -5,12 +5,11 @@ module HttpUtilities
5
5
  include HttpUtilities::Http::ProxySupport
6
6
  include HttpUtilities::Http::UserAgent
7
7
 
8
- attr_accessor :interface, :proxy, :cookies, :user_agent
8
+ attr_accessor :interface, :proxy, :user_agent
9
9
 
10
- def initialize(interface = nil, proxy = {}, cookies = [])
10
+ def initialize(interface = nil, proxy = {})
11
11
  self.interface = interface
12
12
  self.proxy = proxy
13
- self.cookies = cookies
14
13
 
15
14
  self.set_user_agent
16
15
  end
@@ -1,15 +1,14 @@
1
1
  module HttpUtilities
2
2
  module Http
3
3
  class Response
4
- include HttpUtilities::Http::Format
5
4
  include HttpUtilities::Http::Logger
6
5
 
7
6
  attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding
8
7
 
9
- def initialize(body = nil, request = nil, options = {})
10
- options = options.clone()
8
+ def initialize(response = nil, request = nil, options = {})
9
+ options = options.dup
11
10
 
12
- self.body = body
11
+ self.body = (response && response.body) ? response.body : nil
13
12
  self.request = request
14
13
 
15
14
  self.parsed_body = nil
@@ -34,6 +33,18 @@ module HttpUtilities
34
33
  def parse_response
35
34
  self.send("as_#{self.format}".to_sym) if (self.body && self.format)
36
35
  end
36
+
37
+ def as_html
38
+ self.parsed_body = (self.body && self.body != "") ? Nokogiri::HTML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
39
+ end
40
+
41
+ def as_xml
42
+ self.parsed_body = (self.body && self.body != "") ? Nokogiri::XML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
43
+ end
44
+
45
+ def as_json
46
+ self.parsed_body = (self.body && self.body != "") ? self.body.to_s.force_encoding("utf-8").to_json : nil
47
+ end
37
48
 
38
49
  def set_page(page)
39
50
  self.page = page
@@ -5,7 +5,7 @@ module HttpUtilities
5
5
  module UserAgent
6
6
 
7
7
  def set_user_agent
8
- user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS[rand(USER_AGENTS.size)] : ""
8
+ user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS.sample : nil
9
9
  self.user_agent = user_agent if (user_agent && self.respond_to?(:user_agent=))
10
10
  end
11
11
 
@@ -0,0 +1,97 @@
1
+ module HttpUtilities
2
+ module Proxies
3
+ module Mongo
4
+
5
+ module ProxyModule
6
+
7
+ def self.included(base)
8
+ base.send :extend, ClassMethods
9
+ base.send :include, InstanceMethods
10
+ end
11
+
12
+ module ClassMethods
13
+ def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
14
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
15
+
16
+ proxies = proxies.any_of(
17
+ {:last_checked_at.exists => false},
18
+ {:last_checked_at.ne => nil},
19
+ {:last_checked_at.exists => true, :last_checked_at.ne => nil, :last_checked_at.lt => date}
20
+ )
21
+
22
+ proxies = proxies.any_of(
23
+ {:failed_attempts.exists => false},
24
+ {:failed_attempts.in => ["", nil]},
25
+ {:failed_attempts.exists => true, :failed_attempts.nin => ["", nil], :failed_attempts.lte => maximum_failed_attempts}
26
+ )
27
+
28
+ proxies = proxies.order_by([[:valid_proxy, :asc], [:failed_attempts, :asc], [:last_checked_at, :asc]])
29
+ proxies = proxies.limit(limit)
30
+
31
+ return proxies
32
+ end
33
+
34
+ def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil, retries: 3)
35
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
36
+ proxies = proxies.where(valid_proxy: true)
37
+ proxies = proxies.where(:failed_attempts.lte => maximum_failed_attempts) if maximum_failed_attempts
38
+ proxy = nil
39
+
40
+ begin
41
+ proxy = proxies.skip(rand(proxies.count)).first
42
+
43
+ rescue StandardError
44
+ retries -= 1
45
+ retry if retries > 0
46
+ end
47
+
48
+ return proxy
49
+ end
50
+
51
+ def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
52
+ proxies = ::Proxy.where(nil)
53
+ proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
54
+ proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
55
+
56
+ return proxies
57
+ end
58
+
59
+ def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
60
+ proxy_address = "#{proxy_host}:#{proxy_port}"
61
+ proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
62
+
63
+ return proxy_address
64
+ end
65
+
66
+ def format_proxy_credentials(username, password)
67
+ return "#{username}:#{password}"
68
+ end
69
+ end
70
+
71
+ module InstanceMethods
72
+ def proxy_address(include_http = false)
73
+ return ::Proxy.format_proxy_address(self.host, self.port, include_http)
74
+ end
75
+
76
+ def proxy_credentials
77
+ return ::Proxy.format_proxy_credentials(self.username, self.password)
78
+ end
79
+
80
+ def socks_proxy_credentials
81
+ credentials = {}
82
+
83
+ if (!self.username.empty? && !self.password.empty?)
84
+ credentials = {user: self.username, password: self.password}
85
+ elsif (!self.username.empty? && self.password.empty?)
86
+ credentials = {user: self.username}
87
+ end
88
+
89
+ return credentials
90
+ end
91
+ end
92
+
93
+ end
94
+
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,93 @@
1
+ module HttpUtilities
2
+ module Proxies
3
+ module Mysql
4
+
5
+ module ProxyModule
6
+
7
+ def self.included(base)
8
+ base.send :extend, ClassMethods
9
+ base.send :include, InstanceMethods
10
+ end
11
+
12
+ module ClassMethods
13
+ def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
14
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
15
+ proxies = proxies.where(["(last_checked_at IS NULL OR last_checked_at < ?)", date])
16
+ proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts])
17
+ proxies = proxies.order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC")
18
+ proxies = proxies.limit(limit)
19
+
20
+ return proxies
21
+ end
22
+
23
+ def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
24
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
25
+ proxies = proxies.where(["valid_proxy = ? AND last_checked_at IS NOT NULL", true])
26
+ proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts]) if maximum_failed_attempts
27
+
28
+ order_clause = case ActiveRecord::Base.connection.class.name
29
+ when "ActiveRecord::ConnectionAdapters::MysqlAdapter", "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
30
+ "RAND() DESC"
31
+ when "ActiveRecord::ConnectionAdapters::SQLite3Adapter"
32
+ "RANDOM() DESC"
33
+ else
34
+ "RAND() DESC"
35
+ end
36
+
37
+ proxies = proxies.order(order_clause)
38
+
39
+ proxy = nil
40
+
41
+ uncached do
42
+ proxy = proxies.limit(1).first
43
+ end
44
+
45
+ return proxy
46
+ end
47
+
48
+ def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
49
+ proxies = ::Proxy.where(nil)
50
+ proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
51
+ proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
52
+
53
+ return proxies
54
+ end
55
+
56
+ def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
57
+ proxy_address = "#{proxy_host}:#{proxy_port}"
58
+ proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
59
+ return proxy_address
60
+ end
61
+
62
+ def format_proxy_credentials(username, password)
63
+ return "#{username}:#{password}"
64
+ end
65
+ end
66
+
67
+ module InstanceMethods
68
+ def proxy_address(include_http = false)
69
+ return ::Proxy.format_proxy_address(self.host, self.port, include_http)
70
+ end
71
+
72
+ def proxy_credentials
73
+ return ::Proxy.format_proxy_credentials(self.username, self.password)
74
+ end
75
+
76
+ def socks_proxy_credentials
77
+ credentials = {}
78
+
79
+ if (!self.username.empty? && !self.password.empty?)
80
+ credentials = {user: self.username, password: self.password}
81
+ elsif (!self.username.empty? && self.password.empty?)
82
+ credentials = {user: self.username}
83
+ end
84
+
85
+ return credentials
86
+ end
87
+ end
88
+
89
+ end
90
+
91
+ end
92
+ end
93
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'socket'
4
4
  require 'net/ssh/proxy/socks5'
5
- require 'activerecord-import'
6
5
 
7
6
  module HttpUtilities
8
7
  module Proxies
@@ -11,9 +10,8 @@ module HttpUtilities
11
10
  attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts
12
11
 
13
12
  def initialize
14
- self.client = HttpUtilities::Http::Mechanize::Client.new(verbose: false)
15
- self.client.agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
16
-
13
+ self.client = HttpUtilities::Http::Client.new
14
+
17
15
  self.processed_proxies = []
18
16
 
19
17
  self.limit = 1000
@@ -23,7 +21,6 @@ module HttpUtilities
23
21
 
24
22
  def check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
25
23
  check_proxies(protocol: protocol, proxy_type: proxy_type, mode: mode, maximum_failed_attempts: maximum_failed_attempts)
26
- update_proxies
27
24
  end
28
25
 
29
26
  def check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
@@ -77,86 +74,57 @@ module HttpUtilities
77
74
  valid_proxy = false
78
75
  end
79
76
 
80
- if (valid_proxy)
81
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
82
- else
83
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
84
- end
85
-
86
- self.processed_proxies << {proxy: proxy, valid: valid_proxy}
77
+ update_proxy(proxy, valid_proxy)
87
78
  end
88
79
 
89
- def check_http_proxy(proxy, timeout = 60)
90
- document = nil
91
- valid_proxy = false
92
-
80
+ def check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10)
93
81
  options = {
94
82
  use_proxy: true,
95
- proxy: proxy.proxy_address,
83
+ proxy: {host: proxy.host, port: proxy.port},
96
84
  proxy_protocol: proxy.protocol,
97
- timeout: timeout,
85
+ timeout: timeout
98
86
  }
99
-
100
- Rails.logger.info "#{Time.now}: Fetching Google.com with proxy #{proxy.proxy_address}."
101
87
 
102
- page = self.client.get_page("https://www.google.com/webhp?hl=en&gws_rd=ssl", options)
103
-
104
- if (page)
105
- parser = self.client.get_parser(page)
106
- title = parser.at_css("head title")
107
-
108
- if (title && title.content)
109
- begin
110
- title = title.content.encode("UTF-8").strip.downcase
111
- body_content = page.body.to_s.encode("UTF-8").strip.downcase
112
-
113
- valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)
114
-
115
- Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"
116
-
117
- rescue Exception => e
118
- Rails.logger.error "Exception occured while trying to check proxy #{proxy.proxy_address}. Error Class: #{e.class}. Error Message: #{e.message}"
119
- valid_proxy = false
120
- end
121
- end
122
- end
88
+ options.merge!(proxy_username: proxy.username) if proxy.username && proxy.username.present?
89
+ options.merge!(proxy_password: proxy.password) if proxy.password && proxy.password.present?
123
90
 
124
- if (valid_proxy)
125
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
126
- else
127
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
128
- end
91
+ Rails.logger.info "#{Time.now}: Fetching robots.txt for Google.com with proxy #{proxy.proxy_address}. Using authentication? #{options.has_key?(:proxy_username).to_s}"
92
+
93
+ response = self.client.get(test_url, options: options)
94
+ valid_proxy = (response && response.body && response.body =~ /Allow: \/search\/about/i)
129
95
 
130
- self.processed_proxies << {proxy: proxy, valid: valid_proxy}
96
+ update_proxy(proxy, valid_proxy)
131
97
  end
132
-
98
+
133
99
  def update_proxies
134
- columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
135
- values = []
136
-
137
100
  Rails.logger.info "Updating/Importing #{self.processed_proxies.size} proxies"
138
101
 
139
102
  if (self.processed_proxies && self.processed_proxies.any?)
140
103
  self.processed_proxies.each do |value|
141
- proxy = value[:proxy]
142
- valid = value[:valid]
143
- successful_attempts = proxy.successful_attempts
144
- failed_attempts = proxy.failed_attempts
145
-
146
- if (valid)
147
- successful_attempts += 1
148
- else
149
- failed_attempts += 1
150
- end
151
-
152
- is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
153
- value_arr = [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
154
- values << value_arr
104
+ update_proxy(value[:proxy], value[:valid])
155
105
  end
106
+ end
107
+ end
108
+
109
+ def update_proxy(proxy, valid)
110
+ Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is #{valid ? "working" : "not working"}!"
111
+
112
+ successful_attempts = proxy.successful_attempts || 0
113
+ failed_attempts = proxy.failed_attempts || 0
156
114
 
157
- ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
115
+ if (valid)
116
+ successful_attempts += 1
117
+ else
118
+ failed_attempts += 1
158
119
  end
159
120
 
121
+ is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
122
+
123
+ proxy.valid_proxy = is_valid
124
+ proxy.successful_attempts = successful_attempts
125
+ proxy.failed_attempts = failed_attempts
126
+ proxy.last_checked_at = Time.now
127
+ proxy.save
160
128
  end
161
129
 
162
130
  end
@@ -1,7 +1,6 @@
1
1
  module HttpUtilities
2
2
  module Proxies
3
3
  class ProxySeeder
4
- require 'activerecord-import'
5
4
  attr_accessor :protocols, :proxy_types, :categories
6
5
 
7
6
  def initialize
@@ -27,20 +26,23 @@ module HttpUtilities
27
26
  end
28
27
 
29
28
  def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
30
- columns = [:host, :port, :protocol, :proxy_type, :category]
31
- category = (category && !category.eql?('unspecified')) ? category : nil
29
+ columns = [:host, :port, :protocol, :proxy_type, :category]
30
+ category = (category && !category.eql?('unspecified')) ? category : nil
32
31
 
33
32
  begin
34
- values = []
35
-
36
33
  proxy_list.slice!(0..1000).each do |proxy|
37
- host = proxy[:host]
38
- port = proxy[:port]
39
- value_arr = [host, port, protocol, proxy_type, category]
40
- values << value_arr
34
+ host = proxy[:host]
35
+ port = proxy[:port]
36
+
37
+ proxy = ::Proxy.where(host: host, port: port).first || ::Proxy.new
38
+ proxy.host = host
39
+ proxy.port = port
40
+ proxy.protocol = protocol
41
+ proxy.proxy_type = proxy_type
42
+ proxy.category = category
43
+ proxy.save
41
44
  end
42
-
43
- ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false) if (values && values.any?)
45
+
44
46
  end while (proxy_list && proxy_list.any?)
45
47
  end
46
48