http_utilities 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1a18294693d9cd95171156f4fccdf461a8c45127
4
- data.tar.gz: 6d32ede9f59c390b64ebcfcc47ec00901fe6e77f
3
+ metadata.gz: 08cfb5e367ca9f757c3eb75ff8958a55af204bf6
4
+ data.tar.gz: ec280ce2d9e5bc1f0e1ad7fbd724a6abb438409c
5
5
  SHA512:
6
- metadata.gz: 51c959c41979b8c1a253c1b5d43e019ac19e692b9fbb7f92b04da37be639099949bd540e2ed2b068f30d9943a2a3f46212401bc416e4a1a44730d1758e553f09
7
- data.tar.gz: 0fe6a191e9f0e802622d5eb24a2529e0b6c9d1c1cdc1525347570492c1d27aae74baa15e5650566dbc69701397e58d9df47ea4fb8e105002aeea8b69ca98c930
6
+ metadata.gz: 0395ba295966ce1622021940273827ff880d691c78c3f8ed71f45edeebf418932e602bf9219e2d3ad6b245e3830de4a59aac73d363b394c9c73a631bab8f531d
7
+ data.tar.gz: 057783241fe6465bff927446dc03c31c10faa3a20784a34caff5edd6bdebc4617c6cb584d15ad0783d4c4babc4c4e2e39efa8566e0a6b83e90e08e2bc1613f81
data/Gemfile CHANGED
@@ -1,16 +1,11 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem "nokogiri", ">= 1.5.5"
4
- gem "mechanize", ">= 2.5"
5
- gem "multi_xml", ">= 0.5"
6
- gem "net-ssh", ">= 2.8"
3
+ gem 'nokogiri'
4
+ gem 'mechanize'
5
+ gem 'net-ssh'
7
6
 
8
7
  gem "activerecord-import", :require => false
9
8
 
10
- platforms :ruby do
11
- gem 'curb'
12
- end
13
-
14
9
  group :development, :test do
15
10
  gem 'rails'
16
11
  gem 'jeweler'
@@ -18,6 +13,6 @@ group :development, :test do
18
13
  gem 'sqlite3'
19
14
 
20
15
  platforms :ruby do
21
- gem "mysql2", ">= 0.3.11"
16
+ gem "mysql2"
22
17
  end
23
18
  end
@@ -3,24 +3,23 @@ Gem::Specification.new do |s|
3
3
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.5") if s.respond_to? :required_rubygems_version=
4
4
 
5
5
  s.name = "http_utilities"
6
- s.version = "1.1.2"
6
+ s.version = "1.1.5"
7
7
 
8
8
  s.authors = ["Sebastian Johnsson"]
9
- s.description = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"
9
+ s.description = "Wrapper for Faraday with additional functionality"
10
10
 
11
11
  s.homepage = "http://github.com/Agiley/http_utilities"
12
- s.summary = "Wrapper for common Http Libraries (Net:HTTP/Open URI/Curl)"
12
+ s.summary = "Wrapper for Faraday with additional functionality"
13
13
 
14
- s.add_dependency(%q<nokogiri>, [">= 1.5.5"])
15
- s.add_dependency(%q<mechanize>, [">= 2.5"])
16
- s.add_dependency(%q<multi_xml>, [">= 0.5"])
17
- s.add_dependency(%q<net-ssh>, [">= 2.8"])
18
- s.add_dependency(%q<activerecord-import>, [">= 0"])
14
+ s.add_dependency(%q<nokogiri>, [">= 1.6"])
15
+ s.add_dependency(%q<mechanize>, [">= 2.7"])
16
+ s.add_dependency(%q<net-ssh>, [">= 2.9"])
19
17
 
20
18
  s.add_development_dependency(%q<rails>, [">= 0"])
21
19
  s.add_development_dependency(%q<rspec>, [">= 0"])
22
20
  s.add_development_dependency(%q<sqlite3>, [">= 0"])
23
- s.add_development_dependency(%q<mysql2>, [">= 0.3.11"])
21
+ s.add_development_dependency(%q<mysql2>, [">= 0"])
22
+ s.add_development_dependency(%q<activerecord-import>, [">= 0"])
24
23
 
25
24
  # = MANIFEST =
26
25
  s.files = %w[
@@ -38,16 +37,9 @@ Gem::Specification.new do |s|
38
37
  lib/generators/templates/http_utilities.rb
39
38
  lib/generators/templates/user_agents.yml
40
39
  lib/http_utilities.rb
41
- lib/http_utilities/http/adapters/curb.rb
42
- lib/http_utilities/http/adapters/net_http.rb
43
- lib/http_utilities/http/adapters/open_uri.rb
44
40
  lib/http_utilities/http/client.rb
45
- lib/http_utilities/http/cookies.rb
46
- lib/http_utilities/http/format.rb
47
- lib/http_utilities/http/get.rb
48
41
  lib/http_utilities/http/logger.rb
49
42
  lib/http_utilities/http/mechanize/client.rb
50
- lib/http_utilities/http/post.rb
51
43
  lib/http_utilities/http/proxy_support.rb
52
44
  lib/http_utilities/http/request.rb
53
45
  lib/http_utilities/http/response.rb
@@ -57,8 +49,9 @@ Gem::Specification.new do |s|
57
49
  lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb
58
50
  lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb
59
51
  lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb
52
+ lib/http_utilities/proxies/mongo/proxy_module.rb
53
+ lib/http_utilities/proxies/mysql/proxy_module.rb
60
54
  lib/http_utilities/proxies/proxy_checker.rb
61
- lib/http_utilities/proxies/proxy_module.rb
62
55
  lib/http_utilities/proxies/proxy_seeder.rb
63
56
  lib/http_utilities/railtie.rb
64
57
  lib/tasks/http_utilities_tasks.rake
@@ -1,21 +1,75 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  require 'open-uri'
3
- require 'net/http'
4
3
  require 'uri'
5
4
  require 'cgi'
6
5
 
7
6
  module HttpUtilities
8
7
  module Http
9
8
  class Client
10
- include HttpUtilities::Http::Cookies
11
- include HttpUtilities::Http::Url
12
- include HttpUtilities::Http::Get
13
- include HttpUtilities::Http::Post
14
9
  include HttpUtilities::Http::Logger
10
+
11
+ def get(url, arguments: {}, options: {}, retries: 3)
12
+ response = nil
13
+ request = build_request(options)
14
+
15
+ begin
16
+ response = request.interface.get(url, arguments)
17
+ response = HttpUtilities::Http::Response.new(response, request, options)
18
+
19
+ rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
20
+ log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
21
+ retries -= 1
22
+ retry if retries > 0
23
+ end
15
24
 
16
- include HttpUtilities::Http::Adapters::NetHttp
17
- include HttpUtilities::Http::Adapters::OpenUri
18
- include HttpUtilities::Http::Adapters::Curb
25
+ return response
26
+ end
27
+
28
+ def post(url, data: nil, options: {}, retries: 3)
29
+ response = nil
30
+ request = build_request(options)
31
+
32
+ begin
33
+ response = request.interface.post(url, data)
34
+ response = HttpUtilities::Http::Response.new(response, request, options)
35
+
36
+ rescue Faraday::TimeoutError, Net::ReadTimeout, Timeout::Error, StandardError => e
37
+ log(:error, "[HttpUtilities::Http::Client] - An error occurred while trying to fetch the response. Error Class: #{e.class.name}. Error Message: #{e.message}.")
38
+ retries -= 1
39
+ retry if retries > 0
40
+ end
41
+
42
+ return response
43
+ end
44
+
45
+ private
46
+ def build_request(options = {}, faraday_options = {})
47
+ options = options.dup
48
+ options = options.merge(ssl: {:verify => false})
49
+
50
+ adapter = options.delete(:adapter) { |opt| Faraday.default_adapter }
51
+ timeout = options.delete(:timeout) { |opt| 60 }
52
+ open_timeout = options.delete(:open_timeout) { |opt| 60 }
53
+
54
+ request = HttpUtilities::Http::Request.new
55
+ request.set_proxy_options(options)
56
+
57
+ proxy_options = request.generate_proxy_options
58
+
59
+ connection = Faraday.new(faraday_options) do |builder|
60
+ builder.headers[:user_agent] = request.user_agent
61
+ builder.options[:timeout] = timeout
62
+ builder.options[:open_timeout] = open_timeout
63
+ #builder.response :logger
64
+ builder.proxy proxy_options unless proxy_options.empty?
65
+ builder.adapter adapter
66
+ end
67
+
68
+ request.interface = connection
69
+
70
+ return request
71
+ end
72
+
19
73
  end
20
74
  end
21
75
  end
@@ -30,7 +30,11 @@ module HttpUtilities
30
30
  self.agent.log = ::Logger.new(logger) if (verbose)
31
31
 
32
32
  self.set_proxy_options(options)
33
- self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password]) if (self.proxy[:host] && self.proxy[:port])
33
+
34
+ if (self.proxy[:host] && self.proxy[:port])
35
+ log(:info, "[HttpUtilities::Http::Mechanize::Client] - Will use proxy #{self.proxy[:host]}:#{self.proxy[:port]} for Mechanize.")
36
+ self.agent.set_proxy(self.proxy[:host], self.proxy[:port], self.proxy[:username], self.proxy[:password])
37
+ end
34
38
 
35
39
  self.set_user_agent
36
40
  (self.user_agent) ? self.agent.user_agent = self.user_agent : self.agent.user_agent_alias = 'Mac Safari'
@@ -84,6 +84,16 @@ module HttpUtilities
84
84
  def using_proxy?
85
85
  return (self.proxy[:host] && self.proxy[:port] && self.proxy[:port] > 0)
86
86
  end
87
+
88
+ def generate_proxy_options
89
+ proxy_options = {}
90
+
91
+ proxy_options[:uri] = "http://#{self.proxy[:host]}:#{self.proxy[:port]}"
92
+ proxy_options[:user] = self.proxy[:username] if self.proxy[:username] && self.proxy[:username].present?
93
+ proxy_options[:password] = self.proxy[:password] if self.proxy[:password] && self.proxy[:password].present?
94
+
95
+ return proxy_options
96
+ end
87
97
 
88
98
  end
89
99
  end
@@ -5,12 +5,11 @@ module HttpUtilities
5
5
  include HttpUtilities::Http::ProxySupport
6
6
  include HttpUtilities::Http::UserAgent
7
7
 
8
- attr_accessor :interface, :proxy, :cookies, :user_agent
8
+ attr_accessor :interface, :proxy, :user_agent
9
9
 
10
- def initialize(interface = nil, proxy = {}, cookies = [])
10
+ def initialize(interface = nil, proxy = {})
11
11
  self.interface = interface
12
12
  self.proxy = proxy
13
- self.cookies = cookies
14
13
 
15
14
  self.set_user_agent
16
15
  end
@@ -1,15 +1,14 @@
1
1
  module HttpUtilities
2
2
  module Http
3
3
  class Response
4
- include HttpUtilities::Http::Format
5
4
  include HttpUtilities::Http::Logger
6
5
 
7
6
  attr_accessor :body, :parsed_body, :page, :format, :request, :force_encoding
8
7
 
9
- def initialize(body = nil, request = nil, options = {})
10
- options = options.clone()
8
+ def initialize(response = nil, request = nil, options = {})
9
+ options = options.dup
11
10
 
12
- self.body = body
11
+ self.body = (response && response.body) ? response.body : nil
13
12
  self.request = request
14
13
 
15
14
  self.parsed_body = nil
@@ -34,6 +33,18 @@ module HttpUtilities
34
33
  def parse_response
35
34
  self.send("as_#{self.format}".to_sym) if (self.body && self.format)
36
35
  end
36
+
37
+ def as_html
38
+ self.parsed_body = (self.body && self.body != "") ? Nokogiri::HTML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
39
+ end
40
+
41
+ def as_xml
42
+ self.parsed_body = (self.body && self.body != "") ? Nokogiri::XML(self.body.to_s.force_encoding("utf-8"), nil, "utf-8") : nil
43
+ end
44
+
45
+ def as_json
46
+ self.parsed_body = (self.body && self.body != "") ? self.body.to_s.force_encoding("utf-8").to_json : nil
47
+ end
37
48
 
38
49
  def set_page(page)
39
50
  self.page = page
@@ -5,7 +5,7 @@ module HttpUtilities
5
5
  module UserAgent
6
6
 
7
7
  def set_user_agent
8
- user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS[rand(USER_AGENTS.size)] : ""
8
+ user_agent = (USER_AGENTS && USER_AGENTS.any?) ? USER_AGENTS.sample : nil
9
9
  self.user_agent = user_agent if (user_agent && self.respond_to?(:user_agent=))
10
10
  end
11
11
 
@@ -0,0 +1,97 @@
1
+ module HttpUtilities
2
+ module Proxies
3
+ module Mongo
4
+
5
+ module ProxyModule
6
+
7
+ def self.included(base)
8
+ base.send :extend, ClassMethods
9
+ base.send :include, InstanceMethods
10
+ end
11
+
12
+ module ClassMethods
13
+ def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
14
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
15
+
16
+ proxies = proxies.any_of(
17
+ {:last_checked_at.exists => false},
18
+ {:last_checked_at.ne => nil},
19
+ {:last_checked_at.exists => true, :last_checked_at.ne => nil, :last_checked_at.lt => date}
20
+ )
21
+
22
+ proxies = proxies.any_of(
23
+ {:failed_attempts.exists => false},
24
+ {:failed_attempts.in => ["", nil]},
25
+ {:failed_attempts.exists => true, :failed_attempts.nin => ["", nil], :failed_attempts.lte => maximum_failed_attempts}
26
+ )
27
+
28
+ proxies = proxies.order_by([[:valid_proxy, :asc], [:failed_attempts, :asc], [:last_checked_at, :asc]])
29
+ proxies = proxies.limit(limit)
30
+
31
+ return proxies
32
+ end
33
+
34
+ def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil, retries: 3)
35
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
36
+ proxies = proxies.where(valid_proxy: true)
37
+ proxies = proxies.where(:failed_attempts.lte => maximum_failed_attempts) if maximum_failed_attempts
38
+ proxy = nil
39
+
40
+ begin
41
+ proxy = proxies.skip(rand(proxies.count)).first
42
+
43
+ rescue StandardError
44
+ retries -= 1
45
+ retry if retries > 0
46
+ end
47
+
48
+ return proxy
49
+ end
50
+
51
+ def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
52
+ proxies = ::Proxy.where(nil)
53
+ proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
54
+ proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
55
+
56
+ return proxies
57
+ end
58
+
59
+ def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
60
+ proxy_address = "#{proxy_host}:#{proxy_port}"
61
+ proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
62
+
63
+ return proxy_address
64
+ end
65
+
66
+ def format_proxy_credentials(username, password)
67
+ return "#{username}:#{password}"
68
+ end
69
+ end
70
+
71
+ module InstanceMethods
72
+ def proxy_address(include_http = false)
73
+ return ::Proxy.format_proxy_address(self.host, self.port, include_http)
74
+ end
75
+
76
+ def proxy_credentials
77
+ return ::Proxy.format_proxy_credentials(self.username, self.password)
78
+ end
79
+
80
+ def socks_proxy_credentials
81
+ credentials = {}
82
+
83
+ if (!self.username.empty? && !self.password.empty?)
84
+ credentials = {user: self.username, password: self.password}
85
+ elsif (!self.username.empty? && self.password.empty?)
86
+ credentials = {user: self.username}
87
+ end
88
+
89
+ return credentials
90
+ end
91
+ end
92
+
93
+ end
94
+
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,93 @@
1
+ module HttpUtilities
2
+ module Proxies
3
+ module Mysql
4
+
5
+ module ProxyModule
6
+
7
+ def self.included(base)
8
+ base.send :extend, ClassMethods
9
+ base.send :include, InstanceMethods
10
+ end
11
+
12
+ module ClassMethods
13
+ def should_be_checked(protocol: :all, proxy_type: :all, date: Time.now, limit: 10, maximum_failed_attempts: 10)
14
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
15
+ proxies = proxies.where(["(last_checked_at IS NULL OR last_checked_at < ?)", date])
16
+ proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts])
17
+ proxies = proxies.order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC")
18
+ proxies = proxies.limit(limit)
19
+
20
+ return proxies
21
+ end
22
+
23
+ def get_random_proxy(protocol: :all, proxy_type: :all, maximum_failed_attempts: nil)
24
+ proxies = get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
25
+ proxies = proxies.where(["valid_proxy = ? AND last_checked_at IS NOT NULL", true])
26
+ proxies = proxies.where(["failed_attempts <= ?", maximum_failed_attempts]) if maximum_failed_attempts
27
+
28
+ order_clause = case ActiveRecord::Base.connection.class.name
29
+ when "ActiveRecord::ConnectionAdapters::MysqlAdapter", "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
30
+ "RAND() DESC"
31
+ when "ActiveRecord::ConnectionAdapters::SQLite3Adapter"
32
+ "RANDOM() DESC"
33
+ else
34
+ "RAND() DESC"
35
+ end
36
+
37
+ proxies = proxies.order(order_clause)
38
+
39
+ proxy = nil
40
+
41
+ uncached do
42
+ proxy = proxies.limit(1).first
43
+ end
44
+
45
+ return proxy
46
+ end
47
+
48
+ def get_proxies_for_protocol_and_proxy_type(protocol, proxy_type)
49
+ proxies = ::Proxy.where(nil)
50
+ proxies = proxies.where(protocol: protocol) if (protocol && !protocol.downcase.to_sym.eql?(:all))
51
+ proxies = proxies.where(proxy_type: proxy_type) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
52
+
53
+ return proxies
54
+ end
55
+
56
+ def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
57
+ proxy_address = "#{proxy_host}:#{proxy_port}"
58
+ proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
59
+ return proxy_address
60
+ end
61
+
62
+ def format_proxy_credentials(username, password)
63
+ return "#{username}:#{password}"
64
+ end
65
+ end
66
+
67
+ module InstanceMethods
68
+ def proxy_address(include_http = false)
69
+ return ::Proxy.format_proxy_address(self.host, self.port, include_http)
70
+ end
71
+
72
+ def proxy_credentials
73
+ return ::Proxy.format_proxy_credentials(self.username, self.password)
74
+ end
75
+
76
+ def socks_proxy_credentials
77
+ credentials = {}
78
+
79
+ if (!self.username.empty? && !self.password.empty?)
80
+ credentials = {user: self.username, password: self.password}
81
+ elsif (!self.username.empty? && self.password.empty?)
82
+ credentials = {user: self.username}
83
+ end
84
+
85
+ return credentials
86
+ end
87
+ end
88
+
89
+ end
90
+
91
+ end
92
+ end
93
+ end
@@ -2,7 +2,6 @@
2
2
 
3
3
  require 'socket'
4
4
  require 'net/ssh/proxy/socks5'
5
- require 'activerecord-import'
6
5
 
7
6
  module HttpUtilities
8
7
  module Proxies
@@ -11,9 +10,8 @@ module HttpUtilities
11
10
  attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts
12
11
 
13
12
  def initialize
14
- self.client = HttpUtilities::Http::Mechanize::Client.new(verbose: false)
15
- self.client.agent.agent.http.verify_mode = OpenSSL::SSL::VERIFY_NONE
16
-
13
+ self.client = HttpUtilities::Http::Client.new
14
+
17
15
  self.processed_proxies = []
18
16
 
19
17
  self.limit = 1000
@@ -23,7 +21,6 @@ module HttpUtilities
23
21
 
24
22
  def check_and_update_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
25
23
  check_proxies(protocol: protocol, proxy_type: proxy_type, mode: mode, maximum_failed_attempts: maximum_failed_attempts)
26
- update_proxies
27
24
  end
28
25
 
29
26
  def check_proxies(protocol: :all, proxy_type: :all, mode: :synchronous, maximum_failed_attempts: self.maximum_failed_attempts)
@@ -77,86 +74,57 @@ module HttpUtilities
77
74
  valid_proxy = false
78
75
  end
79
76
 
80
- if (valid_proxy)
81
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
82
- else
83
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
84
- end
85
-
86
- self.processed_proxies << {proxy: proxy, valid: valid_proxy}
77
+ update_proxy(proxy, valid_proxy)
87
78
  end
88
79
 
89
- def check_http_proxy(proxy, timeout = 60)
90
- document = nil
91
- valid_proxy = false
92
-
80
+ def check_http_proxy(proxy, test_url: "http://www.google.com/robots.txt", timeout: 10)
93
81
  options = {
94
82
  use_proxy: true,
95
- proxy: proxy.proxy_address,
83
+ proxy: {host: proxy.host, port: proxy.port},
96
84
  proxy_protocol: proxy.protocol,
97
- timeout: timeout,
85
+ timeout: timeout
98
86
  }
99
-
100
- Rails.logger.info "#{Time.now}: Fetching Google.com with proxy #{proxy.proxy_address}."
101
87
 
102
- page = self.client.get_page("https://www.google.com/webhp?hl=en&gws_rd=ssl", options)
103
-
104
- if (page)
105
- parser = self.client.get_parser(page)
106
- title = parser.at_css("head title")
107
-
108
- if (title && title.content)
109
- begin
110
- title = title.content.encode("UTF-8").strip.downcase
111
- body_content = page.body.to_s.encode("UTF-8").strip.downcase
112
-
113
- valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)
114
-
115
- Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"
116
-
117
- rescue Exception => e
118
- Rails.logger.error "Exception occured while trying to check proxy #{proxy.proxy_address}. Error Class: #{e.class}. Error Message: #{e.message}"
119
- valid_proxy = false
120
- end
121
- end
122
- end
88
+ options.merge!(proxy_username: proxy.username) if proxy.username && proxy.username.present?
89
+ options.merge!(proxy_password: proxy.password) if proxy.password && proxy.password.present?
123
90
 
124
- if (valid_proxy)
125
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
126
- else
127
- Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
128
- end
91
+ Rails.logger.info "#{Time.now}: Fetching robots.txt for Google.com with proxy #{proxy.proxy_address}. Using authentication? #{options.has_key?(:proxy_username).to_s}"
92
+
93
+ response = self.client.get(test_url, options: options)
94
+ valid_proxy = (response && response.body && response.body =~ /Allow: \/search\/about/i)
129
95
 
130
- self.processed_proxies << {proxy: proxy, valid: valid_proxy}
96
+ update_proxy(proxy, valid_proxy)
131
97
  end
132
-
98
+
133
99
  def update_proxies
134
- columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
135
- values = []
136
-
137
100
  Rails.logger.info "Updating/Importing #{self.processed_proxies.size} proxies"
138
101
 
139
102
  if (self.processed_proxies && self.processed_proxies.any?)
140
103
  self.processed_proxies.each do |value|
141
- proxy = value[:proxy]
142
- valid = value[:valid]
143
- successful_attempts = proxy.successful_attempts
144
- failed_attempts = proxy.failed_attempts
145
-
146
- if (valid)
147
- successful_attempts += 1
148
- else
149
- failed_attempts += 1
150
- end
151
-
152
- is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
153
- value_arr = [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
154
- values << value_arr
104
+ update_proxy(value[:proxy], value[:valid])
155
105
  end
106
+ end
107
+ end
108
+
109
+ def update_proxy(proxy, valid)
110
+ Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is #{valid ? "working" : "not working"}!"
111
+
112
+ successful_attempts = proxy.successful_attempts || 0
113
+ failed_attempts = proxy.failed_attempts || 0
156
114
 
157
- ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
115
+ if (valid)
116
+ successful_attempts += 1
117
+ else
118
+ failed_attempts += 1
158
119
  end
159
120
 
121
+ is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
122
+
123
+ proxy.valid_proxy = is_valid
124
+ proxy.successful_attempts = successful_attempts
125
+ proxy.failed_attempts = failed_attempts
126
+ proxy.last_checked_at = Time.now
127
+ proxy.save
160
128
  end
161
129
 
162
130
  end
@@ -1,7 +1,6 @@
1
1
  module HttpUtilities
2
2
  module Proxies
3
3
  class ProxySeeder
4
- require 'activerecord-import'
5
4
  attr_accessor :protocols, :proxy_types, :categories
6
5
 
7
6
  def initialize
@@ -27,20 +26,23 @@ module HttpUtilities
27
26
  end
28
27
 
29
28
  def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
30
- columns = [:host, :port, :protocol, :proxy_type, :category]
31
- category = (category && !category.eql?('unspecified')) ? category : nil
29
+ columns = [:host, :port, :protocol, :proxy_type, :category]
30
+ category = (category && !category.eql?('unspecified')) ? category : nil
32
31
 
33
32
  begin
34
- values = []
35
-
36
33
  proxy_list.slice!(0..1000).each do |proxy|
37
- host = proxy[:host]
38
- port = proxy[:port]
39
- value_arr = [host, port, protocol, proxy_type, category]
40
- values << value_arr
34
+ host = proxy[:host]
35
+ port = proxy[:port]
36
+
37
+ proxy = ::Proxy.where(host: host, port: port).first || ::Proxy.new
38
+ proxy.host = host
39
+ proxy.port = port
40
+ proxy.protocol = protocol
41
+ proxy.proxy_type = proxy_type
42
+ proxy.category = category
43
+ proxy.save
41
44
  end
42
-
43
- ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false) if (values && values.any?)
45
+
44
46
  end while (proxy_list && proxy_list.any?)
45
47
  end
46
48