http_utilities 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that checks a batch of stored proxies and writes the
        # outcome back through HttpUtilities::Proxies::ProxyChecker.
        class CheckProxiesJob
          @queue = :proxies

          # Entry point invoked by Resque, which calls +perform+ on the job
          # class itself (the sibling CheckProxyJob follows the same
          # convention).
          #
          # Arguments may arrive as Strings after queue serialization, so each
          # one is converted with +to_sym+ before being handed on.
          #
          # protocol   - protocol filter, :all by default
          # proxy_type - proxy type filter, :all by default
          # mode       - :synchronous, :resque or :sidekiq
          #
          # Returns whatever ProxyChecker#check_and_update_proxies returns.
          def self.perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new

            # check_and_update_proxies (rather than check_proxies alone) so
            # that results gathered in :synchronous mode are persisted instead
            # of being dropped together with the checker instance.
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end

          # Instance-level form kept so existing callers that instantiate the
          # job keep working; it simply delegates to the class-level method.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            self.class.perform(protocol, proxy_type, mode)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that re-checks one stored proxy, identified by its id.
        class CheckProxyJob
          @queue = :proxies

          # Looks the proxy up by id; when no record exists nothing happens
          # and nil is returned. Otherwise the proxy is checked and the result
          # is written back, returning the value of ProxyChecker#update_proxies.
          def self.perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that checks a batch of stored proxies and writes the
        # outcome back through HttpUtilities::Proxies::ProxyChecker.
        class CheckProxiesJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies,
                          :unique => false

          # Arguments may arrive as Strings after queue serialization, so each
          # one is converted with +to_sym+ before being handed on.
          #
          # protocol   - protocol filter, :all by default
          # proxy_type - proxy type filter, :all by default
          # mode       - :synchronous, :resque or :sidekiq
          #
          # Returns whatever ProxyChecker#check_and_update_proxies returns.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new

            # check_and_update_proxies (rather than check_proxies alone) so
            # that results gathered in :synchronous mode are persisted instead
            # of being dropped together with the checker instance.
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that re-checks one stored proxy, identified by its id.
        class CheckProxyJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies

          # Looks the proxy up by id; when no record exists nothing happens
          # and nil is returned. Otherwise the proxy is checked and the result
          # is written back, returning the value of ProxyChecker#update_proxies.
          def perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
3
|
+
module HttpUtilities
  module Proxies
    # Checks stored proxies by requesting a Google page through each of them
    # and collects the outcomes in +processed_proxies+, from where
    # +update_proxies+ writes them back in one bulk import.
    class ProxyChecker
      require 'activerecord-import'
      attr_accessor :client, :processed_proxies
      attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts

      # Builds the HTTP client, starts with an empty result list and sets the
      # default batch limit and success/failure thresholds.
      def initialize
        self.client = HttpUtilities::Http::Client.new
        self.processed_proxies = []

        self.limit = 1000
        self.minimum_successful_attempts = 1
        self.maximum_failed_attempts = 10
      end

      # Runs +check_proxies+ and then persists whatever was collected.
      #
      # In :resque / :sidekiq mode the checks are only enqueued, so nothing
      # has been collected yet when +update_proxies+ runs here.
      def check_and_update_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        check_proxies(protocol, proxy_type, mode)
        update_proxies
      end

      # Fetches up to +limit+ proxies that are due for a check and handles
      # each one according to +mode+:
      #
      #   :synchronous - checked inline via +check_proxy+
      #   :resque      - one Resque CheckProxyJob enqueued per proxy
      #   :sidekiq     - one Sidekiq CheckProxyJob enqueued per proxy
      #
      # Any other mode matches no branch, so those proxies are skipped.
      def check_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        # ::Proxy (top-level) to match the lookups used elsewhere in this class.
        proxies = ::Proxy.should_be_checked(protocol, proxy_type, Time.now, self.limit)

        if (proxies && proxies.any?)
          Rails.logger.info "Found #{proxies.size} #{proxy_type} proxies to check."

          proxies.each do |proxy|
            case mode
              when :synchronous
                check_proxy(proxy)
              when :resque
                Resque.enqueue(HttpUtilities::Jobs::Resque::Proxies::CheckProxyJob, proxy.id)
              when :sidekiq
                HttpUtilities::Jobs::Sidekiq::Proxies::CheckProxyJob.perform_async(proxy.id)
            end
          end
        else
          Rails.logger.info "Couldn't find any proxies to check!"
        end
      end

      # Requests the Google home page through +proxy+ and records whether the
      # response looks like Google (title equal to "google" or a body that
      # mentions "google home").
      #
      # proxy   - record responding to +proxy_address+ and +protocol+
      # timeout - value passed to the client as :timeout
      #
      # Appends {:proxy => proxy, :valid => true/false} to +processed_proxies+
      # and returns that list.
      def check_proxy(proxy, timeout = 60)
        valid_proxy = false

        options = {
          :method            => :net_http,
          :use_proxy         => true,
          :proxy             => proxy.proxy_address,
          :proxy_protocol    => proxy.protocol,
          :timeout           => timeout,
          :maximum_redirects => 1,
          :disable_auth      => true
        }

        Rails.logger.info "#{Time.now}: Fetching Proxy #{proxy.proxy_address}."

        response = self.client.retrieve_parsed_html("http://www.google.com/webhp?hl=en", options)

        if (response && response.parsed_body)
          title = response.parsed_body.css("title").first

          if (title && title.content)
            begin
              title = title.content.encode("UTF-8").strip.downcase
              body_content = response.parsed_body.content.to_s.encode("UTF-8").strip.downcase

              valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)

              Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"

            # StandardError instead of Exception: encoding problems are still
            # treated as "proxy not valid", but interrupts, SystemExit and
            # out-of-memory conditions are no longer swallowed.
            rescue StandardError => e
              Rails.logger.error "Exception occured while trying to validate proxy. Error Class: #{e.class}. Error Message: #{e.message}"
              valid_proxy = false
            end
          end
        end

        if (valid_proxy)
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
        else
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
        end

        self.processed_proxies << {:proxy => proxy, :valid => valid_proxy}
      end

      # Writes every collected result back with a single bulk import.
      #
      # A proxy counts as valid when, after adding this result, it has at
      # least +minimum_successful_attempts+ successes and fewer than
      # +maximum_failed_attempts+ failures.
      #
      # Returns the result of ::Proxy.import, or nil when nothing was collected.
      #
      # NOTE(review): +processed_proxies+ is not cleared afterwards, so calling
      # this twice on one instance imports the same results again - confirm
      # whether callers rely on the list staying populated.
      def update_proxies()
        columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
        # Guard before calling #size so a nil list cannot raise.
        results = self.processed_proxies || []

        Rails.logger.info "Updating/Importing #{results.size} proxies"

        if (results.any?)
          values = results.map do |result|
            proxy = result[:proxy]
            # to_i keeps a nil counter from raising on the increment below.
            successful_attempts = proxy.successful_attempts.to_i
            failed_attempts = proxy.failed_attempts.to_i

            if (result[:valid])
              successful_attempts += 1
            else
              failed_attempts += 1
            end

            is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
            [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
          end

          # Rows that collide with an existing key only get the listed columns
          # updated (presumably a unique key over host/port - confirm against
          # the migration).
          ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
        end
      end

    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    # Mixin for the proxy model: including it adds the query helpers in
    # ClassMethods to the class and the formatting helpers in InstanceMethods
    # to its instances.
    module ProxyModule

      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Proxies that are due for a check: never checked or last checked
        # before +date+, with at most 10 failed attempts, optionally narrowed
        # by protocol and proxy type. Not-yet-valid and least-failed proxies
        # come first; at most +limit+ rows are returned.
        def should_be_checked(protocol = :all, proxy_type = :all, date = Time.now, limit = 10)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["(last_checked_at IS NULL OR last_checked_at < ?)", date])
          conditions << "failed_attempts <= 10"
          query = conditions.join(" AND ")

          where(query).order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC").limit(limit)
        end

        # Picks one random proxy that is marked valid and has been checked at
        # least once, optionally narrowed by protocol and proxy type.
        #
        # Returns the record, or nil when none matches.
        def get_random_proxy(protocol = :all, proxy_type = :all)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["valid_proxy = ?", true])
          conditions << "last_checked_at IS NOT NULL"
          query = conditions.join(" AND ")

          # The random function is database specific. PostgreSQL is covered as
          # well so the pick is random there too; any other adapter yields nil,
          # i.e. no explicit ordering.
          order_clause = case ActiveRecord::Base.connection.class.name
            when "ActiveRecord::ConnectionAdapters::MysqlAdapter", "ActiveRecord::ConnectionAdapters::Mysql2Adapter" then "RAND() DESC"
            when "ActiveRecord::ConnectionAdapters::SQLite3Adapter", "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter" then "RANDOM() DESC"
          end

          proxy = nil

          # Bypass the query cache; otherwise a repeated call within the cache
          # scope could be answered with the cached row.
          uncached do
            proxy = where(query).order(order_clause).limit(1).first
          end

          return proxy
        end

        # Builds the SQL fragments for the protocol / proxy type filters.
        # A nil value or "all" (any case, String or Symbol) adds no condition.
        #
        # Returns an Array of sanitized SQL strings, possibly empty.
        def set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions = []

          # Values are bound as Strings so the generated SQL does not depend
          # on how the connection quotes Symbols.
          {"protocol" => protocol, "proxy_type" => proxy_type}.each do |column, value|
            next if value.nil? || value.to_s.downcase.eql?("all")
            conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["#{column} = ?", value.to_s])
          end

          return conditions
        end

        # Formats "host:port", prefixed with "http://" when +include_http+ is
        # true and the address does not already start with it.
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
          return proxy_address
        end

        # Formats "username:password".
        def format_proxy_credentials(username, password)
          return "#{username}:#{password}"
        end
      end

      module InstanceMethods
        # "host:port" for this record, see ClassMethods#format_proxy_address.
        # Dispatches through self.class so the mixin also works when the
        # including model is not named ::Proxy.
        def proxy_address(include_http = false)
          return self.class.format_proxy_address(self.host, self.port, include_http)
        end

        # "username:password" for this record.
        def proxy_credentials
          return self.class.format_proxy_credentials(self.username, self.password)
        end
      end

    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    # Imports proxies from "host:port" text files into the proxy table.
    #
    # Files are looked up below the seed root as
    # <protocol>/<proxy_type>/*.txt and, for the http protocol only,
    # <protocol>/<proxy_type>/<category>/*.txt.
    class ProxySeeder
      require 'activerecord-import'
      attr_accessor :protocols, :proxy_types, :categories

      # Sets the default protocols, proxy types and categories to scan for.
      def initialize
        self.protocols = ['http', 'socks5']
        self.proxy_types = ['public', 'shared', 'private']
        self.categories = ['L1', 'L2', 'L3', 'unspecified']
      end

      # Public entry point; currently just runs +import_proxies+.
      def seed
        import_proxies
      end

      # Parses all seed files and bulk-imports every
      # protocol / type / category bucket.
      def import_proxies
        proxy_data = parse_proxies

        proxy_data.each do |protocol, types|
          types.each do |type, categories|
            categories.each do |category, proxies|
              bulk_import_proxies(proxies, protocol, type, category)
            end
          end
        end if (proxy_data && !proxy_data.empty?)
      end

      # Imports +proxy_list+ in batches of 1000 rows.
      #
      # proxy_list - Array of {:host => ..., :port => ...} hashes; nil or an
      #              empty list imports nothing. The list is not modified.
      # category   - stored as nil when it is nil or 'unspecified'
      #
      # Rows that collide with an existing key only get :proxy_type updated
      # (presumably a unique key over host/port - confirm against the
      # migration).
      def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
        columns = [:host, :port, :protocol, :proxy_type, :category]
        category = (category && !category.eql?('unspecified')) ? category : nil

        (proxy_list || []).each_slice(1000) do |batch|
          values = batch.map { |proxy| [proxy[:host], proxy[:port], protocol, proxy_type, category] }
          ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false)
        end
      end

      # Builds the nested Hash protocol => proxy_type => category => proxies
      # from the seed files. Category sub-directories are only scanned for the
      # http protocol; files lying directly in the proxy type directory are
      # added to the 'unspecified' bucket.
      def parse_proxies
        proxies = {}

        self.protocols.each do |protocol|
          proxies[protocol] = {}

          self.proxy_types.each do |proxy_type|
            proxies[protocol][proxy_type] = {}
            proxies[protocol][proxy_type]['unspecified'] = []

            if (protocol.eql?("http"))
              self.categories.each do |category|
                proxies[protocol][proxy_type][category] = get_proxies_from_files("#{get_seed_root}#{protocol}/#{proxy_type}/#{category}/*.txt")
              end
            end

            proxies[protocol][proxy_type]['unspecified'] = proxies[protocol][proxy_type]['unspecified'] + get_proxies_from_files("#{get_seed_root}#{protocol}/#{proxy_type}/*.txt")
          end
        end

        return proxies
      end

      # Reads every file matching +pattern+ and parses one "host:port" entry
      # per line. Surrounding whitespace is ignored, fields after the port are
      # dropped, and lines without both a non-empty host and a port field are
      # skipped.
      #
      # Returns an Array of {:host => String, :port => Integer}.
      def get_proxies_from_files(pattern)
        proxies = []

        Dir.glob(pattern).each do |file_path|
          File.foreach(file_path) do |row|
            # Plain indexing instead of ActiveSupport's Array#second.
            host, port = row.strip.split(":")
            next if host.nil? || host.empty? || port.nil?

            proxies << {:host => host, :port => port.to_i}
          end
        end

        return proxies
      end

      # Seed directory: the application's db/seed_data/proxies/ when Rails is
      # loaded and that directory exists, otherwise the copy shipped with the
      # gem's generator templates.
      def get_seed_root
        rails_seed_root = defined?(Rails) ? "#{Rails.root}/db/seed_data/proxies/" : nil
        gem_seed_root = File.join(File.dirname(__FILE__), "../../generators/templates/seed_data/proxies/")

        # File.exist? - the File.exists? alias no longer exists in Ruby 3.2+.
        return (rails_seed_root && File.exist?(rails_seed_root)) ? rails_seed_root : gem_seed_root
      end

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
# Top-level namespace of the gem. Loading this file pulls in the HTTP client
# stack and, depending on which libraries are already defined, the proxy and
# background-job extensions.
module HttpUtilities
  VERSION = "1.0.1"

  library_root = File.dirname(__FILE__)
  # Requires one file addressed relative to this file's directory.
  load_component = lambda { |relative_path| require File.join(library_root, relative_path) }

  load_component.call('http_utilities/railtie') if defined?(Rails)

  # Core HTTP pieces, in load order.
  [
    'http_utilities/http/proxy_support',
    'http_utilities/http/cookies',
    'http_utilities/http/user_agent',
    'http_utilities/http/url',
    'http_utilities/http/format',
    'http_utilities/http/get',
    'http_utilities/http/post',
    'http_utilities/http/logger',
    'http_utilities/http/request',
    'http_utilities/http/response',
    'http_utilities/http/adapters/net_http',
    'http_utilities/http/adapters/open_uri',
    'http_utilities/http/adapters/curb',
    'http_utilities/http/client',
    'http_utilities/http/mechanize/client'
  ].each { |component| load_component.call(component) }

  # Proxy handling is only loaded when ActiveRecord is present.
  if defined?(ActiveRecord)
    [
      'http_utilities/proxies/proxy_module',
      'http_utilities/proxies/proxy_checker',
      'http_utilities/proxies/proxy_seeder'
    ].each { |component| load_component.call(component) }

    # Job classes are only loaded for the queue libraries that are present.
    if defined?(Resque)
      load_component.call('http_utilities/jobs/resque/proxies/check_proxies_job')
      load_component.call('http_utilities/jobs/resque/proxies/check_proxy_job')
    end

    if defined?(Sidekiq)
      load_component.call('http_utilities/jobs/sidekiq/proxies/check_proxies_job')
      load_component.call('http_utilities/jobs/sidekiq/proxies/check_proxy_job')
    end
  end

  MultiXml.parser = :nokogiri if defined?(MultiXml)
end
|
47
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
namespace :http_utilities do
  namespace :proxies do
    # Every task carries its own description; a task without one is not
    # listed by `rake -T`.
    desc "Import proxies from the seed files into the database"
    task :seed_proxies => :environment do
      seeder = HttpUtilities::Proxies::ProxySeeder.new
      seeder.seed
    end

    desc "Check stored proxies and save the results (args: protocol, proxy_type, processing_method)"
    task :check_proxies, [:protocol, :proxy_type, :processing_method] => [:environment] do |_task, args|
      # Arguments that were not given fall back to http / public / synchronous.
      protocol = (args.protocol || :http).to_sym
      proxy_type = (args.proxy_type || :public).to_sym
      processing_method = (args.processing_method || :synchronous).to_sym

      proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
      proxy_checker.check_and_update_proxies(protocol, proxy_type, processing_method)
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
require File.expand_path('../../spec_helper', __FILE__)
|
2
|
+
|
3
|
+
describe HttpUtilities::Http::Client do

  # Builds a fresh parameter hash for the Google search examples; a new hash
  # per call, exactly as each example created its own literal before.
  google_search_params = lambda do
    {:url => "http://www.google.com", :q => "ruby on rails", :start => 0}
  end

  describe "when modules have been included" do
    before(:each) do
      @client = HttpUtilities::Http::Client.new
      @request = HttpUtilities::Http::Request.new
      @response = HttpUtilities::Http::Response.new
    end

    # [module label, object under test, method it must respond to]
    [
      ["net http",   :@client,   :post_and_retrieve_content_using_net_http],
      ["open uri",   :@client,   :retrieve_open_uri_content],
      ["curb",       :@client,   :post_and_retrieve_content_using_curl],
      ["proxy",      :@request,  :set_proxy_options],
      ["cookies",    :@client,   :handle_cookies],
      ["user agent", :@request,  :user_agent],
      ["request",    :@client,   :generate_request_url],
      ["get",        :@client,   :retrieve_raw_content],
      ["post",       :@client,   :post_and_retrieve_content],
      ["format",     :@response, :as_html]
    ].each do |module_label, receiver, expected_method|
      it "should respond to a #{module_label} module method" do
        instance_variable_get(receiver).should respond_to(expected_method)
      end
    end
  end

  describe "when initialized" do
    before(:each) do
      @client = HttpUtilities::Http::Client.new
      @request = HttpUtilities::Http::Request.new
    end

    it "should have assigned user agent" do
      @request.user_agent.should_not == nil
    end

    it "should return a properly formatted request url using supplied parameters" do
      request_url = @client.generate_request_url(google_search_params.call)
      request_url.should == "http://www.google.com?q=ruby%20on%20rails&start=0"
    end

    # The examples below issue real HTTP requests.
    describe "when retrieving content using Net::Http" do
      it "should fetch Google results as unparsed HTML" do
        request_url = @client.generate_request_url(google_search_params.call)
        response = @client.retrieve_raw_content(request_url, {:method => :net_http})
        response.body.should be_a(String)
      end

      it "should fetch Google results as a Nokogiri::HTML::Document" do
        request_url = @client.generate_request_url(google_search_params.call)
        response = @client.retrieve_parsed_html(request_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::HTML::Document)
      end

      it "should fetch Google Weather data a Nokogiri::XML::Document" do
        weather_params = {:url => "http://www.google.com/ig/api", :weather => 90120}
        request_url = @client.generate_request_url(weather_params)
        response = @client.retrieve_parsed_xml(request_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

    describe "when retrieving content using a proxy" do
      it "should have the proxy instance variable properly set" do
        options = {:method => :net_http, :proxy => "127.0.0.1:80", :response_only => false}
        request_url = @client.generate_request_url(google_search_params.call)

        proxy = @client.retrieve_parsed_html(request_url, options).request.proxy

        proxy.should_not be_nil
        proxy[:host].should == '127.0.0.1'
        proxy[:port].should == 80
      end
    end

    describe "when persisting cookies" do
      it "should have the cookie instance variable properly set" do
        options = {:method => :net_http, :use_cookies => true, :save_cookies => true, :response_only => false}
        request_url = @client.generate_request_url(google_search_params.call)

        cookies = @client.retrieve_parsed_html(request_url, options).request.cookies

        cookies.should_not be_nil
      end
    end

    describe "when posting content" do
      before(:each) do
        @trackback_url = "http://techcrunch.com/wp-trackback.php?p=314942"
        @post_data = {
          :url       => "http://www.google.com",
          :blog_name => "Testing",
          :title     => "Title",
          :excerpt   => "Testing..."
        }
      end

      # The Curb example is only defined when not running on JRuby.
      unless defined?(JRUBY_VERSION)
        it "should send a trackback to a TechCrunch post using Curb and return the response as a Nokogiri::XML::Document" do
          response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :curl})
          response.parsed_body.should be_a(Nokogiri::XML::Document)
        end
      end

      it "should send a trackback to a TechCrunch post using Net::Http and return the response as a Nokogiri::XML::Document" do
        response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

  end

end
|
145
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path('../../spec_helper', __FILE__)
|
2
|
+
|
3
|
+
describe HttpUtilities::Http::Mechanize::Client do

  # Both example groups work on a freshly built client.
  before(:each) do
    @client = HttpUtilities::Http::Mechanize::Client.new
  end

  describe "when modules have been included" do
    # [module label, method the client must respond to]
    [
      ["user agent", :user_agent],
      ["request",    :generate_request_url]
    ].each do |module_label, expected_method|
      it "should respond to a #{module_label} module method" do
        @client.should respond_to(expected_method)
      end
    end
  end

  describe "when initialized" do
    it "should have assigned user agents" do
      @client.user_agent.should_not be_nil
    end

    it "should submit a google search query successfully" do
      # Issues a real request to Google; the original carried a note to mock
      # this later on.
      form_identifier = {:name => "f"}
      form_fields = {:q => {:type => :input, :value => "Ruby on Rails"}}

      page = @client.set_form_and_submit("http://www.google.com/webhp", form_identifier, :first, form_fields)
      page.parser.should_not be_nil
    end
  end

end
|