http_utilities 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +22 -0
- data/README +15 -0
- data/Rakefile +87 -0
- data/VERSION +1 -0
- data/http_utilities.gemspec +78 -0
- data/lib/generators/active_record/http_utilities_generator.rb +21 -0
- data/lib/generators/active_record/templates/migration.rb +34 -0
- data/lib/generators/active_record/templates/proxy.rb +3 -0
- data/lib/generators/helpers/file_helper.rb +35 -0
- data/lib/generators/helpers/orm_helpers.rb +15 -0
- data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
- data/lib/generators/templates/http_utilities.rb +2 -0
- data/lib/generators/templates/user_agents.yml +3419 -0
- data/lib/http_utilities/http/adapters/curb.rb +107 -0
- data/lib/http_utilities/http/adapters/net_http.rb +130 -0
- data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
- data/lib/http_utilities/http/client.rb +22 -0
- data/lib/http_utilities/http/cookies.rb +49 -0
- data/lib/http_utilities/http/format.rb +26 -0
- data/lib/http_utilities/http/get.rb +67 -0
- data/lib/http_utilities/http/logger.rb +11 -0
- data/lib/http_utilities/http/mechanize/client.rb +197 -0
- data/lib/http_utilities/http/post.rb +32 -0
- data/lib/http_utilities/http/proxy_support.rb +88 -0
- data/lib/http_utilities/http/request.rb +20 -0
- data/lib/http_utilities/http/response.rb +50 -0
- data/lib/http_utilities/http/url.rb +48 -0
- data/lib/http_utilities/http/user_agent.rb +3380 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
- data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
- data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
- data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
- data/lib/http_utilities/proxies/proxy_module.rb +70 -0
- data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
- data/lib/http_utilities/railtie.rb +11 -0
- data/lib/http_utilities.rb +47 -0
- data/lib/tasks/http_utilities_tasks.rake +19 -0
- data/spec/database.yml.example +10 -0
- data/spec/http_utilities/client_spec.rb +145 -0
- data/spec/http_utilities/mechanize_client_spec.rb +35 -0
- data/spec/http_utilities/proxy_checker_spec.rb +11 -0
- data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
- data/spec/http_utilities/proxy_spec.rb +114 -0
- data/spec/models.rb +6 -0
- data/spec/schema.rb +30 -0
- data/spec/spec_helper.rb +50 -0
- metadata +209 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that checks a batch of stored proxies and persists the
        # outcome through HttpUtilities::Proxies::ProxyChecker.
        class CheckProxiesJob
          @queue = :proxies

          # Entry point used by Resque, which calls +perform+ on the job class
          # itself (the sibling CheckProxyJob is defined the same way).
          #
          # Uses +check_and_update_proxies+ so that results gathered in
          # :synchronous mode are written back instead of being discarded with
          # the checker instance. In the queued modes nothing is collected
          # locally, so the update step has nothing to import.
          #
          # protocol   - protocol filter (:all for no filter); strings are accepted.
          # proxy_type - proxy type filter (:all for no filter); strings are accepted.
          # mode       - :synchronous, :resque or :sidekiq; strings are accepted.
          def self.perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end

          # Instance-level form, kept so code that instantiated the job and
          # called +perform+ on it keeps working. Delegates to the class method.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            self.class.perform(protocol, proxy_type, mode)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that checks a single stored proxy and persists the result.
        class CheckProxyJob
          @queue = :proxies

          # Looks up the proxy with the given id. When it exists, checks it
          # with a fresh ProxyChecker and writes the result back; when it does
          # not, nothing happens and nil is returned.
          def self.perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            HttpUtilities::Proxies::ProxyChecker.new.tap do |proxy_checker|
              proxy_checker.check_proxy(record)
            end.update_proxies
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that checks a batch of stored proxies and persists the
        # outcome through HttpUtilities::Proxies::ProxyChecker.
        class CheckProxiesJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies,
                          :unique => false

          # Uses +check_and_update_proxies+ so that results gathered in
          # :synchronous mode are written back instead of being discarded with
          # the checker instance. In the queued modes nothing is collected
          # locally, so the update step has nothing to import.
          #
          # protocol   - protocol filter (:all for no filter); strings are accepted.
          # proxy_type - proxy type filter (:all for no filter); strings are accepted.
          # mode       - :synchronous, :resque or :sidekiq; strings are accepted.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that checks a single stored proxy and persists the result.
        class CheckProxyJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies

          # Looks up the proxy with the given id. When it exists, checks it
          # with a fresh ProxyChecker and writes the result back; when it does
          # not, nothing happens and nil is returned.
          def perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            HttpUtilities::Proxies::ProxyChecker.new.tap do |proxy_checker|
              proxy_checker.check_proxy(record)
            end.update_proxies
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
|
3
|
+
module HttpUtilities
  module Proxies
    # Checks stored proxies by fetching a Google page through each of them,
    # collects the outcomes in +processed_proxies+ and bulk-writes them back
    # to the Proxy model with activerecord-import.
    class ProxyChecker
      require 'activerecord-import'

      # client            - HTTP client used to fetch the test page.
      # processed_proxies - Array of {:proxy => record, :valid => boolean}
      #                     hashes appended by #check_proxy.
      attr_accessor :client, :processed_proxies
      # limit                       - maximum number of proxies fetched per run.
      # minimum_successful_attempts - successes needed for a proxy to be valid.
      # maximum_failed_attempts     - failures at which a proxy stops being valid.
      attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts

      def initialize
        self.client = HttpUtilities::Http::Client.new
        self.processed_proxies = []

        self.limit = 1000
        self.minimum_successful_attempts = 1
        self.maximum_failed_attempts = 10
      end

      # Runs #check_proxies and then persists whatever was collected locally.
      def check_and_update_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        check_proxies(protocol, proxy_type, mode)
        update_proxies
      end

      # Fetches the proxies that are due for a check and processes each one
      # according to +mode+:
      #
      # :synchronous - checked inline via #check_proxy
      # :resque      - one Resque CheckProxyJob enqueued per proxy
      # :sidekiq     - one Sidekiq CheckProxyJob enqueued per proxy
      #
      # Any other mode is ignored for every proxy.
      def check_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        proxies = ::Proxy.should_be_checked(protocol, proxy_type, Time.now, self.limit)

        if proxies && proxies.any?
          Rails.logger.info "Found #{proxies.size} #{proxy_type} proxies to check."

          proxies.each do |proxy|
            case mode
              when :synchronous
                check_proxy(proxy)
              when :resque
                Resque.enqueue(HttpUtilities::Jobs::Resque::Proxies::CheckProxyJob, proxy.id)
              when :sidekiq
                HttpUtilities::Jobs::Sidekiq::Proxies::CheckProxyJob.perform_async(proxy.id)
            end
          end
        else
          Rails.logger.info "Couldn't find any proxies to check!"
        end
      end

      # Requests the Google home page through +proxy+ and appends the outcome
      # to +processed_proxies+. The proxy counts as working when the page
      # title is "google" or the page text contains "google home".
      #
      # proxy   - record responding to +proxy_address+ and +protocol+.
      # timeout - value passed to the client as the :timeout option.
      def check_proxy(proxy, timeout = 60)
        valid_proxy = false

        options = {
          :method             => :net_http,
          :use_proxy          => true,
          :proxy              => proxy.proxy_address,
          :proxy_protocol     => proxy.protocol,
          :timeout            => timeout,
          :maximum_redirects  => 1,
          :disable_auth       => true
        }

        Rails.logger.info "#{Time.now}: Fetching Proxy #{proxy.proxy_address}."

        response = self.client.retrieve_parsed_html("http://www.google.com/webhp?hl=en", options)

        if response && response.parsed_body
          title = response.parsed_body.css("title").first

          if title && title.content
            begin
              title = title.content.encode("UTF-8").strip.downcase
              body_content = response.parsed_body.content.to_s.encode("UTF-8").strip.downcase

              valid_proxy = (title.eql?("google") || !(body_content =~ /google home/i).nil?)

              Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"

            # StandardError rather than Exception: encoding failures are still
            # treated as "proxy not valid", but signals and exit requests are
            # no longer swallowed.
            rescue StandardError => e
              Rails.logger.error "Exception occured while trying to validate proxy. Error Class: #{e.class}. Error Message: #{e.message}"
              valid_proxy = false
            end
          end
        end

        if valid_proxy
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
        else
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
        end

        self.processed_proxies << {:proxy => proxy, :valid => valid_proxy}
      end

      # Bulk-writes the collected results back to the Proxy model, bumping the
      # success or failure counter of each proxy and recomputing its validity.
      # Does nothing beyond logging when no results were collected.
      def update_proxies()
        columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
        # Tolerate the accessor having been set to nil by a caller.
        results = self.processed_proxies || []

        Rails.logger.info "Updating/Importing #{results.size} proxies"

        return if results.empty?

        checked_at = Time.now

        values = results.map do |result|
          proxy = result[:proxy]
          # to_i so a counter that is still nil counts as zero.
          successful_attempts = proxy.successful_attempts.to_i
          failed_attempts = proxy.failed_attempts.to_i

          if result[:valid]
            successful_attempts += 1
          else
            failed_attempts += 1
          end

          is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)

          [proxy.host, proxy.port, checked_at, is_valid, successful_attempts, failed_attempts]
        end

        ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
      end

    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    # Mixin for the model that stores proxies. Including it adds the query and
    # formatting helpers in ClassMethods to the class and the helpers in
    # InstanceMethods to its instances.
    module ProxyModule

      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Returns a relation of proxies that are due for a check: never
        # checked or last checked before +date+, and with at most
        # +maximum_failed_attempts+ failures. Invalid and least-failed proxies
        # come first.
        #
        # protocol, proxy_type    - filters; :all (or nil) disables the filter.
        # date                    - only proxies last checked before this time.
        # limit                   - maximum number of rows.
        # maximum_failed_attempts - failure threshold (defaults to 10).
        def should_be_checked(protocol = :all, proxy_type = :all, date = Time.now, limit = 10, maximum_failed_attempts = 10)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["(last_checked_at IS NULL OR last_checked_at < ?)", date])
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["failed_attempts <= ?", maximum_failed_attempts])
          query = conditions.join(" AND ")

          where(query).order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC").limit(limit)
        end

        # Returns one proxy that is flagged valid and has been checked at least
        # once, or nil when none matches. The row is picked at random on MySQL,
        # SQLite and PostgreSQL; on other adapters no ordering is applied.
        def get_random_proxy(protocol = :all, proxy_type = :all)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["valid_proxy = ?", true])
          conditions << "last_checked_at IS NOT NULL"
          query = conditions.join(" AND ")

          order_clause = case ActiveRecord::Base.connection.class.name
            when "ActiveRecord::ConnectionAdapters::MysqlAdapter", "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
              "RAND() DESC"
            when "ActiveRecord::ConnectionAdapters::SQLite3Adapter", "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
              "RANDOM() DESC"
          end

          proxy = nil

          # Bypass the query cache so repeated calls do not return the same row.
          uncached do
            proxy = where(query).order(order_clause).limit(1).first
          end

          proxy
        end

        # Builds the sanitized SQL fragments for the protocol and proxy type
        # filters. A filter is skipped when its value is nil/false or equals
        # "all" in any letter case. Returns an Array of SQL strings.
        def set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions = []
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["protocol = ?", protocol]) if (protocol && !protocol.downcase.to_sym.eql?(:all))
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["proxy_type = ?", proxy_type]) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
          conditions
        end

        # Returns "host:port", prefixed with "http://" when +include_http+ is
        # true and the prefix is not already present.
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
          proxy_address
        end

        # Returns "username:password".
        def format_proxy_credentials(username, password)
          "#{username}:#{password}"
        end
      end

      module InstanceMethods
        # Formatted address of this proxy; see ClassMethods#format_proxy_address.
        # Goes through self.class so the mixin works in any including class,
        # not only one named ::Proxy.
        def proxy_address(include_http = false)
          self.class.format_proxy_address(self.host, self.port, include_http)
        end

        # Formatted credentials of this proxy; see
        # ClassMethods#format_proxy_credentials.
        def proxy_credentials
          self.class.format_proxy_credentials(self.username, self.password)
        end
      end

    end
  end
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
module HttpUtilities
  module Proxies
    # Imports proxies from "host:port" text files into the Proxy model.
    #
    # Files are looked up under <seed root>/<protocol>/<proxy_type>/ and, for
    # the "http" protocol, additionally under one sub-directory per category.
    class ProxySeeder
      require 'activerecord-import'

      # Number of rows sent to the database per bulk import.
      IMPORT_BATCH_SIZE = 1000

      attr_accessor :protocols, :proxy_types, :categories

      def initialize
        self.protocols = ['http', 'socks5']
        self.proxy_types = ['public', 'shared', 'private']
        self.categories = ['L1', 'L2', 'L3', 'unspecified']
      end

      # Parses the seed files and imports everything they contain.
      def seed
        import_proxies
      end

      # Walks the protocol => type => category => proxies structure returned
      # by #parse_proxies and bulk-imports each leaf list.
      def import_proxies
        proxy_data = parse_proxies
        return if proxy_data.nil? || proxy_data.empty?

        proxy_data.each do |protocol, types|
          types.each do |type, categories|
            categories.each do |category, proxies|
              bulk_import_proxies(proxies, protocol, type, category)
            end
          end
        end
      end

      # Imports +proxy_list+ in batches of IMPORT_BATCH_SIZE rows. Existing
      # rows get their proxy_type updated. The 'unspecified' category is
      # stored as nil. A nil or empty list is a no-op, and the list passed in
      # is left unmodified.
      #
      # proxy_list - Array of {:host => String, :port => Integer} hashes.
      #
      # Returns nil.
      def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
        return if proxy_list.nil? || proxy_list.empty?

        columns = [:host, :port, :protocol, :proxy_type, :category]
        category = nil if !category || category.eql?('unspecified')

        proxy_list.each_slice(IMPORT_BATCH_SIZE) do |batch|
          values = batch.map { |proxy| [proxy[:host], proxy[:port], protocol, proxy_type, category] }
          ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false)
        end

        nil
      end

      # Reads every seed file and returns a nested Hash:
      #   protocol => proxy_type => category => Array of proxy hashes.
      # Files directly inside a proxy type directory are added to the
      # 'unspecified' category.
      def parse_proxies
        seed_root = get_seed_root
        proxies = {}

        self.protocols.each do |protocol|
          proxies[protocol] = {}

          self.proxy_types.each do |proxy_type|
            type_root = "#{seed_root}#{protocol}/#{proxy_type}"
            by_category = {'unspecified' => []}

            # Only the http protocol has per-category sub-directories.
            if protocol.eql?("http")
              self.categories.each do |category|
                by_category[category] = get_proxies_from_files("#{type_root}/#{category}/*.txt")
              end
            end

            by_category['unspecified'] = by_category['unspecified'] + get_proxies_from_files("#{type_root}/*.txt")
            proxies[protocol][proxy_type] = by_category
          end
        end

        proxies
      end

      # Parses every file matching the glob +pattern+. Each line is expected
      # to look like "host:port"; anything after a second colon is ignored.
      # Lines with a blank host or a port that is not a positive integer are
      # skipped.
      #
      # Returns an Array of {:host => String, :port => Integer} hashes.
      def get_proxies_from_files(pattern)
        proxies = []

        Dir.glob(pattern).each do |file_path|
          File.foreach(file_path) do |row|
            host, port = row.strip.split(":")
            host = host.to_s.strip
            port = port.to_i

            next if host.empty? || port <= 0

            proxies << {:host => host, :port => port}
          end
        end

        proxies
      end

      # Returns the directory holding the seed files: the application's
      # db/seed_data/proxies/ when Rails is loaded and that directory exists,
      # otherwise the directory shipped with the gem.
      def get_seed_root
        rails_seed_root = defined?(Rails) ? "#{Rails.root}/db/seed_data/proxies/" : nil
        gem_seed_root = File.join(File.dirname(__FILE__), "../../generators/templates/seed_data/proxies/")

        # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
        (rails_seed_root && File.exist?(rails_seed_root)) ? rails_seed_root : gem_seed_root
      end

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# -*- encoding : utf-8 -*-
|
2
|
+
module HttpUtilities
  VERSION = "1.0.1"

  # Directory of this file; every library path below is resolved against it.
  lib_root = File.dirname(__FILE__)

  require File.join(lib_root, 'http_utilities/railtie') if defined?(Rails)

  # HTTP layer, loaded in this fixed order.
  %w(
    http_utilities/http/proxy_support
    http_utilities/http/cookies
    http_utilities/http/user_agent
    http_utilities/http/url
    http_utilities/http/format
    http_utilities/http/get
    http_utilities/http/post
    http_utilities/http/logger
    http_utilities/http/request
    http_utilities/http/response
    http_utilities/http/adapters/net_http
    http_utilities/http/adapters/open_uri
    http_utilities/http/adapters/curb
    http_utilities/http/client
    http_utilities/http/mechanize/client
  ).each do |library|
    require File.join(lib_root, library)
  end

  # Proxy support and background jobs are only loaded when the libraries
  # they build on are already defined.
  if defined?(ActiveRecord)
    %w(
      http_utilities/proxies/proxy_module
      http_utilities/proxies/proxy_checker
      http_utilities/proxies/proxy_seeder
    ).each do |library|
      require File.join(lib_root, library)
    end

    if defined?(Resque)
      %w(check_proxies_job check_proxy_job).each do |job|
        require File.join(lib_root, "http_utilities/jobs/resque/proxies/#{job}")
      end
    end

    if defined?(Sidekiq)
      %w(check_proxies_job check_proxy_job).each do |job|
        require File.join(lib_root, "http_utilities/jobs/sidekiq/proxies/#{job}")
      end
    end
  end

  MultiXml.parser = :nokogiri if defined?(MultiXml)
end
|
47
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
namespace :http_utilities do
  namespace :proxies do
    # Each task carries its own description so both are listed by `rake -T`.
    desc "Import proxies from the seed data files"
    task :seed_proxies => :environment do
      HttpUtilities::Proxies::ProxySeeder.new.seed
    end

    desc "Check stored proxies and save the results (defaults: protocol=http, proxy_type=public, processing_method=synchronous)"
    task :check_proxies, [:protocol, :proxy_type, :processing_method] => [:environment] do |_task, args|
      # Missing arguments fall back to the defaults named in the description.
      protocol          = args.protocol ? args.protocol.to_sym : :http
      proxy_type        = args.proxy_type ? args.proxy_type.to_sym : :public
      processing_method = args.processing_method ? args.processing_method.to_sym : :synchronous

      HttpUtilities::Proxies::ProxyChecker.new.check_and_update_proxies(protocol, proxy_type, processing_method)
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
require File.expand_path('../../spec_helper', __FILE__)
|
2
|
+
|
3
|
+
describe HttpUtilities::Http::Client do

  describe "when modules have been included" do
    before(:each) do
      @client   = HttpUtilities::Http::Client.new
      @request  = HttpUtilities::Http::Request.new
      @response = HttpUtilities::Http::Response.new
    end

    # One example per row: label used in the example name, instance variable
    # holding the receiver, and the method it must respond to.
    [
      ["net http",   :@client,   :post_and_retrieve_content_using_net_http],
      ["open uri",   :@client,   :retrieve_open_uri_content],
      ["curb",       :@client,   :post_and_retrieve_content_using_curl],
      ["proxy",      :@request,  :set_proxy_options],
      ["cookies",    :@client,   :handle_cookies],
      ["user agent", :@request,  :user_agent],
      ["request",    :@client,   :generate_request_url],
      ["get",        :@client,   :retrieve_raw_content],
      ["post",       :@client,   :post_and_retrieve_content],
      ["format",     :@response, :as_html]
    ].each do |label, receiver, method_name|
      it "should respond to a #{label} module method" do
        instance_variable_get(receiver).should respond_to(method_name)
      end
    end
  end

  describe "when initialized" do
    before(:each) do
      @client  = HttpUtilities::Http::Client.new
      @request = HttpUtilities::Http::Request.new
    end

    # Google search URL shared by the examples below, built through the client.
    def google_search_url
      params = {:url => "http://www.google.com", :q => "ruby on rails", :start => 0}
      @client.generate_request_url(params)
    end

    it "should have assigned user agent" do
      @request.user_agent.should_not == nil
    end

    it "should return a properly formatted request url using supplied parameters" do
      google_search_url.should == "http://www.google.com?q=ruby%20on%20rails&start=0"
    end

    describe "when retrieving content using Net::Http" do
      it "should fetch Google results as unparsed HTML" do
        response = @client.retrieve_raw_content(google_search_url, {:method => :net_http})
        response.body.should be_a(String)
      end

      it "should fetch Google results as a Nokogiri::HTML::Document" do
        response = @client.retrieve_parsed_html(google_search_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::HTML::Document)
      end

      it "should fetch Google Weather data a Nokogiri::XML::Document" do
        weather_url = @client.generate_request_url({:url => "http://www.google.com/ig/api", :weather => 90120})
        response = @client.retrieve_parsed_xml(weather_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

    describe "when retrieving content using a proxy" do
      it "should have the proxy instance variable properly set" do
        options = {:method => :net_http, :proxy => "127.0.0.1:80", :response_only => false}

        proxy = @client.retrieve_parsed_html(google_search_url, options).request.proxy

        proxy.should_not be_nil
        proxy[:host].should == '127.0.0.1'
        proxy[:port].should == 80
      end
    end

    describe "when persisting cookies" do
      it "should have the cookie instance variable properly set" do
        options = {:method => :net_http, :use_cookies => true, :save_cookies => true, :response_only => false}

        cookies = @client.retrieve_parsed_html(google_search_url, options).request.cookies

        cookies.should_not be_nil
      end
    end

    describe "when posting content" do
      before(:each) do
        @trackback_url = "http://techcrunch.com/wp-trackback.php?p=314942"
        @post_data = {
          :url        => "http://www.google.com",
          :blog_name  => "Testing",
          :title      => "Title",
          :excerpt    => "Testing..."
        }
      end

      # The Curb example is only defined outside JRuby.
      unless defined?(JRUBY_VERSION)
        it "should send a trackback to a TechCrunch post using Curb and return the response as a Nokogiri::XML::Document" do
          response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :curl})
          response.parsed_body.should be_a(Nokogiri::XML::Document)
        end
      end

      it "should send a trackback to a TechCrunch post using Net::Http and return the response as a Nokogiri::XML::Document" do
        response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

  end

end
|
145
|
+
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path('../../spec_helper', __FILE__)
|
2
|
+
|
3
|
+
describe HttpUtilities::Http::Mechanize::Client do

  # Every example in both groups starts from a fresh client.
  before(:each) do
    @client = HttpUtilities::Http::Mechanize::Client.new
  end

  describe "when modules have been included" do
    it "should respond to a user agent module method" do
      @client.should respond_to(:user_agent)
    end

    it "should respond to a request module method" do
      @client.should respond_to(:generate_request_url)
    end
  end

  describe "when initialized" do
    it "should have assigned user agents" do
      @client.user_agent.should_not be_nil
    end

    it "should submit a google search query successfully" do
      # TODO: mock this request; the example currently calls the live site.
      form_identifier = {:name => "f"}
      fields = {:q => {:type => :input, :value => "Ruby on Rails"}}

      page = @client.set_form_and_submit("http://www.google.com/webhp", form_identifier, :first, fields)
      page.parser.should_not be_nil
    end
  end

end
|