http_utilities 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +22 -0
  3. data/README +15 -0
  4. data/Rakefile +87 -0
  5. data/VERSION +1 -0
  6. data/http_utilities.gemspec +78 -0
  7. data/lib/generators/active_record/http_utilities_generator.rb +21 -0
  8. data/lib/generators/active_record/templates/migration.rb +34 -0
  9. data/lib/generators/active_record/templates/proxy.rb +3 -0
  10. data/lib/generators/helpers/file_helper.rb +35 -0
  11. data/lib/generators/helpers/orm_helpers.rb +15 -0
  12. data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
  13. data/lib/generators/templates/http_utilities.rb +2 -0
  14. data/lib/generators/templates/user_agents.yml +3419 -0
  15. data/lib/http_utilities/http/adapters/curb.rb +107 -0
  16. data/lib/http_utilities/http/adapters/net_http.rb +130 -0
  17. data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
  18. data/lib/http_utilities/http/client.rb +22 -0
  19. data/lib/http_utilities/http/cookies.rb +49 -0
  20. data/lib/http_utilities/http/format.rb +26 -0
  21. data/lib/http_utilities/http/get.rb +67 -0
  22. data/lib/http_utilities/http/logger.rb +11 -0
  23. data/lib/http_utilities/http/mechanize/client.rb +197 -0
  24. data/lib/http_utilities/http/post.rb +32 -0
  25. data/lib/http_utilities/http/proxy_support.rb +88 -0
  26. data/lib/http_utilities/http/request.rb +20 -0
  27. data/lib/http_utilities/http/response.rb +50 -0
  28. data/lib/http_utilities/http/url.rb +48 -0
  29. data/lib/http_utilities/http/user_agent.rb +3380 -0
  30. data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
  31. data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
  32. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
  33. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
  34. data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
  35. data/lib/http_utilities/proxies/proxy_module.rb +70 -0
  36. data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
  37. data/lib/http_utilities/railtie.rb +11 -0
  38. data/lib/http_utilities.rb +47 -0
  39. data/lib/tasks/http_utilities_tasks.rake +19 -0
  40. data/spec/database.yml.example +10 -0
  41. data/spec/http_utilities/client_spec.rb +145 -0
  42. data/spec/http_utilities/mechanize_client_spec.rb +35 -0
  43. data/spec/http_utilities/proxy_checker_spec.rb +11 -0
  44. data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
  45. data/spec/http_utilities/proxy_spec.rb +114 -0
  46. data/spec/models.rb +6 -0
  47. data/spec/schema.rb +30 -0
  48. data/spec/spec_helper.rb +50 -0
  49. metadata +209 -0
@@ -0,0 +1,15 @@
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that checks a batch of stored proxies.
        #
        # Resque invokes +perform+ on the job class itself, so the entry point
        # is a class method. An instance-level +perform+ is kept as a thin
        # delegator for callers that instantiate the job directly.
        class CheckProxiesJob
          @queue = :proxies

          # Checks the selected proxies and persists the outcome.
          #
          # Arguments may arrive as Strings (queued job arguments are
          # serialized), hence the +to_sym+ conversions.
          #
          # @param protocol [Symbol, String] protocol filter, or :all
          # @param proxy_type [Symbol, String] proxy type filter, or :all
          # @param mode [Symbol, String] :synchronous, :resque or :sidekiq
          def self.perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            # check_and_update_proxies (not check_proxies) so that results
            # collected in synchronous mode are written back, not discarded.
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end

          # Backward-compatible instance entry point; delegates to the class method.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            self.class.perform(protocol, proxy_type, mode)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that re-checks one proxy record.
        class CheckProxyJob
          @queue = :proxies

          # Looks up the proxy with the given id; when it exists, checks it and
          # persists the result. Returns nil when no such record is found.
          def self.perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that checks a batch of stored proxies.
        class CheckProxiesJob
          include ::Sidekiq::Worker
          sidekiq_options :queue  => :proxies,
                          :unique => false

          # Checks the selected proxies and persists the outcome.
          #
          # Arguments may arrive as Strings (queued job arguments are
          # serialized), hence the +to_sym+ conversions.
          #
          # @param protocol [Symbol, String] protocol filter, or :all
          # @param proxy_type [Symbol, String] proxy type filter, or :all
          # @param mode [Symbol, String] :synchronous, :resque or :sidekiq
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            # check_and_update_proxies (not check_proxies) so that results
            # collected in synchronous mode are written back, not discarded.
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that re-checks one proxy record.
        class CheckProxyJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies

          # Looks up the proxy with the given id; when it exists, checks it and
          # persists the result. Returns nil when no such record is found.
          def perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
# -*- encoding : utf-8 -*-

module HttpUtilities
  module Proxies
    # Checks whether stored proxies are usable by requesting a Google page
    # through each of them, and writes the outcome back via ::Proxy.import.
    #
    # Results of individual checks accumulate in +processed_proxies+ until
    # +update_proxies+ persists them; the list is not cleared afterwards.
    class ProxyChecker
      require 'activerecord-import'
      attr_accessor :client, :processed_proxies
      attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts

      # Processing modes understood by #check_proxies.
      SUPPORTED_MODES = [:synchronous, :resque, :sidekiq].freeze

      def initialize
        self.client = HttpUtilities::Http::Client.new
        self.processed_proxies = []

        self.limit = 1000
        self.minimum_successful_attempts = 1
        self.maximum_failed_attempts = 10
      end

      # Checks the matching proxies, then persists whatever results were
      # collected in this process.
      def check_and_update_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        check_proxies(protocol, proxy_type, mode)
        update_proxies
      end

      # Selects up to +limit+ proxies that are due for a check and either
      # checks them inline (:synchronous) or enqueues one job per proxy
      # (:resque / :sidekiq).
      #
      # @param protocol [Symbol] protocol filter, or :all
      # @param proxy_type [Symbol] proxy type filter, or :all
      # @param mode [Symbol] one of SUPPORTED_MODES
      def check_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        # Previously an unknown mode fell through the case statement without a
        # trace; report it once instead of silently doing nothing per proxy.
        unless SUPPORTED_MODES.include?(mode)
          Rails.logger.warn "Unknown proxy check mode: #{mode.inspect}. No proxies were checked."
          return
        end

        # Materialize once so the emptiness check, the count and the iteration
        # all work on the same list (nil.to_a is [], covering a nil result).
        proxies = ::Proxy.should_be_checked(protocol, proxy_type, Time.now, self.limit).to_a

        if proxies.empty?
          Rails.logger.info "Couldn't find any proxies to check!"
          return
        end

        Rails.logger.info "Found #{proxies.size} #{proxy_type} proxies to check."

        proxies.each do |proxy|
          case mode
          when :synchronous
            check_proxy(proxy)
          when :resque
            Resque.enqueue(HttpUtilities::Jobs::Resque::Proxies::CheckProxyJob, proxy.id)
          when :sidekiq
            HttpUtilities::Jobs::Sidekiq::Proxies::CheckProxyJob.perform_async(proxy.id)
          end
        end
      end

      # Fetches a Google page through +proxy+ and records whether the response
      # looks like the real Google home page.
      #
      # @param proxy [Object] record responding to +proxy_address+ and +protocol+
      # @param timeout [Integer] value passed to the client as :timeout
      # @return [Array<Hash>] the updated +processed_proxies+ list
      def check_proxy(proxy, timeout = 60)
        options = {:method             => :net_http,
                   :use_proxy          => true,
                   :proxy              => proxy.proxy_address,
                   :proxy_protocol     => proxy.protocol,
                   :timeout            => timeout,
                   :maximum_redirects  => 1,
                   :disable_auth       => true
                  }

        Rails.logger.info "#{Time.now}: Fetching Proxy #{proxy.proxy_address}."

        response    = self.client.retrieve_parsed_html("http://www.google.com/webhp?hl=en", options)
        valid_proxy = google_page?(response, proxy)

        if (valid_proxy)
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
        else
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
        end

        self.processed_proxies << {:proxy => proxy, :valid => valid_proxy}
      end

      # Bulk-writes the collected check results (attempt counters, validity
      # flag and check time) through ::Proxy.import.
      #
      # @return [Object, nil] the import result, or nil when nothing was processed
      def update_proxies
        processed = self.processed_proxies || []

        Rails.logger.info "Updating/Importing #{processed.size} proxies"
        return if processed.empty?

        columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
        values  = processed.map { |entry| build_import_row(entry) }

        ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
      end

      private

      # True when the parsed response has a title of "google" or a body
      # mentioning "google home". Any error while inspecting the document is
      # logged and treated as an invalid proxy.
      def google_page?(response, proxy)
        return false unless response && response.parsed_body

        title = response.parsed_body.css("title").first
        return false unless title && title.content

        begin
          title        = title.content.encode("UTF-8").strip.downcase
          body_content = response.parsed_body.content.to_s.encode("UTF-8").strip.downcase

          looks_like_google = (title.eql?("google") || !(body_content =~ /google home/i).nil?)

          Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"

          looks_like_google
        rescue StandardError => e
          # StandardError rather than Exception, so interrupts and exit
          # requests are not swallowed by a proxy check.
          Rails.logger.error "Exception occured while trying to validate proxy. Error Class: #{e.class}. Error Message: #{e.message}"
          false
        end
      end

      # Builds one row for ::Proxy.import from a processed entry
      # ({:proxy => record, :valid => boolean}).
      def build_import_row(entry)
        proxy = entry[:proxy]

        # to_i keeps the arithmetic safe when a counter column is still NULL.
        successful_attempts = proxy.successful_attempts.to_i
        failed_attempts     = proxy.failed_attempts.to_i

        if entry[:valid]
          successful_attempts += 1
        else
          failed_attempts += 1
        end

        is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)

        [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
      end

    end
  end
end
@@ -0,0 +1,70 @@
module HttpUtilities
  module Proxies
    # Mixin for a Proxy model: adds query helpers and address/credential
    # formatting as class methods, plus per-record convenience methods.
    module ProxyModule

      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Proxies that are due for a check: never checked or last checked
        # before +date+, with at most 10 failed attempts.
        #
        # @param protocol [Symbol, String, nil] protocol filter; :all/nil disables it
        # @param proxy_type [Symbol, String, nil] type filter; :all/nil disables it
        # @param date [Time] cut-off for +last_checked_at+
        # @param limit [Integer] maximum number of records
        def should_be_checked(protocol = :all, proxy_type = :all, date = Time.now, limit = 10)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["(last_checked_at IS NULL OR last_checked_at < ?)", date])
          conditions << "failed_attempts <= 10"

          where(conditions.join(" AND ")).order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC").limit(limit)
        end

        # Picks one valid, previously checked proxy at random.
        #
        # @return [Object, nil] a matching record, or nil when none exists
        def get_random_proxy(protocol = :all, proxy_type = :all)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["valid_proxy = ?", true])
          conditions << "last_checked_at IS NOT NULL"
          query = conditions.join(" AND ")

          order_clause = random_order_clause
          proxy = nil

          uncached do
            scope = where(query)
            # Only order when a clause is known for the adapter; a nil clause
            # used to be handed straight to order().
            scope = scope.order(order_clause) if order_clause
            proxy = scope.limit(1).first
          end

          proxy
        end

        # Builds the SQL fragments that restrict a query to one protocol
        # and/or proxy type. A nil value or any casing of "all" adds no
        # restriction for that attribute.
        #
        # @return [Array<String>] sanitized SQL conditions (possibly empty)
        def set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions = []
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["protocol = ?", protocol]) if specific_filter?(protocol)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["proxy_type = ?", proxy_type]) if specific_filter?(proxy_type)
          conditions
        end

        # Formats "host:port", optionally prefixed with "http://".
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          return proxy_address unless include_http && !proxy_address.start_with?("http://")

          "http://#{proxy_address}"
        end

        # Formats "username:password".
        def format_proxy_credentials(username, password)
          "#{username}:#{password}"
        end

        private

        # True when +value+ names a concrete protocol/type rather than nil or "all".
        def specific_filter?(value)
          !value.nil? && value.to_s.downcase != "all"
        end

        # Random-ordering SQL for the given connection adapter class name:
        # RAND() for MySQL, RANDOM() for SQLite and PostgreSQL, nil for
        # adapters whose syntax is not known here.
        def random_order_clause(adapter_class_name = ActiveRecord::Base.connection.class.name)
          clause = case adapter_class_name.to_s
            when /Mysql/i              then "RAND() DESC"
            when /SQLite|PostgreSQL/i  then "RANDOM() DESC"
          end

          return nil unless clause

          # Mark the fragment as intentional raw SQL where Arel is available.
          (defined?(::Arel) && ::Arel.respond_to?(:sql)) ? ::Arel.sql(clause) : clause
        end
      end

      module InstanceMethods
        # "host:port" for this record, optionally prefixed with "http://".
        def proxy_address(include_http = false)
          ::Proxy.format_proxy_address(self.host, self.port, include_http)
        end

        # "username:password" for this record.
        def proxy_credentials
          ::Proxy.format_proxy_credentials(self.username, self.password)
        end
      end

    end
  end
end
@@ -0,0 +1,104 @@
module HttpUtilities
  module Proxies
    # Reads "host:port" lists from seed files, grouped by
    # protocol / proxy type / category directories, and bulk-imports them
    # through ::Proxy.import.
    class ProxySeeder
      require 'activerecord-import'
      attr_accessor :protocols, :proxy_types, :categories

      # Number of rows handed to ::Proxy.import per call.
      IMPORT_BATCH_SIZE = 1000

      def initialize
        self.protocols    = ['http', 'socks5']
        self.proxy_types  = ['public', 'shared', 'private']
        self.categories   = ['L1', 'L2', 'L3', 'unspecified']
      end

      # Entry point: parses every seed file and imports the proxies found.
      def seed
        import_proxies
      end

      # Imports every parsed proxy list, one protocol/type/category at a time.
      def import_proxies
        proxy_data = parse_proxies
        return if proxy_data.nil? || proxy_data.empty?

        proxy_data.each do |protocol, types|
          types.each do |type, categories|
            categories.each do |category, proxies|
              bulk_import_proxies(proxies, protocol, type, category)
            end
          end
        end
      end

      # Imports +proxy_list+ in batches of IMPORT_BATCH_SIZE rows. The list is
      # left untouched; a nil or empty list is a no-op.
      #
      # @param proxy_list [Array<Hash>, nil] entries with :host and :port
      # @param protocol [String] stored in the protocol column
      # @param proxy_type [String] stored in the proxy_type column
      # @param category [String, nil] 'unspecified' or nil is stored as NULL
      # @return [nil]
      def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
        return if proxy_list.nil? || proxy_list.empty?

        columns  = [:host, :port, :protocol, :proxy_type, :category]
        category = nil if category.nil? || category.eql?('unspecified')

        proxy_list.each_slice(IMPORT_BATCH_SIZE) do |batch|
          values = batch.map { |proxy| [proxy[:host], proxy[:port], protocol, proxy_type, category] }
          ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false)
        end

        nil
      end

      # Builds a nested hash protocol => proxy type => category => proxies.
      # Only the "http" protocol has per-category sub-directories; files
      # directly under the proxy type directory are added to 'unspecified'.
      def parse_proxies
        proxies = {}

        self.protocols.each do |protocol|
          proxies[protocol] = {}

          self.proxy_types.each do |proxy_type|
            by_category = {'unspecified' => []}
            type_root   = "#{get_seed_root}#{protocol}/#{proxy_type}"

            if (protocol.eql?("http"))
              self.categories.each do |category|
                by_category[category] = get_proxies_from_files("#{type_root}/#{category}/*.txt")
              end
            end

            by_category['unspecified'] = by_category['unspecified'] + get_proxies_from_files("#{type_root}/*.txt")
            proxies[protocol][proxy_type] = by_category
          end
        end

        proxies
      end

      # Parses every file matching +pattern+; each usable line has the form
      # "host:port". Lines without a colon, with a blank host or with a
      # port that does not parse to a positive integer are skipped.
      #
      # @param pattern [String] glob pattern for the seed files
      # @return [Array<Hash>] entries of the form {:host => String, :port => Integer}
      def get_proxies_from_files(pattern)
        proxies = []

        Dir.glob(pattern).each do |file_path|
          File.readlines(file_path).each do |row|
            parts = row.strip.split(":")
            next if parts.size < 2

            host = parts[0].strip
            port = parts[1].to_i
            next if host.empty? || port <= 0

            proxies << {:host => host, :port => port}
          end
        end

        proxies
      end

      # Directory holding the seed files: the Rails app's
      # db/seed_data/proxies/ when it exists, otherwise the copy under the
      # gem's generator templates.
      def get_seed_root
        rails_seed_root = defined?(Rails) ? "#{Rails.root}/db/seed_data/proxies/" : nil
        gem_seed_root   = File.join(File.dirname(__FILE__), "../../generators/templates/seed_data/proxies/")

        # File.exist? — the plural File.exists? alias no longer exists in current Ruby.
        (rails_seed_root && File.exist?(rails_seed_root)) ? rails_seed_root : gem_seed_root
      end

    end
  end
end
@@ -0,0 +1,11 @@
require 'http_utilities'
require 'rails'

module HttpUtilities
  # Railtie that loads every .rake file from the gem's tasks directory
  # when the host application sets up its rake tasks.
  class Railtie < Rails::Railtie

    rake_tasks do
      rake_files = Dir[File.join(File.dirname(__FILE__), '../tasks/*.rake')]

      rake_files.each do |rake_file|
        load rake_file
      end
    end

  end
end
@@ -0,0 +1,47 @@
# -*- encoding : utf-8 -*-
module HttpUtilities
  VERSION = "1.0.1"

  # Directory of this file; all component paths are resolved against it.
  lib_root = File.dirname(__FILE__)

  # Requires each listed component (paths relative to http_utilities/), in order.
  require_components = lambda do |relative_paths|
    relative_paths.each do |relative_path|
      require File.join(lib_root, 'http_utilities', relative_path)
    end
  end

  require_components.call(%w[railtie]) if defined?(Rails)

  # HTTP building blocks, request/response objects, adapters, then clients.
  require_components.call(%w[
    http/proxy_support
    http/cookies
    http/user_agent
    http/url
    http/format
    http/get
    http/post
    http/logger
    http/request
    http/response
    http/adapters/net_http
    http/adapters/open_uri
    http/adapters/curb
    http/client
    http/mechanize/client
  ])

  # Proxy support is only loaded when ActiveRecord is present; the job
  # classes additionally depend on their queueing library being loaded.
  if defined?(ActiveRecord)
    require_components.call(%w[
      proxies/proxy_module
      proxies/proxy_checker
      proxies/proxy_seeder
    ])

    if defined?(Resque)
      require_components.call(%w[
        jobs/resque/proxies/check_proxies_job
        jobs/resque/proxies/check_proxy_job
      ])
    end

    if defined?(Sidekiq)
      require_components.call(%w[
        jobs/sidekiq/proxies/check_proxies_job
        jobs/sidekiq/proxies/check_proxy_job
      ])
    end
  end

  MultiXml.parser = :nokogiri if defined?(MultiXml)
end
@@ -0,0 +1,19 @@
namespace :http_utilities do
  namespace :proxies do

    # Each task carries its own description: a task without one is not
    # listed by `rake -T`.
    desc "Import proxies from the seed data files"
    task :seed_proxies => :environment do |task, args|
      seeder = HttpUtilities::Proxies::ProxySeeder.new
      seeder.seed
    end

    desc "Check proxies and update their status (defaults: protocol=http, proxy_type=public, processing_method=synchronous)"
    task :check_proxies, [:protocol, :proxy_type, :processing_method] => [:environment] do |task, args|
      protocol          = (args.protocol) ? args.protocol.to_sym : :http
      proxy_type        = (args.proxy_type) ? args.proxy_type.to_sym : :public
      processing_method = (args.processing_method) ? args.processing_method.to_sym : :synchronous

      proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
      proxy_checker.check_and_update_proxies(protocol, proxy_type, processing_method)
    end
  end
end
@@ -0,0 +1,10 @@
1
+ sqlite3:
2
+ adapter: sqlite3
3
+ database: http_utilities.sqlite3
4
+
5
+ mysql:
6
+ adapter: mysql2
7
+ encoding: utf8
8
+ database: http_utilities_test
9
+ username:
10
+ password:
@@ -0,0 +1,145 @@
require File.expand_path('../../spec_helper', __FILE__)

describe HttpUtilities::Http::Client do

  describe "when modules have been included" do
    before(:each) do
      @client   = HttpUtilities::Http::Client.new
      @request  = HttpUtilities::Http::Request.new
      @response = HttpUtilities::Http::Response.new
    end

    # [example label, instance variable holding the receiver, expected method]
    [
      ["a net http module method",   :@client,   :post_and_retrieve_content_using_net_http],
      ["a open uri module method",   :@client,   :retrieve_open_uri_content],
      ["a curb module method",       :@client,   :post_and_retrieve_content_using_curl],
      ["a proxy module method",      :@request,  :set_proxy_options],
      ["a cookies module method",    :@client,   :handle_cookies],
      ["a user agent module method", :@request,  :user_agent],
      ["a request module method",    :@client,   :generate_request_url],
      ["a get module method",        :@client,   :retrieve_raw_content],
      ["a post module method",       :@client,   :post_and_retrieve_content],
      ["a format module method",     :@response, :as_html]
    ].each do |label, receiver, expected_method|
      it "should respond to #{label}" do
        instance_variable_get(receiver).should respond_to(expected_method)
      end
    end
  end

  describe "when initialized" do
    before(:each) do
      @client  = HttpUtilities::Http::Client.new
      @request = HttpUtilities::Http::Request.new
    end

    # Search parameters shared by the Google-backed examples below.
    def google_search_params
      {:url => "http://www.google.com", :q => "ruby on rails", :start => 0}
    end

    # Request url the client generates for the shared search parameters.
    def google_search_url
      @client.generate_request_url(google_search_params)
    end

    it "should have assigned user agent" do
      @request.user_agent.should_not == nil
    end

    it "should return a properly formatted request url using supplied parameters" do
      google_search_url.should == "http://www.google.com?q=ruby%20on%20rails&start=0"
    end

    describe "when retrieving content using Net::Http" do
      it "should fetch Google results as unparsed HTML" do
        response = @client.retrieve_raw_content(google_search_url, {:method => :net_http})
        response.body.should be_a(String)
      end

      it "should fetch Google results as a Nokogiri::HTML::Document" do
        response = @client.retrieve_parsed_html(google_search_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::HTML::Document)
      end

      it "should fetch Google Weather data a Nokogiri::XML::Document" do
        weather_url = @client.generate_request_url({:url => "http://www.google.com/ig/api", :weather => 90120})
        response    = @client.retrieve_parsed_xml(weather_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

    describe "when retrieving content using a proxy" do
      it "should have the proxy instance variable properly set" do
        options  = {:method => :net_http, :proxy => "127.0.0.1:80", :response_only => false}
        response = @client.retrieve_parsed_html(google_search_url, options)
        proxy    = response.request.proxy

        proxy.should_not be_nil
        proxy[:host].should == '127.0.0.1'
        proxy[:port].should == 80
      end
    end

    describe "when persisting cookies" do
      it "should have the cookie instance variable properly set" do
        options  = {:method => :net_http, :use_cookies => true, :save_cookies => true, :response_only => false}
        response = @client.retrieve_parsed_html(google_search_url, options)

        response.request.cookies.should_not be_nil
      end
    end

    describe "when posting content" do
      before(:each) do
        @trackback_url = "http://techcrunch.com/wp-trackback.php?p=314942"
        @post_data     = {
          :url        => "http://www.google.com",
          :blog_name  => "Testing",
          :title      => "Title",
          :excerpt    => "Testing..."
        }
      end

      # The Curb-backed example is only defined outside JRuby.
      unless defined?(JRUBY_VERSION)
        it "should send a trackback to a TechCrunch post using Curb and return the response as a Nokogiri::XML::Document" do
          response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :curl})
          response.parsed_body.should be_a(Nokogiri::XML::Document)
        end
      end

      it "should send a trackback to a TechCrunch post using Net::Http and return the response as a Nokogiri::XML::Document" do
        response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

  end

end
@@ -0,0 +1,35 @@
require File.expand_path('../../spec_helper', __FILE__)

describe HttpUtilities::Http::Mechanize::Client do

  describe "when modules have been included" do
    before(:each) do
      @client = HttpUtilities::Http::Mechanize::Client.new
    end

    # [example label, method the client is expected to expose]
    [
      ["a user agent module method", :user_agent],
      ["a request module method",    :generate_request_url]
    ].each do |label, expected_method|
      it "should respond to #{label}" do
        @client.should respond_to(expected_method)
      end
    end
  end

  describe "when initialized" do
    before(:each) do
      @client = HttpUtilities::Http::Mechanize::Client.new
    end

    it "should have assigned user agents" do
      @client.user_agent.should_not be_nil
    end

    it "should submit a google search query successfully" do
      # TODO: mock this request instead of hitting the live site.
      form_identifier = {:name => "f"}
      form_fields     = {:q => {:type => :input, :value => "Ruby on Rails"}}

      page = @client.set_form_and_submit("http://www.google.com/webhp", form_identifier, :first, form_fields)
      page.parser.should_not be_nil
    end
  end

end
@@ -0,0 +1,11 @@
require File.expand_path('../../spec_helper', __FILE__)

describe HttpUtilities::Proxies::ProxyChecker do

  # No examples yet; the group only prepares a fresh checker per example.
  describe "when initialized" do
    before(:each) { @checker = HttpUtilities::Proxies::ProxyChecker.new }
  end

end