http_utilities 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +22 -0
  3. data/README +15 -0
  4. data/Rakefile +87 -0
  5. data/VERSION +1 -0
  6. data/http_utilities.gemspec +78 -0
  7. data/lib/generators/active_record/http_utilities_generator.rb +21 -0
  8. data/lib/generators/active_record/templates/migration.rb +34 -0
  9. data/lib/generators/active_record/templates/proxy.rb +3 -0
  10. data/lib/generators/helpers/file_helper.rb +35 -0
  11. data/lib/generators/helpers/orm_helpers.rb +15 -0
  12. data/lib/generators/http_utilities/http_utilities_generator.rb +25 -0
  13. data/lib/generators/templates/http_utilities.rb +2 -0
  14. data/lib/generators/templates/user_agents.yml +3419 -0
  15. data/lib/http_utilities/http/adapters/curb.rb +107 -0
  16. data/lib/http_utilities/http/adapters/net_http.rb +130 -0
  17. data/lib/http_utilities/http/adapters/open_uri.rb +46 -0
  18. data/lib/http_utilities/http/client.rb +22 -0
  19. data/lib/http_utilities/http/cookies.rb +49 -0
  20. data/lib/http_utilities/http/format.rb +26 -0
  21. data/lib/http_utilities/http/get.rb +67 -0
  22. data/lib/http_utilities/http/logger.rb +11 -0
  23. data/lib/http_utilities/http/mechanize/client.rb +197 -0
  24. data/lib/http_utilities/http/post.rb +32 -0
  25. data/lib/http_utilities/http/proxy_support.rb +88 -0
  26. data/lib/http_utilities/http/request.rb +20 -0
  27. data/lib/http_utilities/http/response.rb +50 -0
  28. data/lib/http_utilities/http/url.rb +48 -0
  29. data/lib/http_utilities/http/user_agent.rb +3380 -0
  30. data/lib/http_utilities/jobs/resque/proxies/check_proxies_job.rb +15 -0
  31. data/lib/http_utilities/jobs/resque/proxies/check_proxy_job.rb +21 -0
  32. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxies_job.rb +17 -0
  33. data/lib/http_utilities/jobs/sidekiq/proxies/check_proxy_job.rb +22 -0
  34. data/lib/http_utilities/proxies/proxy_checker.rb +122 -0
  35. data/lib/http_utilities/proxies/proxy_module.rb +70 -0
  36. data/lib/http_utilities/proxies/proxy_seeder.rb +104 -0
  37. data/lib/http_utilities/railtie.rb +11 -0
  38. data/lib/http_utilities.rb +47 -0
  39. data/lib/tasks/http_utilities_tasks.rake +19 -0
  40. data/spec/database.yml.example +10 -0
  41. data/spec/http_utilities/client_spec.rb +145 -0
  42. data/spec/http_utilities/mechanize_client_spec.rb +35 -0
  43. data/spec/http_utilities/proxy_checker_spec.rb +11 -0
  44. data/spec/http_utilities/proxy_seeder_spec.rb +24 -0
  45. data/spec/http_utilities/proxy_spec.rb +114 -0
  46. data/spec/models.rb +6 -0
  47. data/spec/schema.rb +30 -0
  48. data/spec/spec_helper.rb +50 -0
  49. metadata +209 -0
@@ -0,0 +1,15 @@
1
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that checks a batch of stored proxies.
        class CheckProxiesJob
          # Queue name, stored as a class-level instance variable.
          @queue = :proxies

          # Job entry point, defined on the class because Resque invokes
          # +perform+ on the job class rather than on an instance.
          #
          # Arguments may be symbols or strings; each one is converted with
          # +to_sym+ before being handed to the checker.
          #
          # Uses ProxyChecker#check_and_update_proxies so that results
          # collected in :synchronous mode are written back instead of being
          # dropped together with the temporary checker instance.
          #
          # @param protocol [Symbol, String] protocol filter, :all for no filter
          # @param proxy_type [Symbol, String] proxy type filter, :all for no filter
          # @param mode [Symbol, String] :synchronous, :resque or :sidekiq
          def self.perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end

          # Instance-level variant kept so existing callers of
          # +CheckProxiesJob.new.perform+ keep working; delegates to the
          # class-level entry point.
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            self.class.perform(protocol, proxy_type, mode)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module HttpUtilities
  module Jobs
    module Resque
      module Proxies
        # Resque job that re-checks one proxy record and stores the outcome.
        class CheckProxyJob
          @queue = :proxies

          # Looks the proxy up by id, runs a single check against it and
          # writes the result back. Does nothing when no record matches.
          #
          # @param proxy_id [Object] value matched against the +id+ column
          def self.perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that checks a batch of stored proxies.
        class CheckProxiesJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies,
                          :unique => false

          # Runs a proxy check for the given filters.
          #
          # Arguments may be symbols or strings; each one is converted with
          # +to_sym+ before being handed to the checker.
          #
          # Uses ProxyChecker#check_and_update_proxies so that results
          # collected in :synchronous mode are written back instead of being
          # dropped together with the temporary checker instance.
          #
          # @param protocol [Symbol, String] protocol filter, :all for no filter
          # @param proxy_type [Symbol, String] proxy type filter, :all for no filter
          # @param mode [Symbol, String] :synchronous, :resque or :sidekiq
          def perform(protocol = :all, proxy_type = :all, mode = :synchronous)
            checker = HttpUtilities::Proxies::ProxyChecker.new
            checker.check_and_update_proxies(protocol.to_sym, proxy_type.to_sym, mode.to_sym)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module HttpUtilities
  module Jobs
    module Sidekiq
      module Proxies
        # Sidekiq worker that re-checks one proxy record and stores the outcome.
        class CheckProxyJob
          include ::Sidekiq::Worker
          sidekiq_options :queue => :proxies

          # Looks the proxy up by id, runs a single check against it and
          # writes the result back. Does nothing when no record matches.
          #
          # @param proxy_id [Object] value matched against the +id+ column
          def perform(proxy_id)
            record = ::Proxy.where(:id => proxy_id).first
            return unless record

            proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
            proxy_checker.check_proxy(record)
            proxy_checker.update_proxies
          end
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # -*- encoding : utf-8 -*-
2
+
3
module HttpUtilities
  module Proxies
    # Checks stored proxies by requesting a Google page through each of them
    # and writes the outcome back with ::Proxy.import.
    class ProxyChecker
      require 'activerecord-import'

      attr_accessor :client, :processed_proxies
      attr_accessor :limit, :minimum_successful_attempts, :maximum_failed_attempts

      # Creates the HTTP client, an empty result list and the default
      # thresholds (limit 1000, 1 required success, 10 allowed failures).
      def initialize
        self.client = HttpUtilities::Http::Client.new
        self.processed_proxies = []

        self.limit = 1000
        self.minimum_successful_attempts = 1
        self.maximum_failed_attempts = 10
      end

      # Runs #check_proxies and then persists whatever was collected with
      # #update_proxies.
      def check_and_update_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        check_proxies(protocol, proxy_type, mode)
        update_proxies
      end

      # Fetches up to +limit+ proxies from Proxy.should_be_checked and either
      # checks each one inline (:synchronous) or enqueues one job per proxy
      # (:resque / :sidekiq). Any other mode checks nothing.
      #
      # @param protocol [Symbol] protocol filter passed to the query
      # @param proxy_type [Symbol] proxy type filter passed to the query
      # @param mode [Symbol] :synchronous, :resque or :sidekiq
      def check_proxies(protocol = :all, proxy_type = :all, mode = :synchronous)
        proxies = Proxy.should_be_checked(protocol, proxy_type, Time.now, self.limit)

        if (proxies && proxies.any?)
          Rails.logger.info "Found #{proxies.size} #{proxy_type} proxies to check."

          proxies.each do |proxy|
            case mode
              when :synchronous
                check_proxy(proxy)
              when :resque
                Resque.enqueue(HttpUtilities::Jobs::Resque::Proxies::CheckProxyJob, proxy.id)
              when :sidekiq
                HttpUtilities::Jobs::Sidekiq::Proxies::CheckProxyJob.perform_async(proxy.id)
            end
          end

        else
          Rails.logger.info "Couldn't find any proxies to check!"
        end
      end

      # Requests the Google home page through +proxy+ and appends the verdict
      # to +processed_proxies+ as {:proxy => proxy, :valid => true/false}.
      #
      # @param proxy [Object] record responding to +proxy_address+ and +protocol+
      # @param timeout [Integer] value passed to the client as :timeout
      def check_proxy(proxy, timeout = 60)
        options = {
          :method            => :net_http,
          :use_proxy         => true,
          :proxy             => proxy.proxy_address,
          :proxy_protocol    => proxy.protocol,
          :timeout           => timeout,
          :maximum_redirects => 1,
          :disable_auth      => true
        }

        Rails.logger.info "#{Time.now}: Fetching Proxy #{proxy.proxy_address}."

        response    = self.client.retrieve_parsed_html("http://www.google.com/webhp?hl=en", options)
        valid_proxy = google_page?(response, proxy)

        if (valid_proxy)
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is working!"
        else
          Rails.logger.info "#{Time.now}: Proxy #{proxy.proxy_address} is not working!"
        end

        self.processed_proxies << {:proxy => proxy, :valid => valid_proxy}
      end

      # Bulk-writes the collected results. For every processed proxy the
      # matching counter is incremented and +valid_proxy+ is recomputed from
      # the configured thresholds. Returns nil when there is nothing to write.
      def update_proxies
        columns = [:host, :port, :last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts]
        # Tolerate a nil list so the log line below cannot raise.
        results = self.processed_proxies || []

        Rails.logger.info "Updating/Importing #{results.size} proxies"
        return if results.empty?

        values = results.map do |value|
          proxy = value[:proxy]
          # to_i keeps a nil counter from raising on += 1.
          successful_attempts = proxy.successful_attempts.to_i
          failed_attempts     = proxy.failed_attempts.to_i

          if (value[:valid])
            successful_attempts += 1
          else
            failed_attempts += 1
          end

          is_valid = (successful_attempts >= self.minimum_successful_attempts && failed_attempts < self.maximum_failed_attempts)
          [proxy.host, proxy.port, Time.now, is_valid, successful_attempts, failed_attempts]
        end

        ::Proxy.import(columns, values, :on_duplicate_key_update => [:last_checked_at, :valid_proxy, :successful_attempts, :failed_attempts], :validate => false)
      end

      private

      # True when the response's <title> is exactly "google" or its text
      # contains "google home". Any StandardError raised while inspecting the
      # document is logged and treated as "not valid"; signals and exit
      # requests are no longer swallowed.
      def google_page?(response, proxy)
        return false unless (response && response.parsed_body)

        title = response.parsed_body.css("title").first
        return false unless (title && title.content)

        begin
          title        = title.content.encode("UTF-8").strip.downcase
          body_content = response.parsed_body.content.to_s.encode("UTF-8").strip.downcase
          matched      = (title.eql?("google") || !(body_content =~ /google home/i).nil?)

          Rails.logger.info "Title is: #{title}. Proxy #{proxy.proxy_address}"
          matched
        rescue StandardError => e
          Rails.logger.error "Exception occured while trying to validate proxy. Error Class: #{e.class}. Error Message: #{e.message}"
          false
        end
      end

    end
  end
end
@@ -0,0 +1,70 @@
1
module HttpUtilities
  module Proxies
    # Mixin providing query helpers (class level) and address/credential
    # formatting (instance level) for a proxy model.
    module ProxyModule

      # Extends the including class with ClassMethods and includes
      # InstanceMethods into it.
      def self.included(base)
        base.send :extend, ClassMethods
        base.send :include, InstanceMethods
      end

      module ClassMethods
        # Proxies that were never checked or were last checked before +date+,
        # with at most 10 failed attempts, ordered by valid_proxy,
        # failed_attempts and last_checked_at (all ascending).
        #
        # @param protocol [Symbol, String] protocol filter, :all for no filter
        # @param proxy_type [Symbol, String] proxy type filter, :all for no filter
        # @param date [Time] cut-off for last_checked_at
        # @param limit [Integer] maximum number of rows
        def should_be_checked(protocol = :all, proxy_type = :all, date = Time.now, limit = 10)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["(last_checked_at IS NULL OR last_checked_at < ?)", date])
          conditions << "failed_attempts <= 10"

          where(conditions.join(" AND ")).order("valid_proxy ASC, failed_attempts ASC, last_checked_at ASC").limit(limit)
        end

        # Picks one valid, previously checked proxy at random, bypassing the
        # query cache. The random ORDER BY depends on the connection adapter:
        # RAND() for MySQL, RANDOM() for SQLite and PostgreSQL. For any other
        # adapter no ordering is applied.
        #
        # @return [Object, nil] a matching record, or nil when none exists
        def get_random_proxy(protocol = :all, proxy_type = :all)
          conditions = set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["valid_proxy = ?", true])
          conditions << "last_checked_at IS NOT NULL"
          query = conditions.join(" AND ")

          order_clause = case ActiveRecord::Base.connection.class.name
            when "ActiveRecord::ConnectionAdapters::MysqlAdapter",
                 "ActiveRecord::ConnectionAdapters::Mysql2Adapter"
              "RAND() DESC"
            when "ActiveRecord::ConnectionAdapters::SQLite3Adapter",
                 "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
              "RANDOM() DESC"
          end

          proxy = nil

          uncached do
            proxy = where(query).order(order_clause).limit(1).first
          end

          return proxy
        end

        # Builds sanitized SQL fragments for the protocol / proxy_type
        # filters. A filter that is nil or equals :all (case-insensitive) is
        # left out.
        #
        # @return [Array<String>] zero, one or two SQL conditions
        def set_protocol_and_proxy_type_conditions(protocol, proxy_type)
          conditions = []
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["protocol = ?", protocol]) if (protocol && !protocol.downcase.to_sym.eql?(:all))
          conditions << ActiveRecord::Base.send(:sanitize_sql_array, ["proxy_type = ?", proxy_type]) if (proxy_type && !proxy_type.downcase.to_sym.eql?(:all))
          return conditions
        end

        # Joins host and port as "host:port", optionally prefixed with
        # "http://" unless the result already starts with it.
        def format_proxy_address(proxy_host, proxy_port = 80, include_http = false)
          proxy_address = "#{proxy_host}:#{proxy_port}"
          proxy_address.insert(0, "http://") if (include_http && !proxy_address.start_with?("http://"))
          return proxy_address
        end

        # Joins username and password as "username:password".
        def format_proxy_credentials(username, password)
          return "#{username}:#{password}"
        end
      end

      module InstanceMethods
        # "host:port" for this record (see ClassMethods#format_proxy_address).
        def proxy_address(include_http = false)
          return ::Proxy.format_proxy_address(self.host, self.port, include_http)
        end

        # "username:password" for this record.
        def proxy_credentials
          return ::Proxy.format_proxy_credentials(self.username, self.password)
        end
      end

    end
  end
end
@@ -0,0 +1,104 @@
1
module HttpUtilities
  module Proxies
    # Imports proxies from "host:port" text files into the Proxy table.
    #
    # Files are looked up below the seed root as
    # <protocol>/<proxy_type>/*.txt and, for the "http" protocol only,
    # <protocol>/<proxy_type>/<category>/*.txt.
    class ProxySeeder
      require 'activerecord-import'
      attr_accessor :protocols, :proxy_types, :categories

      # Maximum number of rows handed to a single ::Proxy.import call.
      BATCH_SIZE = 1000

      def initialize
        self.protocols   = ['http', 'socks5']
        self.proxy_types = ['public', 'shared', 'private']
        self.categories  = ['L1', 'L2', 'L3', 'unspecified']
      end

      # Entry point: parses all seed files and imports what was found.
      def seed
        import_proxies
      end

      # Walks the parsed protocol => type => category => proxies structure
      # and bulk-imports every list.
      def import_proxies
        proxy_data = parse_proxies
        return if (proxy_data.nil? || proxy_data.empty?)

        proxy_data.each do |protocol, types|
          types.each do |type, categories|
            categories.each do |category, proxies|
              bulk_import_proxies(proxies, protocol, type, category)
            end
          end
        end
      end

      # Imports +proxy_list+ in batches of BATCH_SIZE rows. The category
      # 'unspecified' is stored as nil. The list passed in is left untouched
      # (it is no longer emptied with slice!), and a nil or empty list
      # results in no import call.
      #
      # @param proxy_list [Array<Hash>] hashes with :host and :port
      # @param protocol [String]
      # @param proxy_type [String]
      # @param category [String, nil]
      def bulk_import_proxies(proxy_list, protocol, proxy_type, category)
        columns  = [:host, :port, :protocol, :proxy_type, :category]
        category = (category && !category.eql?('unspecified')) ? category : nil

        (proxy_list || []).each_slice(BATCH_SIZE) do |batch|
          values = batch.map { |proxy| [proxy[:host], proxy[:port], protocol, proxy_type, category] }
          ::Proxy.import(columns, values, :on_duplicate_key_update => [:proxy_type], :validate => false)
        end
      end

      # Builds the nested hash protocol => proxy_type => category => proxies.
      # Every protocol/type pair gets an 'unspecified' list holding the files
      # that sit directly in the type directory, appended to whatever the
      # 'unspecified' category directory contained.
      def parse_proxies
        proxies   = {}
        seed_root = get_seed_root

        self.protocols.each do |protocol|
          proxies[protocol] = {}

          self.proxy_types.each do |proxy_type|
            type_root   = "#{seed_root}#{protocol}/#{proxy_type}/"
            by_category = {'unspecified' => []}

            if (protocol.eql?("http"))
              self.categories.each do |category|
                by_category[category] = get_proxies_from_files("#{type_root}#{category}/*.txt")
              end
            end

            by_category['unspecified'] = by_category['unspecified'] + get_proxies_from_files("#{type_root}*.txt")
            proxies[protocol][proxy_type] = by_category
          end
        end

        return proxies
      end

      # Reads every file matching +pattern+ and returns one
      # {:host => String, :port => Integer} hash per usable "host:port" row.
      # Rows without a host or without a positive numeric port (blank lines,
      # comments, malformed entries) are skipped.
      def get_proxies_from_files(pattern)
        proxies = []

        Dir.glob(pattern).each do |file_path|
          File.foreach(file_path) do |row|
            host, port = row.strip.split(":")
            host = host.to_s.strip
            port = port.to_i

            next if (host.empty? || port <= 0)

            proxies << {:host => host, :port => port}
          end
        end

        return proxies
      end

      # Directory holding the seed files: the Rails app's
      # db/seed_data/proxies/ when Rails is loaded and that directory exists,
      # otherwise the copy under the gem's generators/templates directory.
      def get_seed_root
        rails_seed_root = defined?(Rails) ? "#{Rails.root}/db/seed_data/proxies/" : nil
        gem_seed_root   = File.join(File.dirname(__FILE__), "../../generators/templates/seed_data/proxies/")

        # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
        return (rails_seed_root && File.exist?(rails_seed_root)) ? rails_seed_root : gem_seed_root
      end

    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'http_utilities'
2
+ require 'rails'
3
module HttpUtilities
  # Hooks the gem's rake tasks into the host Rails application.
  class Railtie < Rails::Railtie

    rake_tasks do
      # Every .rake file in lib/tasks is loaded when Rails loads its tasks.
      task_pattern = File.join(File.dirname(__FILE__), '../tasks/*.rake')

      Dir.glob(task_pattern).each do |rake_file|
        load rake_file
      end
    end

  end
end
@@ -0,0 +1,47 @@
1
+ # -*- encoding : utf-8 -*-
2
module HttpUtilities
  VERSION = "1.0.1"

  # Requires a file given by its path relative to this file's directory.
  load_component = lambda do |relative_path|
    require File.join(File.dirname(__FILE__), relative_path)
  end

  load_component.call('http_utilities/railtie') if defined?(Rails)

  # HTTP building blocks, then request/response, adapters and the clients.
  # The order is the load order.
  [
    'http_utilities/http/proxy_support',
    'http_utilities/http/cookies',
    'http_utilities/http/user_agent',
    'http_utilities/http/url',
    'http_utilities/http/format',
    'http_utilities/http/get',
    'http_utilities/http/post',
    'http_utilities/http/logger',

    'http_utilities/http/request',
    'http_utilities/http/response',

    'http_utilities/http/adapters/net_http',
    'http_utilities/http/adapters/open_uri',
    'http_utilities/http/adapters/curb',

    'http_utilities/http/client',

    'http_utilities/http/mechanize/client'
  ].each(&load_component)

  # Proxy persistence is only loaded when ActiveRecord is present; the
  # background jobs additionally need their queueing library.
  if defined?(ActiveRecord)
    [
      'http_utilities/proxies/proxy_module',
      'http_utilities/proxies/proxy_checker',
      'http_utilities/proxies/proxy_seeder'
    ].each(&load_component)

    if defined?(Resque)
      [
        'http_utilities/jobs/resque/proxies/check_proxies_job',
        'http_utilities/jobs/resque/proxies/check_proxy_job'
      ].each(&load_component)
    end

    if defined?(Sidekiq)
      [
        'http_utilities/jobs/sidekiq/proxies/check_proxies_job',
        'http_utilities/jobs/sidekiq/proxies/check_proxy_job'
      ].each(&load_component)
    end
  end

  MultiXml.parser = :nokogiri if defined?(MultiXml)
end
47
+
@@ -0,0 +1,19 @@
1
namespace :http_utilities do
  namespace :proxies do

    # Imports the proxy seed files through ProxySeeder#seed.
    desc "Proxy tasks"
    task :seed_proxies => :environment do |_task, _args|
      seeder = HttpUtilities::Proxies::ProxySeeder.new
      seeder.seed
    end

    # Without its own description this task would not show up in `rake -T`.
    # Arguments are optional and default to http / public / synchronous.
    desc "Check stored proxies and save the results (args: protocol, proxy_type, processing_method)"
    task :check_proxies, [:protocol, :proxy_type, :processing_method] => [:environment] do |_task, args|
      protocol          = (args.protocol) ? args.protocol.to_sym : :http
      proxy_type        = (args.proxy_type) ? args.proxy_type.to_sym : :public
      processing_method = (args.processing_method) ? args.processing_method.to_sym : :synchronous

      proxy_checker = HttpUtilities::Proxies::ProxyChecker.new
      proxy_checker.check_and_update_proxies(protocol, proxy_type, processing_method)
    end
  end
end
@@ -0,0 +1,10 @@
1
+ sqlite3:
2
+ adapter: sqlite3
3
+ database: http_utilities.sqlite3
4
+
5
+ mysql:
6
+ adapter: mysql2
7
+ encoding: utf8
8
+ database: http_utilities_test
9
+ username:
10
+ password:
@@ -0,0 +1,145 @@
1
+ require File.expand_path('../../spec_helper', __FILE__)
2
+
3
describe HttpUtilities::Http::Client do

  describe "when modules have been included" do
    before do
      @client   = HttpUtilities::Http::Client.new
      @request  = HttpUtilities::Http::Request.new
      @response = HttpUtilities::Http::Response.new
    end

    # One example per mixed-in module: [label, object under test, method].
    [
      ["net http",   :@client,   :post_and_retrieve_content_using_net_http],
      ["open uri",   :@client,   :retrieve_open_uri_content],
      ["curb",       :@client,   :post_and_retrieve_content_using_curl],
      ["proxy",      :@request,  :set_proxy_options],
      ["cookies",    :@client,   :handle_cookies],
      ["user agent", :@request,  :user_agent],
      ["request",    :@client,   :generate_request_url],
      ["get",        :@client,   :retrieve_raw_content],
      ["post",       :@client,   :post_and_retrieve_content],
      ["format",     :@response, :as_html]
    ].each do |module_label, receiver, method_name|
      it "should respond to a #{module_label} module method" do
        instance_variable_get(receiver).should respond_to(method_name)
      end
    end
  end

  describe "when initialized" do
    before do
      @client  = HttpUtilities::Http::Client.new
      @request = HttpUtilities::Http::Request.new
    end

    # Query used by every Google search example in this group.
    def google_search_params
      {:url => "http://www.google.com", :q => "ruby on rails", :start => 0}
    end

    # Request url generated from the shared Google search parameters.
    def google_search_url
      @client.generate_request_url(google_search_params)
    end

    it "should have assigned user agent" do
      @request.user_agent.should_not == nil
    end

    it "should return a properly formatted request url using supplied parameters" do
      google_search_url.should == "http://www.google.com?q=ruby%20on%20rails&start=0"
    end

    describe "when retrieving content using Net::Http" do
      it "should fetch Google results as unparsed HTML" do
        response = @client.retrieve_raw_content(google_search_url, {:method => :net_http})
        response.body.should be_a(String)
      end

      it "should fetch Google results as a Nokogiri::HTML::Document" do
        response = @client.retrieve_parsed_html(google_search_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::HTML::Document)
      end

      it "should fetch Google Weather data a Nokogiri::XML::Document" do
        weather_url = @client.generate_request_url({:url => "http://www.google.com/ig/api", :weather => 90120})
        response    = @client.retrieve_parsed_xml(weather_url, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

    describe "when retrieving content using a proxy" do
      it "should have the proxy instance variable properly set" do
        options  = {:method => :net_http, :proxy => "127.0.0.1:80", :response_only => false}
        response = @client.retrieve_parsed_html(google_search_url, options)
        proxy    = response.request.proxy

        proxy.should_not be_nil
        proxy[:host].should == '127.0.0.1'
        proxy[:port].should == 80
      end
    end

    describe "when persisting cookies" do
      it "should have the cookie instance variable properly set" do
        options  = {:method => :net_http, :use_cookies => true, :save_cookies => true, :response_only => false}
        response = @client.retrieve_parsed_html(google_search_url, options)

        response.request.cookies.should_not be_nil
      end
    end

    describe "when posting content" do
      before do
        @trackback_url = "http://techcrunch.com/wp-trackback.php?p=314942"
        @post_data     = {
          :url       => "http://www.google.com",
          :blog_name => "Testing",
          :title     => "Title",
          :excerpt   => "Testing..."
        }
      end

      # The Curb example is only defined when not running on JRuby.
      unless defined?(JRUBY_VERSION)
        it "should send a trackback to a TechCrunch post using Curb and return the response as a Nokogiri::XML::Document" do
          response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :curl})
          response.parsed_body.should be_a(Nokogiri::XML::Document)
        end
      end

      it "should send a trackback to a TechCrunch post using Net::Http and return the response as a Nokogiri::XML::Document" do
        response = @client.post_and_retrieve_parsed_xml(@trackback_url, @post_data, {:method => :net_http})
        response.parsed_body.should be_a(Nokogiri::XML::Document)
      end
    end

  end

end
145
+
@@ -0,0 +1,35 @@
1
+ require File.expand_path('../../spec_helper', __FILE__)
2
+
3
describe HttpUtilities::Http::Mechanize::Client do

  describe "when modules have been included" do
    before { @client = HttpUtilities::Http::Mechanize::Client.new }

    # One example per mixed-in module: [label, expected method].
    [
      ["user agent", :user_agent],
      ["request",    :generate_request_url]
    ].each do |module_label, method_name|
      it "should respond to a #{module_label} module method" do
        @client.should respond_to(method_name)
      end
    end
  end

  describe "when initialized" do
    before { @client = HttpUtilities::Http::Mechanize::Client.new }

    it "should have assigned user agents" do
      @client.user_agent.should_not be_nil
    end

    it "should submit a google search query successfully" do
      # TODO: mock this request instead of hitting the network.
      form_identifier = {:name => "f"}
      form_fields     = {:q => {:type => :input, :value => "Ruby on Rails"}}

      page = @client.set_form_and_submit("http://www.google.com/webhp", form_identifier, :first, form_fields)
      page.parser.should_not be_nil
    end
  end

end
@@ -0,0 +1,11 @@
1
+ require File.expand_path('../../spec_helper', __FILE__)
2
+
3
describe HttpUtilities::Proxies::ProxyChecker do

  # No examples yet; the group only prepares a checker for future ones.
  describe "when initialized" do
    before { @checker = HttpUtilities::Proxies::ProxyChecker.new }
  end

end