magellan 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,11 @@
1
+ Magellan: (alpha)
2
+
3
+ Currently the supported functionality is a rake task that crawls your website and finds any broken a[@href], img[@src], or script[@src] links.
4
+
5
+ Magellan::Rake::Task.new do |t|
6
+ t.origin_url = "http://localhost:3000/"
7
+ t.explore_depth = 100
8
+ end
9
+
10
+ Assumptions:
11
+ This tool works best if you follow the rules of unobtrusive JavaScript and properly set the HTTP status code header.
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 1
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,13 @@
1
+ require 'magellan/cartographer'
2
+ require 'magellan/explorer'
3
+ require 'magellan/result'
4
+ require 'magellan/broken_link_tracker'
5
+ require 'magellan/expected_links_tracker'
6
+ require 'magellan/logger'
7
+ require 'magellan/extensions/string'
8
+ require 'magellan/extensions/array'
9
+ require 'magellan/extensions/mechanize_page'
10
+
11
# Top-level namespace for the Magellan site crawler.
module Magellan
  # Gem version. Matches the release metadata (major 0, minor 1, patch 0);
  # the constant previously read '0.0.1'.
  VERSION = '0.1.0'
end
@@ -0,0 +1,30 @@
1
require 'observer'

module Magellan
  # Observer that records crawl results whose status code starts with 4 or 5
  # and remembers which page first linked to each resource.
  #
  # The tracker is itself Observable: after every result it notifies its own
  # observers with (Time.now, passed), where passed is true for a result that
  # was not recorded as broken.
  class BrokenLinkTracker
    include Observable

    # Results recorded as broken, in the order they were reported.
    attr_reader :broken_links

    def initialize
      @broken_links = []
      @first_linked_from = {}
    end

    # Observer callback.
    #
    # time   - timestamp supplied by the notifier (not used here).
    # result - object responding to status_code, url and
    #          absolute_linked_resources.
    def update(time, result)
      # to_s lets an Integer (or nil) status code be classified without raising.
      failed = result.status_code.to_s.start_with?('4', '5')
      @broken_links << result if failed
      changed
      notify_observers(Time.now, !failed)
      result.absolute_linked_resources.each do |linked_resource|
        # Keep only the first page seen linking to each resource.
        @first_linked_from[linked_resource] = result.url unless @first_linked_from.key?(linked_resource)
      end
    end

    # True once at least one broken result has been recorded.
    def failed?
      !@broken_links.empty?
    end

    # One line per broken link: its url, the first page that linked to it and
    # the status code it returned.
    def failure_message
      lines = @broken_links.map do |broken_link|
        "#{broken_link.url} first linked from: #{@first_linked_from[broken_link.url]} returned: #{broken_link.status_code}"
      end
      lines.join("\n")
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'activesupport'
2
+ require 'observer'
3
+
4
module Magellan
  # Drives a crawl: explores the origin url, notifies observers of every
  # result, then recurses into the newly discovered links (in batches) until
  # the configured depth is exceeded.
  class Cartographer
    include Observable

    # Maximum number of urls handed to a single Explorer.
    URLS_PER_BATCH = 40

    # settings - Hash with the keys :origin_url, :ignored_urls, :domains,
    #            :depth_to_explore, :links_to_explore and :trace.
    def initialize(settings)
      @origin_url = settings[:origin_url]
      # Work on a copy: visited urls are appended to this list during the
      # crawl and must not leak back into the caller's ignored_urls array.
      # Array() also tolerates a missing (nil) list.
      @known_urls = Array(settings[:ignored_urls]).dup
      @domains = Array(settings[:domains]).map { |domain| URI.parse(domain) }
      @depth_to_explore = settings[:depth_to_explore]
      @links_we_want_to_explore = settings[:links_to_explore]
      @trace = settings[:trace]
    end

    # Starts the crawl at the origin url (depth 1).
    def crawl
      recursive_explore([@origin_url], 1)
    end

    # Explores urls, reports each result to the observers and recurses into
    # links that are in an allowed domain and have not been seen before.
    def recursive_explore(urls, depth)
      return unless i_am_not_too_deep?(depth)

      $stdout.puts "exploring:\n#{urls.join("\n")}" if @trace
      results = Explorer.new(urls, @links_we_want_to_explore).explore
      results.each do |result|
        changed
        notify_observers(Time.now, result)
        @known_urls << result.url.remove_fragment
        @known_urls << result.destination_url.remove_fragment
        remove_javascript_and_print_warning result
      end

      all_urls = results.map { |result| result.absolute_linked_resources }.flatten.uniq
      #TODO: handle any other url parsing error
      all_urls.delete_if { |url| !a_domain_we_care_about?(url) }
      all_urls.delete_if { |url| i_have_seen_this_url_before?(url) }
      all_urls.each_slice(URLS_PER_BATCH) do |batch|
        recursive_explore(batch, depth + 1)
      end
    end

    # True when url (ignoring any fragment) is ignored or already explored.
    def i_have_seen_this_url_before?(url)
      @known_urls.include?(url.remove_fragment)
    end

    def i_am_not_too_deep?(depth)
      depth <= @depth_to_explore
    end

    # True when the host of url equals the host of one of the configured
    # domains.
    def a_domain_we_care_about?(url)
      host = URI.parse(url).host
      @domains.any? { |domain| domain.host == host }
    rescue StandardError
      # URI.parse raised (the specs exercise a url with non-ASCII characters);
      # fall back to comparing the text after the scheme with each host.
      without_scheme = url.sub(/\Ahttps?:\/\//, '')
      @domains.any? { |domain| domain.host && without_scheme.start_with?(domain.host) }
    end

    # Drops "javascript:" links from the result so they are never explored.
    def remove_javascript_and_print_warning(result)
      result.linked_resources.delete_if do |linked_resource|
        #TODO: put this in the logger
        #$stderr.puts "Found obtrusive javascript: #{linked_resource} on page #{result.url}" if starts_with_javascript
        linked_resource.downcase.start_with?("javascript:")
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
require 'observer'

module Magellan
  # Observer that checks, for every HTML result whose url or destination url
  # matches a pattern, that the page links to the resource expected for that
  # pattern. It also tracks which patterns were never matched by any result.
  #
  # The tracker is itself Observable: each evaluated expectation is reported
  # to its observers as (Time.now, passed).
  class ExpectedLinksTracker
    include Observable

    # Messages for pages that lacked an expected link.
    attr_reader :errors

    # expected_patterns - list of [pattern, expected_link] pairs. nil is
    #                     treated as an empty list.
    def initialize(expected_patterns)
      @errors = []
      @expected_patterns = Array(expected_patterns)
      # Used as a set: the keys are the [pattern, link] pairs seen so far.
      @evaluated_expectations = {}
    end

    # Observer callback; results that are not HTML are ignored.
    def update(time, result)
      return unless result.html_content?

      patterns_that_apply(result).each do |_pattern, expectation|
        passed = result.linked_resources.include?(expectation)
        changed
        notify_observers(Time.now, passed)
        @errors << "#{result.url} did not contain a link to #{expectation}" unless passed
      end
    end

    # Returns the [pattern, link] pairs whose pattern matches the result's url
    # or destination url, and marks them as evaluated.
    def patterns_that_apply(result)
      applicable = @expected_patterns.select do |pattern, _expectation|
        result.url =~ pattern || result.destination_url =~ pattern
      end
      applicable.each { |expected_pattern| @evaluated_expectations[expected_pattern] = nil }
      applicable
    end

    def has_errors?
      !@errors.empty?
    end

    def unmet_expecations?
      !unmet_expecations.empty?
    end

    def failed?
      unmet_expecations? || has_errors?
    end

    # Unevaluated-pattern messages followed by the per-page errors.
    def failure_message
      unmet_expecations_messages << errors.join("\n")
    end

    def unmet_expecations_messages
      message = "\n\n"
      unmet_expecations.each do |pattern, _unmet_expectation|
        message << "#{pattern} was never evaluated during the crawl\n"
      end
      message
    end

    # Pairs whose pattern never matched any observed result.
    def unmet_expecations
      @expected_patterns - @evaluated_expectations.keys
    end

    # Correctly spelled names; the original (misspelled) ones stay available
    # for existing callers.
    alias_method :unmet_expectations?, :unmet_expecations?
    alias_method :unmet_expectations, :unmet_expecations
    alias_method :unmet_expectations_messages, :unmet_expecations_messages
  end
end
@@ -0,0 +1,45 @@
1
+ require 'hpricot'
2
+ require 'open-uri'
3
+ require 'ostruct'
4
+
5
module Magellan
  # Fetches a batch of urls, one thread per url, and turns every response
  # into a Magellan::Result.
  class Explorer
    # Content type recorded when the response does not provide one.
    UNKNOWN_CONTENT = "unknown"

    # urls  - urls to fetch.
    # links - [tag, attribute] pairs passed to
    #         links_to_other_documents for HTML responses.
    def initialize(urls, links)
      @links = links
      @urls = urls
    end

    # Fetches all urls concurrently and returns the results in the same order
    # as the urls were given.
    def explore
      # The url is passed as a thread argument so every thread works on its
      # own local copy.
      threads = @urls.map { |url| Thread.new(url) { |target| explore_a(target) } }
      threads.map { |thread| thread.value }
    end

    # Fetches a single url and returns a Result for it. HTTP error responses
    # and timeouts are converted into results instead of being raised.
    def explore_a(url)
      agent = WWW::Mechanize.new
      agent.user_agent = "Ruby/#{RUBY_VERSION}"
      doc = agent.get(url)
      destination_url = doc.uri.to_s
      status_code = doc.code
      #TODO: clean this up, this is very hacky, I would rather pass in a hpricot doc to create a result
      # The response object may not respond to content_type, and a nil
      # content type would otherwise raise on start_with?.
      content_type = (doc.respond_to?(:content_type) && doc.content_type) || UNKNOWN_CONTENT
      links = content_type.start_with?("text/html") ? doc.links_to_other_documents(@links) : []
      Explorer.create_result(url, destination_url, status_code, links, content_type)
    rescue WWW::Mechanize::ResponseCodeError => the_error
      Explorer.create_result(url, url, the_error.response_code, [], UNKNOWN_CONTENT)
    rescue Timeout::Error
      # NOTE(review): a timeout is reported as status "505"; confirm that this
      # code (rather than e.g. "504") is intended.
      Explorer.create_result(url, url, "505", [], UNKNOWN_CONTENT)
    end

    # Builds a Result, converting every link to a String.
    def self.create_result(url, destination_url, status_code, links, content_type)
      Result.new(status_code, url, destination_url, links.map { |link| link.to_s }, content_type)
    end
  end
end
@@ -0,0 +1,10 @@
1
class Array
  # Splits the array into consecutive sub-arrays holding at most max_size
  # elements each; the last one may be shorter.
  #
  #   [1,2,3,4,5].chunk(3) # => [[1,2,3],[4,5]]
  #
  # This method shadows Enumerable#chunk. When no max_size is given the call
  # is forwarded to that implementation, so the block form
  # (array.chunk { |x| ... }) keeps working.
  #
  # max_size - positive Integer; each_slice raises ArgumentError otherwise.
  def chunk(max_size = nil, &block)
    return super(&block) if max_size.nil?

    each_slice(max_size).to_a
  end
end
@@ -0,0 +1,10 @@
1
+ require 'mechanize'
2
class WWW::Mechanize::Page
  # Gathers attribute values for several tag/attribute combinations.
  #
  # links_to_other_resources - list of pairs; the first element of each pair
  #                            is used as the tag and the last as the
  #                            attribute, e.g. [["a","href"],["img","src"]].
  #
  # Returns a single flattened array with all collected values.
  def links_to_other_documents(links_to_other_resources)
    values_per_pair = links_to_other_resources.map do |pair|
      get_attributes(pair.first, pair.last)
    end
    values_per_pair.flatten
  end

  # Returns the non-nil values of attribute on the elements found by
  # searching this page for tag.
  def get_attributes(tag, attribute)
    elements = self / tag
    values = elements.map { |element| element.attributes[attribute] }
    values.compact
  end
end
@@ -0,0 +1,21 @@
1
+ require 'activesupport'
2
+ require 'open-uri'
3
class String
  # Resolves this string, treated as a (possibly relative) link, against
  # origin_url.
  #
  # origin_url - absolute url of the page the link was found on.
  #
  # Returns the absolute url as a String. If the link cannot be resolved
  # (any StandardError is raised while building it) the receiver is returned
  # unchanged.
  def to_absolute_url(origin_url)
    stripped = strip
    if stripped.start_with?('?')
      #BUG in URI.join? URI.join('http://www.google.com/index.html?foo=b','?foo=a') # => http://www.google.com/?foo=a
      # Query-only links are therefore resolved by hand: swap the query of
      # the origin url for the new one.
      origin_url.gsub(/\?.*/, '') + stripped
    else
      URI.join(origin_url, stripped).to_s
    end
  rescue StandardError
    self
  end

  # Returns a copy of the string with everything from the first '#' removed.
  def remove_fragment
    gsub(/#.*/, '')
  end
end
@@ -0,0 +1,8 @@
1
module Magellan
  # Progress reporter: prints one character per observed result.
  class Logger
    # Observer callback. Writes '.' to $stdout when result is truthy and 'F'
    # otherwise, then flushes so the character shows up immediately.
    def update(time, result)
      marker = if result
                 '.'
               else
                 'F'
               end
      $stdout.putc(marker)
      $stdout.flush
    end
  end
end
@@ -0,0 +1,40 @@
1
+
2
+ #TODO: this is not a good place to use a template method - violates Liskov substitution principle
3
module Magellan
  module Rake
    # Shared skeleton for the Magellan rake tasks. It defines a rake task
    # that crawls origin_url with a Cartographer, feeds the results to an
    # observer and exits with status 1 when that observer reports a failure.
    #
    # Subclasses are expected to provide description, links_to_explore,
    # create_observer and success_message.
    class BaseMagellanTask < ::Rake::TaskLib
      attr_accessor :origin_url
      attr_accessor :explore_depth
      attr_accessor :ignored_urls

      # name - name of the rake task to define.
      #
      # Yields self so the task can be configured before it is defined.
      def initialize(name)
        @ignored_urls = []
        @name = name
        yield self if block_given?
        define
      end

      # Registers the rake task.
      def define
        desc description
        task @name do
          settings = {
            :origin_url => origin_url,
            :depth_to_explore => explore_depth,
            :domains => [origin_url],
            :ignored_urls => ignored_urls,
            :links_to_explore => links_to_explore,
            :trace => ENV['TRACE']
          }
          cartographer = Magellan::Cartographer.new(settings)
          observer = create_observer
          observer.add_observer(Magellan::Logger.new)
          cartographer.add_observer(observer)
          cartographer.crawl
          if observer.failed?
            # $stderr (not the STDERR constant) so that the output follows
            # any reassignment of the global stream, as $stdout does below.
            $stderr.puts observer.failure_message
            exit 1
          else
            $stdout.puts "\n" + success_message
          end
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'magellan'
4
+ require 'magellan/rake/base_magellan_task'
5
+
6
module Magellan
  module Rake
    # Rake task (default name "magellan:explore") that crawls the origin url
    # and uses a BrokenLinkTracker as its observer.
    class BrokenLinkTask < BaseMagellanTask
      def initialize(name = "magellan:explore")
        super(name)
      end

      # Text shown for the task in the rake task list.
      def description
        "explore #{@origin_url} for broken links"
      end

      # Tag/attribute pairs whose values are followed during the crawl.
      def links_to_explore
        anchors = ["a", "href"]
        scripts = ["script", "src"]
        images = ["img", "src"]
        [anchors, scripts, images]
      end

      # Observer that receives every crawl result.
      def create_observer
        Magellan::BrokenLinkTracker.new
      end

      # Printed when the observer reports no failure.
      def success_message
        "No broken links were found!"
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'magellan'
4
+ require 'magellan/rake/base_magellan_task'
5
+
6
module Magellan
  module Rake
    # Rake task (default name "magellan:check_links") that crawls the origin
    # url and uses an ExpectedLinksTracker as its observer.
    class ExpectedLinksTask < BaseMagellanTask
      # List of [pattern, expected_link] pairs handed to the tracker.
      attr_accessor :patterns_and_expected_links

      def initialize(name = "magellan:check_links")
        # Set the default before calling super: the block given to new is
        # forwarded to super, so a value assigned in that block still wins.
        # Without a default the tracker would receive nil.
        @patterns_and_expected_links = []
        super(name)
      end

      # Text shown for the task in the rake task list.
      def description
        "Explore #{@origin_url} and check if all given patterns are matched"
      end

      # Only anchor hrefs are followed for this task.
      def links_to_explore
        [["a", "href"]]
      end

      # Observer that receives every crawl result.
      def create_observer
        Magellan::ExpectedLinksTracker.new(@patterns_and_expected_links)
      end

      # Printed when the observer reports no failure.
      def success_message
        "All expected links found!"
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Magellan
  # Outcome of exploring a single url.
  class Result
    attr_reader :status_code, :url, :destination_url, :linked_resources

    # status_code      - status code of the response.
    # url              - url that was requested.
    # destination_url  - url the response finally came from.
    # linked_resources - links found in the response, as written in the page.
    # content_type     - content type of the response; may be nil.
    def initialize(status_code, url, destination_url, linked_resources, content_type)
      @status_code = status_code
      @url = url
      @destination_url = destination_url
      @linked_resources = linked_resources
      @content_type = content_type
    end

    # Returns the linked resources resolved against destination_url.
    def absolute_linked_resources
      linked_resources.map { |linked_resource| linked_resource.to_s.to_absolute_url(destination_url) }.compact
    end

    # True when the content type starts with "text/html". to_s keeps a nil
    # content type from raising.
    def html_content?
      @content_type.to_s.start_with?("text/html")
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
# Specs for Array#chunk.
describe "Array Extensions" do
  it "should be able to break up a array into chunks with a max size" do
    [1,2,3,4,5].chunk(3).size.should eql(2)
    [1,2,3,4,5].chunk(3).first.should eql([1,2,3])
    [1,2,3,4,5].chunk(3).last.should eql([4,5])
  end

  # This example used to share its description with the one above, which made
  # the two indistinguishable in the spec output.
  it "should put every element in its own chunk when the max size is 1" do
    [1,2,3,4,5].chunk(1).size.should eql(5)
    [1,2,3,4,5].chunk(1).first.should eql([1])
  end
end
@@ -0,0 +1,64 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'rake'
3
+
4
# Specs for the broken-link rake task. Explorer#explore_a is stubbed in every
# example that invokes the task, so no network access takes place.
describe "Magellan BrokenLinkTask" do

  before :all do
    @file_name = File.dirname(__FILE__) + "/../lib/magellan/rake/broken_link_task.rb"
    @rake = Rake::Application.new
    Rake.application = @rake
  end

  before :each do
    load @file_name
    $stdout.stubs(:putc)
  end

  after :all do
    Rake.application = nil
  end

  it "should create a rake task" do
    Magellan::Rake::BrokenLinkTask.new
    tasks.include?("magellan:explore").should be_true
  end

  it "should explore when task is invoked" do
    Magellan::Rake::BrokenLinkTask.new("invoke_task") do |t|
      t.explore_depth = 1
      t.origin_url = "http://localhost:8080"
    end
    Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
    $stdout.expects(:puts) #passed message
    @rake.invoke_task("invoke_task")
  end

  it "should raise exception when broken links are found" do
    Magellan::Rake::BrokenLinkTask.new("exception_task") do |t|
      t.explore_depth = 1
      t.origin_url = "http://canrailsscale.com"
    end
    $stderr.expects(:puts)
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","500"))
    # The task calls exit 1 on failure; match SystemExit so that an unrelated
    # error cannot make this example pass.
    lambda {@rake.invoke_task("exception_task")}.should raise_error(SystemExit)
  end

  it "should attach logger" do
    Magellan::Rake::BrokenLinkTask.new("logger_test") do |t|
      t.explore_depth = 1
      t.origin_url = "http://canrailsscale.com"
    end
    $stderr.stubs(:puts)
    Magellan::Logger.any_instance.expects(:update)
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","500"))
    lambda {@rake.invoke_task("logger_test")}.should raise_error(SystemExit)
  end

  # Builds a link-less result whose url and destination url are the same.
  def create_result(url,status_code)
    Magellan::Explorer.create_result(url,url,status_code, [],"foo")
  end

  # Names of all tasks registered with the rake application under test.
  def tasks
    @rake.tasks.collect{|task| task.name }
  end
end
@@ -0,0 +1,67 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
# Specs for Magellan::BrokenLinkTracker.
describe Magellan::BrokenLinkTracker do

  it "should not report broken links if there are none" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_success_result('http://www.foo.com',['jalskdjflakjsf']))
    broken_link_tracker.failed?.should be_false
  end

  it "should record links by absolute_url" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_success_result('http://www.bozo.com/foople.html',['/apples.html']))
    broken_link_tracker.update(Time.now,create_result('http://www.bozo.com/apples.html',"404",[]))
    broken_link_tracker.failure_message.should include("http://www.bozo.com/foople.html")
  end

  it "should only record broken links errors" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_success_result('http://www.foo.com',['http://www.google.com']))
    broken_link_tracker.update(Time.now,create_result('http://www.foo.com/404',"404",[]))
    broken_link_tracker.failed?.should be_true
    broken_link_tracker.broken_links.size.should eql(1)
  end

  it "should record 4** errors" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_result('http://www.foo.com/404',"404",[]))
    broken_link_tracker.broken_links.first.status_code.should eql('404')
  end

  it "have url and status code in the error message" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_result('broke url',"404",[]))
    broken_link_tracker.failure_message.should include('broke url')
    broken_link_tracker.failure_message.should include("404")
  end

  it "should record 5** errors" do
    broken_link_tracker = Magellan::BrokenLinkTracker.new
    broken_link_tracker.update(Time.now,create_result('fooz',"500",[]))
    broken_link_tracker.broken_links.first.status_code.should eql('500')
  end

  it "should update the observer with a pass" do
    tracker = Magellan::BrokenLinkTracker.new
    tracker.add_observer(Magellan::Logger.new)
    $stdout.expects(:putc).with('.')
    tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
  end

  # Previously described as "with a pass", although it checks that a 404
  # result is reported to the logger as 'F'.
  it "should update the observer with a failure" do
    tracker = Magellan::BrokenLinkTracker.new
    tracker.add_observer(Magellan::Logger.new)
    $stdout.expects(:putc).with('F')
    tracker.update(Time.now,Magellan::Result.new('404','/zoro','/zoro',['/fail_about_us.html'],"text/html"))
  end

  # Builds a result with status "200".
  def create_success_result(url,linked_resources)
    create_result(url,"200",linked_resources)
  end

  # Builds a result whose url and destination url are the same.
  def create_result(url,status_code, linked_resources)
    Magellan::Result.new(status_code,url,url,linked_resources,"foo")
  end
end
@@ -0,0 +1,176 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
# Specs for Magellan::Cartographer. Explorer#explore_a is mocked in every
# example that crawls, so the specs do not depend on network access.
describe Magellan::Cartographer do

  it "should not visit the same url more then once" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should not visit the origin url more then once if it finds a link with a finishing /" do
    pending
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/']))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should try to explore urls in the domain we care about that have non ascii characters in them" do
    origin_url = "http://www.reddit.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["http://www.reddit.com/r/science/comments/87dk7/cold_fusion_is_a_pipe_dream_but_μcatalyzed_cool/","http://www.domainwedontcareabout.com/μ"]))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://www.reddit.com/r/science/comments/87dk7/cold_fusion_is_a_pipe_dream_but_μcatalyzed_cool/").returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should not visit the same url more then once if they differ by fragment id" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com#foo']))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should notify observers when a result comes in" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    foo = Object.new
    foo.expects(:update)
    cartographer.add_observer(foo)
    cartographer.crawl
  end

  it "should notify observers everytime a result comes in" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/bar.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
    Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/bar.html').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    foo = Object.new
    foo.expects(:update).times(3)
    cartographer.add_observer(foo)
    cartographer.crawl
  end

  it "should explore other linked resources" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should not explore ignored urls" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/ignoreme.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url,3,[origin_url],['http://www.google.com/ignoreme.html']))
    cartographer.crawl
  end

  it "should not explore the same url more then once" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/foo.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should be able to specify crawlable domains" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.foo.com']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.foo.com').returns(create_success_result(['http://www.bar.com']))
    cartographer = Magellan::Cartographer.new(settings(origin_url, 5,['http://www.google.com','http://www.foo.com']))
    cartographer.crawl
  end

  it "should explore relative links" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['/foo2.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should go n layers deep into a site" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['http://www.google.com/foo2.html']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result(['http://www.google.com/foo3.html']))
    cartographer = Magellan::Cartographer.new(settings(origin_url,3))
    cartographer.crawl
  end

  it "should use host to determine if we are in a allowed domain" do
    origin_url = "http://www.google.com/jskfjlsajfd"
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.a_domain_we_care_about?("http://www.google.com/index.html").should be_true
  end

  it "should not explore js urls and print warnings if they are found, obtrusive javascript is bad mmkay" do
    origin_url = "http://www.google.com"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["javascript:bookmarksite('ThoughtWorks Studios', 'http://studios.thoughtworks.com')",'http://www.google.com/foo']))
    Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo').returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  #<a alex.hal9000@gmail.com="" href="mailto:PWang@thoughtworks.com,">PWang@thoughtworks.com, alex.hal9000@gmail.com</a>

  it "should not die on checking the domain on invalid urls" do
    origin_url = "http://www.google.com/adsfaf"
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.a_domain_we_care_about?("mailto:PWang@thoughtworks.com,").should be_false
  end

  it "should not explore mailto urls" do
    origin_url = "http://www.google.com/adsfaf"
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["mailto:foo"]))
    cartographer = Magellan::Cartographer.new(settings(origin_url))
    cartographer.crawl
  end

  it "should puts out urls if the trace is enabled" do
    origin_url = "http://www.google.com/adsfaf"
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url).merge( {:trace=> true}))
    $stdout.expects(:puts).with {|value| value.include?(origin_url)}
    cartographer.crawl
  end

  it "should not puts if the trace is disabled" do
    origin_url = "http://www.google.com/adsfaf"
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
    cartographer = Magellan::Cartographer.new(settings(origin_url).merge( {:trace=> false}))
    $stdout.expects(:puts).never
    cartographer.crawl
  end

  it "should record the source and the destination url in known urls" do
    origin_url = "http://studios.thoughtworks.com/cruise"
    destination_url = "http://studios.thoughtworks.com/cruise-continuous-integration"
    # Mock the fetch with a result whose destination differs from its source,
    # so the example no longer depends on the live site being reachable.
    redirected = Magellan::Result.new("200",origin_url,destination_url,[],"text/html")
    Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(redirected)
    cartographer = Magellan::Cartographer.new(settings(origin_url, 1))
    cartographer.crawl
    cartographer.i_have_seen_this_url_before?(origin_url).should be_true
    cartographer.i_have_seen_this_url_before?(destination_url).should be_true
  end

  it "should go through a entire site if layers to explore is set to -1"
  it "should explore n layers into external domains"

  # Builds a result with status "200".
  def create_success_result(linked_resources)
    create_result("200",linked_resources)
  end

  # Cartographer settings that follow only a[href] links.
  def settings(origin_url,depth=5,domains = [origin_url], ignored_urls=[])
    {:origin_url => origin_url, :depth_to_explore => depth, :domains => domains, :ignored_urls =>ignored_urls, :links_to_explore => [["a","href"]] }
  end

  # Builds an HTML result located at http://www.google.com.
  def create_result(status_code, linked_resources)
    Magellan::Result.new(status_code,"http://www.google.com","http://www.google.com",linked_resources,"text/html")
  end

end
@@ -0,0 +1,68 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'rake'
3
+
4
# Specs for the expected-links rake task. Explorer#explore_a is stubbed in
# every example that invokes the task, so no network access takes place.
describe "Magellan ExpectedLinksTask" do

  before :all do
    @file_name = File.dirname(__FILE__) + "/../lib/magellan/rake/expected_links_task.rb"
    @rake = Rake::Application.new
    Rake.application = @rake
  end

  before :each do
    $stdout.stubs(:putc)
    load @file_name
  end

  after :all do
    Rake.application = nil
  end

  it "should create a rake task" do
    Magellan::Rake::ExpectedLinksTask.new
    tasks.include?("magellan:check_links").should be_true
  end

  it "should explore when task is invoked" do
    Magellan::Rake::ExpectedLinksTask.new("some_task") do |t|
      t.explore_depth = 1
      t.patterns_and_expected_links = []
      t.origin_url = "http://localhost:8080"
    end
    $stdout.expects(:puts)
    Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
    @rake.invoke_task("some_task")
  end


  it "should notify a expected link tracker when a task is invoked" do
    Magellan::Rake::ExpectedLinksTask.new("invoke_expected_link_tracker") do |t|
      t.explore_depth = 1
      t.patterns_and_expected_links = []
      t.origin_url = "http://localhost:8080"
    end
    $stdout.expects(:puts)
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
    Magellan::ExpectedLinksTracker.any_instance.expects(:update).once
    @rake.invoke_task("invoke_expected_link_tracker")
  end

  it "should fail the rake task if expected links did not exist or rules did not evaluate to be true" do
    Magellan::Rake::ExpectedLinksTask.new("exception_raising_task") do |t|
      t.explore_depth = 1
      t.patterns_and_expected_links = [[/.*/,'/about_us.html']]
      t.origin_url = "http://canrailsscale.com"
    end
    $stderr.expects(:puts)
    Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","200"))
    # The task calls exit 1 on failure; match SystemExit so that an unrelated
    # error cannot make this example pass.
    lambda {@rake.invoke_task("exception_raising_task")}.should raise_error(SystemExit)
  end

  # Builds a link-less HTML result whose url and destination url are the same.
  def create_result(url,status_code)
    Magellan::Explorer.create_result(url,url,status_code, [],"text/html")
  end

  # Names of all tasks registered with the rake application under test.
  def tasks
    @rake.tasks.collect{|task| task.name }
  end
end
@@ -0,0 +1,87 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::ExpectedLinksTracker do
5
+
6
+ it "should create a error message contianing the offending url and " do
7
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
8
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
9
+ tracker.errors.first.should include('/fozo')
10
+ tracker.errors.first.should include('/about_us.html')
11
+ end
12
+
13
+ it "should be able specify all resource should link to something" do
14
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
15
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
16
+ tracker.has_errors?.should be_false
17
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
18
+ tracker.has_errors?.should be_true
19
+ end
20
+
21
+ it "should only apply rules if they apply to source url" do
22
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
23
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
24
+ tracker.has_errors?.should be_false
25
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_fail_us.html'],"text/html"))
26
+ tracker.has_errors?.should be_true
27
+ end
28
+
29
+ it "should only apply rules if they apply to destination url" do
30
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
31
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
32
+ tracker.has_errors?.should be_false
33
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/foo.html',['/about_fail_us.html'],"text/html"))
34
+ tracker.has_errors?.should be_true
35
+ end
36
+
37
+ it "should know if a expectation was never met" do
38
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
39
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
40
+ tracker.unmet_expecations?.should be_true
41
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/foo.html',['/about_fail_us.html'],"text/html"))
42
+ tracker.unmet_expecations?.should be_false
43
+ end
44
+
45
+ it "should provide a meaningfull error message around unmet expectations" do
46
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
47
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
48
+ tracker.unmet_expecations_messages.should include(/foo\.html/.to_s)
49
+ end
50
+
51
+ it "should return failed if there are unmet expectations" do
52
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
53
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
54
+ tracker.failed?.should be_true
55
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_us.html'],"text/html"))
56
+ tracker.failed?.should be_false
57
+ end
58
+
59
+ it "should return failed if there are failed expectations" do
60
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
61
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
62
+ tracker.failed?.should be_false
63
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
64
+ tracker.failed?.should be_true
65
+ end
66
+
67
+ it "should ignore the result if it is not a html content type" do
68
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
69
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
70
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"application/javascript"))
71
+ tracker.failed?.should be_false
72
+ end
73
+
74
+ it "should update the observer with a pass" do
75
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
76
+ tracker.add_observer(Magellan::Logger.new)
77
+ $stdout.expects(:putc).with('.')
78
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
79
+ end
80
+ it "should update the observer with a pass" do
81
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
82
+ tracker.add_observer(Magellan::Logger.new)
83
+ $stdout.expects(:putc).with('F')
84
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/fail_about_us.html'],"text/html"))
85
+ end
86
+
87
+ end
@@ -0,0 +1,72 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Explorer do
5
+
6
+ it "should find other js resources" do
7
+ result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
8
+ result.first.absolute_linked_resources.should include('http://pagead2.googlesyndication.com/pagead/show_ads.js')
9
+ end
10
+
11
+ it "should foo" do
12
+ WWW::Mechanize.any_instance.expects(:get).raises(Timeout::Error)
13
+ result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
14
+ result.first.status_code.should eql('505')
15
+ result.first.url.should eql('http://canrailsscale.com/')
16
+ end
17
+
18
+ it "should have one result for one url" do
19
+ result = Magellan::Explorer.new(['http://www.google.com/'],links_to_explore).explore
20
+ result.size.should eql(1)
21
+ end
22
+
23
+ it "should have two results for two urls" do
24
+ result = Magellan::Explorer.new(['http://www.google.com/','http://www.apple.com/'],links_to_explore).explore
25
+ result.size.should eql(2)
26
+ end
27
+
28
+ it "should find other pages to explore via a href" do
29
+ result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
30
+ result.first.absolute_linked_resources.should include('http://video.google.com/?hl=en&tab=wv')
31
+ end
32
+
33
+ it "should translate relative urls to absolute ones" do
34
+ result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
35
+ result.first.absolute_linked_resources.should include('http://www.google.com/intl/en/about.html')
36
+ end
37
+
38
+ it "should report non successful status codes" do
39
+ result = Magellan::Explorer.new('http://www.google.com/dfkjaslfkjaslfkj.html',links_to_explore).explore
40
+ result.first.status_code.should eql("404")
41
+ end
42
+
43
+ it "should not get any links if it not a text/xhtml file" do
44
+ result = Magellan::Explorer.new("http://jqueryjs.googlecode.com/files/jquery-1.3.2.min.js",links_to_explore).explore
45
+ result.first.absolute_linked_resources.should be_empty
46
+ end
47
+
48
+ it "should update url if redirected" do
49
+ result = Magellan::Explorer.new("http://www.thoughtworks.com/mingle",links_to_explore).explore
50
+ result.first.destination_url.should eql("http://studios.thoughtworks.com/mingle-agile-project-management")
51
+ end
52
+
53
+ it "should return source url as desintation url if a error occurs" do
54
+ result = Magellan::Explorer.new("http://www.google.com/dfkjaslfkjaslfkj.html",links_to_explore).explore
55
+ result.first.destination_url.should eql("http://www.google.com/dfkjaslfkjaslfkj.html")
56
+ end
57
+
58
+ it "should be able to explore a url" do
59
+ Magellan::Explorer.new('',links_to_explore).explore_a("http://www.yahoo.com")
60
+ end
61
+
62
+ it "should be able to go from http to https" do
63
+ result = Magellan::Explorer.new("http://mail.yahoo.com",links_to_explore).explore
64
+ result.first.destination_url.starts_with?("https://").should be_true
65
+ end
66
+
67
+ it "should be able to crawl ftp based links"
68
+
69
+ def links_to_explore
70
+ [["a","href"],["script","src"],["img","src"]]
71
+ end
72
+ end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Logger do
5
+ it "should put a . for a pass" do
6
+ logger = Magellan::Logger.new
7
+ $stdout.expects(:putc).with('.')
8
+ logger.update(Time.now,true)
9
+ end
10
+ it "should put a F for a fail" do
11
+ logger = Magellan::Logger.new
12
+ $stdout.expects(:putc).with('F')
13
+ logger.update(Time.now,false)
14
+ end
15
+ end
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe "WWW::Mechanize::Page Extensions" do
5
+ LINKS = [["a","href"],["script","src"],["img","src"]]
6
+
7
+ it "should not return nil for script tags without src attritubes" do
8
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo>something</script>")
9
+ doc.links_to_other_documents(LINKS).should be_empty
10
+ end
11
+
12
+ it "should find links based on script tags with src attritubes" do
13
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo src='foozor'>something</script>")
14
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
15
+ links_to_other_documents.size.should eql(1)
16
+ links_to_other_documents.first.to_s.should eql("foozor")
17
+ end
18
+
19
+ it "should be able to get two script sources" do
20
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<body><script class=foo src='foozor'>something</script><script class=foo src='fdsajfkajf'>something</script></body>")
21
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
22
+ links_to_other_documents.size.should eql(2)
23
+ end
24
+
25
+ it "should find links based on a tags with href attritubes" do
26
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo href='bozo'>something</a>")
27
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
28
+ links_to_other_documents.size.should eql(1)
29
+ links_to_other_documents.first.to_s.should eql("bozo")
30
+ end
31
+
32
+ it "should find links based on img tags with src attritubes" do
33
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<img class=foo src='ohno' alt='whatever' />")
34
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
35
+ links_to_other_documents.size.should eql(1)
36
+ links_to_other_documents.first.to_s.should eql("ohno")
37
+ end
38
+
39
+ it "should links based on a tags with href attritubes" do
40
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo>something</a>")
41
+ doc.links_to_other_documents(LINKS).should be_empty
42
+ end
43
+
44
+ end
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Result do
5
+
6
+ it "should not remove fragments when converting to absolute urls" do
7
+ results = Magellan::Result.new("200","http://www.google.com/index.html","http://www.google.com/index.html",["/index.html#foo"],"foo")
8
+ results.absolute_linked_resources.should include("http://www.google.com/index.html#foo")
9
+ end
10
+
11
+ it "should use destination_url to build new absolute urls" do
12
+ results = Magellan::Result.new("200","http://www.google.com/bob.html","http://www.foo.com/bob.html",["/index.html"],"foo")
13
+ results.absolute_linked_resources.should include("http://www.foo.com/index.html")
14
+ end
15
+
16
+
17
+ end
@@ -0,0 +1,11 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'mocha'
4
+ require File.dirname(__FILE__) + '/../config/vendorized_gems'
5
+
6
+ lib_path = File.expand_path("#{File.dirname(__FILE__)}/../lib")
7
+ $LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
8
+
9
+ Spec::Runner.configure do |config|
10
+ config.mock_with :mocha
11
+ end
@@ -0,0 +1,67 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe "String Extensions" do
5
+
6
+ it "should convert relative urls to absolute" do
7
+ input = '/Test_Automation_Framework/chrome/common/js/trac.js'
8
+ input.to_absolute_url('http://www.google.com').should eql('http://www.google.com/Test_Automation_Framework/chrome/common/js/trac.js')
9
+ end
10
+
11
+ it "should remove any relative path from original url" do
12
+ input = '/foo/trac.js'
13
+ input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
14
+ end
15
+
16
+ it "should merge urls correctly with dots" do
17
+ input = '../foo/trac.js'
18
+ input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
19
+ end
20
+
21
+ it "should do nothing to absolute http urls" do
22
+ input = 'http://www.apple.com'
23
+ input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
24
+ end
25
+
26
+ it "should not put double slashes when converting absolute to relative" do
27
+ input = "/intl/en/about.html"
28
+ input.to_absolute_url('http://www.google.com/').should eql('http://www.google.com/intl/en/about.html')
29
+ end
30
+
31
+ it "should do nothing to absolute https urls" do
32
+ input = 'https://www.apple.com'
33
+ input.to_absolute_url('http://www.google.com').should eql('https://www.apple.com')
34
+ end
35
+
36
+ it "should translate relative https urls to absolute" do
37
+ input = "/intl/en/about.html"
38
+ input.to_absolute_url('https://www.google.com/').should eql('https://www.google.com/intl/en/about.html')
39
+ end
40
+
41
+ it "should translate relative urls to absolute ones" do
42
+ "/intl/en/about.html".to_absolute_url("http://www.google.com").should eql('http://www.google.com/intl/en/about.html')
43
+ end
44
+
45
+ it "should not translate absolute urls" do
46
+ "http://video.google.com/foo/about.html".to_absolute_url("http://www.google.com").should eql("http://video.google.com/foo/about.html")
47
+ end
48
+
49
+ it "should return string itself if uri parse fails" do
50
+ "something not a url".to_absolute_url("http://www.google.com").should eql("something not a url")
51
+ end
52
+
53
+ it "should chomp the fragment portion off the url" do
54
+ "http://video.google.com/foo/about.html#sdkfjskajflsajf".remove_fragment.should eql("http://video.google.com/foo/about.html")
55
+ end
56
+
57
+ it "should strip spaces off of the input url" do
58
+ input = ' http://www.apple.com'
59
+ input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
60
+ end
61
+
62
+ it "should correctly join urls that are stricktly query params" do
63
+ input = '?foo=bar'
64
+ input.to_absolute_url('http://www.google.com/index.html?foo=zoro').should eql('http://www.google.com/index.html?foo=bar')
65
+ end
66
+
67
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: magellan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nolan Evans
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-04-06 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: TODO
36
+ email: nolane@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ files:
44
+ - VERSION.yml
45
+ - lib/magellan
46
+ - lib/magellan/broken_link_tracker.rb
47
+ - lib/magellan/cartographer.rb
48
+ - lib/magellan/expected_links_tracker.rb
49
+ - lib/magellan/explorer.rb
50
+ - lib/magellan/extensions
51
+ - lib/magellan/extensions/array.rb
52
+ - lib/magellan/extensions/mechanize_page.rb
53
+ - lib/magellan/extensions/string.rb
54
+ - lib/magellan/logger.rb
55
+ - lib/magellan/rake
56
+ - lib/magellan/rake/base_magellan_task.rb
57
+ - lib/magellan/rake/broken_link_task.rb
58
+ - lib/magellan/rake/expected_links_task.rb
59
+ - lib/magellan/result.rb
60
+ - lib/magellan.rb
61
+ - spec/array_spec.rb
62
+ - spec/broken_link_task_spec.rb
63
+ - spec/broken_link_tracker_spec.rb
64
+ - spec/cartographer_spec.rb
65
+ - spec/expected_links_task_spec.rb
66
+ - spec/expected_links_tracker_spec.rb
67
+ - spec/explorer_spec.rb
68
+ - spec/logger_spec.rb
69
+ - spec/mechanize_page_spec.rb
70
+ - spec/result_spec.rb
71
+ - spec/spec_helper.rb
72
+ - spec/string_extensions_spec.rb
73
+ - README
74
+ has_rdoc: true
75
+ homepage: http://github.com/nolman/magellan
76
+ post_install_message:
77
+ rdoc_options:
78
+ - --inline-source
79
+ - --charset=UTF-8
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: "0"
93
+ version:
94
+ requirements: []
95
+
96
+ rubyforge_project: magellan
97
+ rubygems_version: 1.3.1
98
+ signing_key:
99
+ specification_version: 2
100
+ summary: A web testing framework that embraces the discoverable nature of the web
101
+ test_files: []
102
+