magellan 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,11 @@
1
+ Magellan: (alpha)
2
+
3
+ Currently the supported functionality is a rake task that crawls your website and finds any broken a[@href], img[@src], or script[@src] links.
4
+
5
+ Magellan::Rake::Task.new do |t|
6
+ t.origin_url = "http://localhost:3000/"
7
+ t.explore_depth = 100
8
+ end
9
+
10
+ Assumptions:
11
+ This tool works best if you follow the rules of unobtrusive JavaScript and properly set the HTTP status code header.
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 1
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,13 @@
1
+ require 'magellan/cartographer'
2
+ require 'magellan/explorer'
3
+ require 'magellan/result'
4
+ require 'magellan/broken_link_tracker'
5
+ require 'magellan/expected_links_tracker'
6
+ require 'magellan/logger'
7
+ require 'magellan/extensions/string'
8
+ require 'magellan/extensions/array'
9
+ require 'magellan/extensions/mechanize_page'
10
+
11
# Top-level namespace for the Magellan link-checking crawler.
module Magellan
  # Version of the library; matches VERSION.yml (major 0, minor 1, patch 0).
  VERSION = '0.1.0'
end
@@ -0,0 +1,30 @@
1
require 'observer'

module Magellan
  # Observer that collects crawl results whose status code starts with "4" or
  # "5" and remembers which page first linked to each resource it has seen.
  #
  # The tracker is itself Observable: after every result it notifies its own
  # observers with (Time, passed), where passed is true for a non-failing result.
  class BrokenLinkTracker
    include Observable

    # Results that came back with a 4xx/5xx status, in arrival order.
    attr_reader :broken_links

    def initialize
      @broken_links = []
      # absolute url => url of the first result seen that links to it
      @first_linked_from = {}
    end

    # Observer callback invoked for every crawled result.
    #
    # time   - timestamp supplied by the notifier (not used here).
    # result - object responding to status_code, url and
    #          absolute_linked_resources.
    def update(time, result)
      # to_s so that integer status codes are classified as well as strings.
      failed = result.status_code.to_s.start_with?("4", "5")
      @broken_links << result if failed
      changed
      notify_observers(Time.now, !failed)
      result.absolute_linked_resources.each do |linked_resource|
        # Only the first referrer is kept; later ones never overwrite it.
        @first_linked_from[linked_resource] = result.url unless @first_linked_from.key?(linked_resource)
      end
    end

    # True once at least one broken link has been recorded.
    def failed?
      !@broken_links.empty?
    end

    # One line per broken link: its url, the page it was first linked from
    # (empty when no recorded page linked to it) and the returned status code.
    def failure_message
      lines = @broken_links.map do |broken_link|
        "#{broken_link.url} first linked from: #{@first_linked_from[broken_link.url]} returned: #{broken_link.status_code}"
      end
      lines.join("\n")
    end
  end
end
@@ -0,0 +1,70 @@
1
+ require 'activesupport'
2
+ require 'observer'
3
+
4
module Magellan
  # Crawls a site starting at an origin url, level by level, down to a fixed
  # depth. Every result produced by an Explorer is broadcast to observers as
  # (Time, result).
  class Cartographer
    include Observable

    # Maximum number of urls handed to a single Explorer at once.
    URLS_PER_BATCH = 40

    # settings - Hash with the keys:
    #   :origin_url       - url the crawl starts from.
    #   :ignored_urls     - urls treated as already visited (may be nil).
    #   :domains          - urls whose hosts are the only hosts explored.
    #   :depth_to_explore - number of levels to explore (origin is level 1).
    #   :links_to_explore - passed through to Explorer unchanged.
    #   :trace            - truthy to print each batch of urls to $stdout.
    def initialize(settings)
      @origin_url = settings[:origin_url]
      # Copy the list: visited urls are appended to it during the crawl and
      # must not leak back into the caller's array.
      @known_urls = (settings[:ignored_urls] || []).dup
      @domains = settings[:domains].map { |domain| URI.parse(domain) }
      @depth_to_explore = settings[:depth_to_explore]
      @links_we_want_to_explore = settings[:links_to_explore]
      @trace = settings[:trace]
    end

    # Starts the crawl at the origin url.
    def crawl
      recursive_explore([@origin_url], 1)
    end

    # Explores urls, notifies observers about each result, then recurses into
    # the not-yet-seen links on allowed domains, one batch at a time.
    def recursive_explore(urls, depth)
      return unless i_am_not_too_deep?(depth)

      $stdout.puts "exploring:\n#{urls.join("\n")}" if @trace
      results = Explorer.new(urls, @links_we_want_to_explore).explore
      results.each do |result|
        changed
        notify_observers(Time.now, result)
        @known_urls << result.url.remove_fragment
        @known_urls << result.destination_url.remove_fragment
        remove_javascript_and_print_warning result
      end

      next_urls = results.map { |result| result.absolute_linked_resources }.flatten.uniq
      #TODO: handle any other url parsing error
      next_urls = next_urls.select { |url| a_domain_we_care_about?(url) }
      next_urls = next_urls.reject { |url| i_have_seen_this_url_before?(url) }
      # each_slice is core Ruby, so the crawl does not rely on a patched Array#chunk.
      next_urls.each_slice(URLS_PER_BATCH) do |batch|
        recursive_explore(batch, depth + 1)
      end
    end

    # True when url (ignoring any #fragment) was ignored or already visited.
    def i_have_seen_this_url_before?(url)
      @known_urls.include?(url.remove_fragment)
    end

    # True while depth is within the configured exploration depth.
    def i_am_not_too_deep?(depth)
      depth <= @depth_to_explore
    end

    # True when url's host equals the host of one of the configured domains.
    # Urls that URI cannot parse are compared textually instead: the leading
    # http(s):// is dropped and the remainder must start with a domain host.
    def a_domain_we_care_about?(url)
      host = URI.parse(url).host
      @domains.any? { |domain| domain.host == host }
    rescue URI::Error
      without_scheme = url.sub(/\Ahttps?:\/\//, '')
      @domains.any? { |domain| without_scheme.start_with?(domain.host) }
    end

    # Drops "javascript:" pseudo-links from the result's linked resources so
    # they are never explored.
    def remove_javascript_and_print_warning(result)
      result.linked_resources.delete_if do |linked_resource|
        #TODO: put this in the logger
        #$stderr.puts "Found obtrusive javascript: #{linked_resource} on page #{result.url}" if starts_with_javascript
        linked_resource.downcase.start_with?("javascript:")
      end
    end

  end
end
@@ -0,0 +1,55 @@
1
require 'observer'

module Magellan
  # Observer that checks crawled html pages against [pattern, expected_link]
  # rules: every page whose url or destination url matches the pattern must
  # contain expected_link among its linked resources.
  #
  # The tracker is itself Observable and notifies its observers with
  # (Time, passed) once per rule evaluated.
  class ExpectedLinksTracker
    include Observable

    # Messages for pages that lacked an expected link.
    attr_reader :errors

    # expected_patterns - list of [pattern, expected_link] pairs. nil is
    #                     treated as "no rules" and a Hash as its pairs.
    def initialize(expected_patterns)
      @errors = []
      @expected_patterns = Array(expected_patterns)
      # Rules that matched at least one page; only the keys are meaningful.
      @evaluated_expectations = {}
    end

    # Observer callback; results that are not html content are ignored.
    def update(time, result)
      return unless result.html_content?

      patterns_that_apply(result).each do |_pattern, expectation|
        passed = result.linked_resources.include?(expectation)
        changed
        notify_observers(Time.now, passed)
        @errors << "#{result.url} did not contain a link to #{expectation}" unless passed
      end
    end

    # Returns the rules whose pattern matches the result's url or destination
    # url, and records them as evaluated.
    def patterns_that_apply(result)
      applicable = @expected_patterns.select do |pattern, _expectation|
        result.url =~ pattern || result.destination_url =~ pattern
      end
      applicable.each { |rule| @evaluated_expectations[rule] = nil }
      applicable
    end

    # True when at least one page lacked an expected link.
    def has_errors?
      !@errors.empty?
    end

    # True when at least one rule never matched any page.
    def unmet_expecations?
      !unmet_expecations.empty?
    end

    # True when a rule was never evaluated or a page lacked an expected link.
    def failed?
      unmet_expecations? || has_errors?
    end

    # Never-evaluated rules followed by the per-page errors.
    def failure_message
      unmet_expecations_messages + errors.join("\n")
    end

    # Two blank lines followed by one line per rule that never matched.
    def unmet_expecations_messages
      lines = unmet_expecations.map do |pattern, _expectation|
        "#{pattern} was never evaluted during the crawl\n"
      end
      "\n\n" + lines.join
    end

    # Rules that have not matched any page so far.
    def unmet_expecations
      @expected_patterns - @evaluated_expectations.keys
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'hpricot'
2
+ require 'open-uri'
3
+ require 'ostruct'
4
+
5
module Magellan
  # Fetches a batch of urls concurrently (one thread per url) and turns every
  # response into a Result.
  class Explorer
    # Content type recorded when the response does not provide one.
    UNKNOWN_CONTENT = "unknown"

    # urls  - urls to fetch.
    # links - passed to links_to_other_documents for html pages.
    def initialize(urls, links)
      @links = links
      @urls = urls
    end

    # Fetches every url in its own thread and returns the results in the same
    # order as the urls.
    def explore
      requests = @urls.map { |url| Thread.new(url) { |target| explore_a(target) } }
      requests.map { |request| request.value }
    end

    # Fetches a single url and builds its Result. Linked resources are only
    # collected for text/html responses.
    def explore_a(url)
      agent = WWW::Mechanize.new
      agent.user_agent = "Ruby/#{RUBY_VERSION}"
      doc = agent.get(url)
      destination_url = doc.uri.to_s
      status_code = doc.code
      content_type = content_type_of(doc)
      #TODO: clean this up, this is very hacky, I would rather pass in a hpricot doc to create a result
      links = content_type.start_with?("text/html") ? doc.links_to_other_documents(@links) : []
      Explorer.create_result(url, destination_url, status_code, links, content_type)
    rescue WWW::Mechanize::ResponseCodeError => the_error
      Explorer.create_result(url, url, the_error.response_code, [], UNKNOWN_CONTENT)
    rescue Timeout::Error
      # NOTE(review): 505 means "HTTP Version Not Supported"; the value is kept
      # so reported codes do not change, but a timeout code may be intended.
      Explorer.create_result(url, url, "505", [], UNKNOWN_CONTENT)
    end

    # Builds a Result, converting every link to a String.
    def self.create_result(url, destination_url, status_code, links, content_type)
      Result.new(status_code, url, destination_url, links.map { |link| link.to_s }, content_type)
    end

    private

    # Content type reported by doc, or UNKNOWN_CONTENT when doc has no
    # content_type method or the value is nil (no Content-Type header).
    def content_type_of(doc)
      reported = doc.content_type if doc.respond_to?(:content_type)
      reported || UNKNOWN_CONTENT
    end
  end
end
@@ -0,0 +1,10 @@
1
class Array
  # Splits the array into consecutive slices of at most max_size elements and
  # returns them as an array of arrays; the last slice holds the remainder.
  #
  #   [1,2,3,4,5].chunk(3) # => [[1,2,3],[4,5]]
  #
  # When called without a size, the call is forwarded to the inherited
  # implementation so that block-form chunk keeps working on arrays where the
  # runtime provides one.
  def chunk(max_size = nil, &block)
    return super(&block) if max_size.nil?

    each_slice(max_size).to_a
  end
end
@@ -0,0 +1,10 @@
1
+ require 'mechanize'
2
class WWW::Mechanize::Page
  # For every entry in links_to_other_resources, looks up the entry's last
  # element as an attribute on the elements selected by its first element
  # (for example ["a", "href"]) and returns all values found as one flat list.
  def links_to_other_documents(links_to_other_resources)
    values_per_entry = links_to_other_resources.map do |tag_and_attribute|
      get_attributes(tag_and_attribute.first, tag_and_attribute.last)
    end
    values_per_entry.flatten
  end

  # Returns the non-nil values of attribute on the elements that `self / tag`
  # yields.
  def get_attributes(tag, attribute)
    values = (self / tag).map { |element| element.attributes[attribute] }
    values.compact
  end
end
@@ -0,0 +1,21 @@
1
+ require 'activesupport'
2
+ require 'open-uri'
3
class String
  # Resolves this (possibly relative) link against origin_url and returns the
  # absolute url as a String. Surrounding whitespace is ignored. When the link
  # cannot be resolved, the receiver is returned unchanged.
  def to_absolute_url(origin_url)
    stripped = strip
    if stripped.start_with?('?')
      # Query-only link: it replaces the origin's query. The origin's
      # #fragment is dropped too, otherwise the new query would end up
      # behind it. Done by hand because of:
      #BUG in URI.join? URI.join('http://www.google.com/index.html?foo=b','?foo=a') # => http://www.google.com/?foo=a
      origin_url.sub(/[?#].*/, '') + stripped
    else
      URI.join(origin_url, stripped).to_s
    end
  rescue StandardError
    # Best effort: links that cannot be joined are passed through as-is.
    self
  end

  # Returns the string without its "#fragment" part.
  def remove_fragment
    gsub(/#.*/, '')
  end
end
@@ -0,0 +1,8 @@
1
module Magellan
  # Progress reporter: prints one character to $stdout per notification.
  class Logger
    # Observer callback. Writes '.' for a truthy result and 'F' otherwise,
    # then flushes $stdout so the character shows up immediately.
    def update(time, result)
      marker = result ? '.' : 'F'
      $stdout.putc(marker)
      $stdout.flush
    end
  end
end
@@ -0,0 +1,40 @@
1
+
2
+ #TODO: this is not a good place to use a template method - violates Liskov substitution principle
3
module Magellan
  module Rake
    # Shared skeleton for Magellan rake tasks. The task it defines crawls from
    # origin_url with a Cartographer, feeds the results to an observer, and
    # exits with status 1 when that observer reports a failure.
    #
    # Subclasses must provide: description, create_observer, links_to_explore
    # and success_message.
    class BaseMagellanTask < ::Rake::TaskLib
      attr_accessor :origin_url
      attr_accessor :explore_depth
      attr_accessor :ignored_urls

      # name - name of the rake task to define. The new instance is yielded
      #        to the optional block for configuration before the task is
      #        defined.
      def initialize(name)
        @ignored_urls = []
        @name = name
        yield self if block_given?
        define
      end

      # Defines the rake task. Nothing is crawled until the task is invoked.
      def define
        desc description
        task @name do
          cartographer = Magellan::Cartographer.new(crawl_settings)
          observer = create_observer
          observer.add_observer(Magellan::Logger.new)
          cartographer.add_observer(observer)
          cartographer.crawl
          if observer.failed?
            # $stderr (not the STDERR constant) so redirected output is honoured.
            $stderr.puts observer.failure_message
            exit 1
          else
            $stdout.puts "\n" + success_message
          end
        end
      end

      private

      # Settings hash handed to the Cartographer; the origin url doubles as
      # the only domain to explore and TRACE is read from the environment.
      def crawl_settings
        {
          :origin_url => origin_url,
          :depth_to_explore => explore_depth,
          :domains => [origin_url],
          :ignored_urls => ignored_urls,
          :links_to_explore => links_to_explore,
          :trace => ENV['TRACE']
        }
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'magellan'
4
+ require 'magellan/rake/base_magellan_task'
5
+
6
module Magellan
  module Rake

    # Rake task (default name "magellan:explore") that crawls the site and
    # reports broken links through a BrokenLinkTracker.
    class BrokenLinkTask < BaseMagellanTask
      def initialize(name = "magellan:explore")
        super
      end

      # Message printed when the crawl finishes without failures.
      def success_message
        "No broken links were found!"
      end

      # Text used as the rake task description.
      def description
        "explore #{@origin_url} for broken links"
      end

      # [tag, attribute] pairs whose values are followed during the crawl.
      def links_to_explore
        [%w[a href], %w[script src], %w[img src]]
      end

      # Observer that receives the crawl results.
      def create_observer
        Magellan::BrokenLinkTracker.new
      end

    end

  end
end
@@ -0,0 +1,35 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+ require 'magellan'
4
+ require 'magellan/rake/base_magellan_task'
5
+
6
module Magellan
  module Rake

    # Rake task (default name "magellan:check_links") that crawls the site and
    # checks pages against patterns_and_expected_links through an
    # ExpectedLinksTracker.
    class ExpectedLinksTask < BaseMagellanTask
      # Rules handed to the ExpectedLinksTracker when the task runs.
      attr_accessor :patterns_and_expected_links

      def initialize(name = "magellan:check_links")
        super
      end

      # Message printed when the crawl finishes without failures.
      def success_message
        "All expected links found!"
      end

      # Observer that receives the crawl results.
      def create_observer
        Magellan::ExpectedLinksTracker.new(@patterns_and_expected_links)
      end

      # [tag, attribute] pairs whose values are followed during the crawl.
      def links_to_explore
        [%w[a href]]
      end

      # Text used as the rake task description.
      def description
        "Explore #{@origin_url} and find check if all given patterns are matched"
      end

    end

  end
end
@@ -0,0 +1,20 @@
1
module Magellan
  # Outcome of fetching one url.
  class Result
    # status_code      - status code of the response.
    # url              - url that was requested.
    # destination_url  - url the response was finally served from.
    # linked_resources - links found in the response, as given to the constructor.
    attr_reader :status_code, :url, :destination_url, :linked_resources

    def initialize(status_code, url, destination_url, linked_resources, content_type)
      @status_code = status_code
      @url = url
      @destination_url = destination_url
      @linked_resources = linked_resources
      @content_type = content_type
    end

    # The linked resources resolved against destination_url. Computed on
    # every call, so later changes to linked_resources are reflected.
    def absolute_linked_resources
      linked_resources.map { |linked_resource| linked_resource.to_s.to_absolute_url(destination_url) }.compact
    end

    # True when the content type starts with "text/html"; a missing (nil)
    # content type counts as not html.
    def html_content?
      @content_type.to_s.start_with?("text/html")
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe "Array Extensions" do
5
+ it "should be able to break up a array into chunks with a max size" do
6
+ [1,2,3,4,5].chunk(3).size.should eql(2)
7
+ [1,2,3,4,5].chunk(3).first.should eql([1,2,3])
8
+ [1,2,3,4,5].chunk(3).last.should eql([4,5])
9
+ end
10
# Distinct description: the previous example in this file already uses
# "should be able to break up a array into chunks with a max size".
it "should break up a array into single element chunks when the max size is one" do
  [1,2,3,4,5].chunk(1).size.should eql(5)
  [1,2,3,4,5].chunk(1).first.should eql([1])
  [1,2,3,4,5].chunk(1).last.should eql([5])
end
14
+
15
+ end
@@ -0,0 +1,64 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'rake'
3
+
4
+ describe "Magellan BrokenLinkTask" do
5
+
6
+ before :all do
7
+ @file_name = File.dirname(__FILE__) + "/../lib/magellan/rake/broken_link_task.rb"
8
+ @rake = Rake::Application.new
9
+ Rake.application = @rake
10
+ end
11
+
12
+ before :each do
13
+ load @file_name
14
+ $stdout.stubs(:putc)
15
+ end
16
+
17
+ after :all do
18
+ Rake.application = nil
19
+ end
20
+
21
+ it "should create a rake task" do
22
+ Magellan::Rake::BrokenLinkTask.new
23
+ tasks.include?("magellan:explore").should be_true
24
+ end
25
+
26
+ it "should explore when task is invoked" do
27
+ Magellan::Rake::BrokenLinkTask.new("invoke_task") do |t|
28
+ t.explore_depth = 1
29
+ t.origin_url = "http://localhost:8080"
30
+ end
31
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
32
+ $stdout.expects(:puts) #passed message
33
+ @rake.invoke_task("invoke_task")
34
+ end
35
+
36
+ it "should raise exception when broken links are found" do
37
+ Magellan::Rake::BrokenLinkTask.new("exception_task") do |t|
38
+ t.explore_depth = 1
39
+ t.origin_url = "http://canrailsscale.com"
40
+ end
41
+ $stderr.expects(:puts)
42
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","500"))
43
+ lambda {@rake.invoke_task("exception_task")}.should raise_error
44
+ end
45
+
46
+ it "should attach logger" do
47
+ Magellan::Rake::BrokenLinkTask.new("logger_test") do |t|
48
+ t.explore_depth = 1
49
+ t.origin_url = "http://canrailsscale.com"
50
+ end
51
+ $stderr.stubs(:puts)
52
+ Magellan::Logger.any_instance.expects(:update)
53
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","500"))
54
+ lambda {@rake.invoke_task("logger_test")}.should raise_error
55
+ end
56
+
57
+ def create_result(url,status_code)
58
+ Magellan::Explorer.create_result(url,url,status_code, [],"foo")
59
+ end
60
+
61
+ def tasks
62
+ @rake.tasks.collect{|task| task.name }
63
+ end
64
+ end
@@ -0,0 +1,67 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::BrokenLinkTracker do
5
+
6
+ it "should not report broken links if there are none" do
7
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
8
+ broken_link_tracker.update(Time.now,create_success_result('http://www.foo.com',['jalskdjflakjsf']))
9
+ broken_link_tracker.failed?.should be_false
10
+ end
11
+
12
+ it "should record links by absolute_url" do
13
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
14
+ broken_link_tracker.update(Time.now,create_success_result('http://www.bozo.com/foople.html',['/apples.html']))
15
+ broken_link_tracker.update(Time.now,create_result('http://www.bozo.com/apples.html',"404",[]))
16
+ broken_link_tracker.failure_message.should include("http://www.bozo.com/foople.html")
17
+ end
18
+
19
+ it "should only record broken links errors" do
20
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
21
+ broken_link_tracker.update(Time.now,create_success_result('http://www.foo.com',['http://www.google.com']))
22
+ broken_link_tracker.update(Time.now,create_result('http://www.foo.com/404',"404",[]))
23
+ broken_link_tracker.failed?.should be_true
24
+ broken_link_tracker.broken_links.size.should eql(1)
25
+ end
26
+
27
+ it "should record 4** errors" do
28
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
29
+ broken_link_tracker.update(Time.now,create_result('http://www.foo.com/404',"404",[]))
30
+ broken_link_tracker.broken_links.first.status_code.should eql('404')
31
+ end
32
+
33
+ it "have url and status code in the error message" do
34
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
35
+ broken_link_tracker.update(Time.now,create_result('broke url',"404",[]))
36
+ broken_link_tracker.failure_message.should include('broke url')
37
+ broken_link_tracker.failure_message.should include("404")
38
+ end
39
+
40
+ it "should record 5** errors" do
41
+ broken_link_tracker = Magellan::BrokenLinkTracker.new
42
+ broken_link_tracker.update(Time.now,create_result('fooz',"500",[]))
43
+ broken_link_tracker.broken_links.first.status_code.should eql('500')
44
+ end
45
+
46
+ def create_success_result(url,linked_resources)
47
+ create_result(url,"200",linked_resources)
48
+ end
49
+
50
+ def create_result(url,status_code, linked_resources)
51
+ Magellan::Result.new(status_code,url,url,linked_resources,"foo")
52
+ end
53
+
54
+ it "should update the observer with a pass" do
55
+ tracker = Magellan::BrokenLinkTracker.new
56
+ tracker.add_observer(Magellan::Logger.new)
57
+ $stdout.expects(:putc).with('.')
58
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
59
+ end
60
+
61
# A 404 result must reach the attached Logger as a failure ('F').
it "should update the observer with a fail" do
  tracker = Magellan::BrokenLinkTracker.new
  tracker.add_observer(Magellan::Logger.new)
  $stdout.expects(:putc).with('F')
  tracker.update(Time.now,Magellan::Result.new('404','/zoro','/zoro',['/fail_about_us.html'],"text/html"))
end
67
+ end
@@ -0,0 +1,176 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Cartographer do
5
+
6
+ it "should not visit the same url more then once" do
7
+ origin_url = "http://www.google.com"
8
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
9
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
10
+ cartographer.crawl
11
+ end
12
+
13
+ it "should not visit the origin url more then once if it finds a link with a finishing /" do
14
+ pending
15
+ origin_url = "http://www.google.com"
16
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/']))
17
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
18
+ cartographer.crawl
19
+ end
20
+
21
+ it "should try to explore urls in the domain we care about that have non ascii characters in them" do
22
+ origin_url = "http://www.reddit.com"
23
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["http://www.reddit.com/r/science/comments/87dk7/cold_fusion_is_a_pipe_dream_but_μcatalyzed_cool/","http://www.domainwedontcareabout.com/μ"]))
24
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://www.reddit.com/r/science/comments/87dk7/cold_fusion_is_a_pipe_dream_but_μcatalyzed_cool/").returns(create_success_result([]))
25
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
26
+ cartographer.crawl
27
+ end
28
+
29
+ it "should not visit the same url more then once if they differ by fragment id" do
30
+ origin_url = "http://www.google.com"
31
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com#foo']))
32
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
33
+ cartographer.crawl
34
+ end
35
+
36
+ it "should notify observers when a result comes in" do
37
+ origin_url = "http://www.google.com"
38
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
39
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
40
+ foo = Object.new
41
+ foo.expects(:update)
42
+ cartographer.add_observer(foo)
43
+ cartographer.crawl
44
+ end
45
+
46
+ it "should notify observers everytime a result comes in" do
47
+ origin_url = "http://www.google.com"
48
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/bar.html']))
49
+ Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
50
+ Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/bar.html').returns(create_success_result([]))
51
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
52
+ foo = Object.new
53
+ foo.expects(:update).times(3)
54
+ cartographer.add_observer(foo)
55
+ cartographer.crawl
56
+ end
57
+
58
+ it "should explore other linked resources" do
59
+ origin_url = "http://www.google.com"
60
+ Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
61
+ Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
62
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
63
+ cartographer.crawl
64
+ end
65
+
66
+ it "should not explore ignored urls" do
67
+ origin_url = "http://www.google.com"
68
+ Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/ignoreme.html']))
69
+ Magellan::Explorer.any_instance.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
70
+ cartographer = Magellan::Cartographer.new(settings(origin_url,3,[origin_url],['http://www.google.com/ignoreme.html']))
71
+ cartographer.crawl
72
+ end
73
+
74
+ it "should not explore the same url more then once" do
75
+ origin_url = "http://www.google.com"
76
+ Magellan::Explorer.any_instance.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html','http://www.google.com/foo.html']))
77
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result([]))
78
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
79
+ cartographer.crawl
80
+ end
81
+
82
+ it "should be able to specify crawlable domains" do
83
+ origin_url = "http://www.google.com"
84
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.foo.com']))
85
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.foo.com').returns(create_success_result(['http://www.bar.com']))
86
+ cartographer = Magellan::Cartographer.new(settings(origin_url, 5,['http://www.google.com','http://www.foo.com']))
87
+ cartographer.crawl
88
+ end
89
+
90
+ it "should explore relative links" do
91
+ origin_url = "http://www.google.com"
92
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
93
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['/foo2.html']))
94
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result([]))
95
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
96
+ cartographer.crawl
97
+ end
98
+
99
+ it "should go n layers deep into a site" do
100
+ origin_url = "http://www.google.com"
101
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
102
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['http://www.google.com/foo2.html']))
103
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result(['http://www.google.com/foo3.html']))
104
+ cartographer = Magellan::Cartographer.new(settings(origin_url,3))
105
+ cartographer.crawl
106
+ end
107
+
108
+ it "should use host to determine if we are in a allowed domain" do
109
+ origin_url = "http://www.google.com/jskfjlsajfd"
110
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
111
+ cartographer.a_domain_we_care_about?("http://www.google.com/index.html").should be_true
112
+ end
113
+
114
+ it "should not explore js urls and print warnings if they are found, obtrusive javascript is bad mmkay" do
115
+ origin_url = "http://www.google.com"
116
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["javascript:bookmarksite('ThoughtWorks Studios', 'http://studios.thoughtworks.com')",'http://www.google.com/foo']))
117
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with('http://www.google.com/foo').returns(create_success_result([]))
118
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
119
+ cartographer.crawl
120
+ end
121
+
122
+ #<a alex.hal9000@gmail.com="" href="mailto:PWang@thoughtworks.com,">PWang@thoughtworks.com, alex.hal9000@gmail.com</a>
123
+
124
+ it "should not die on checking the domain on invalid urls" do
125
+ origin_url = "http://www.google.com/adsfaf"
126
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
127
+ cartographer.a_domain_we_care_about?("mailto:PWang@thoughtworks.com,").should be_false
128
+ end
129
+
130
+ it "should not explore mailto urls" do
131
+ origin_url = "http://www.google.com/adsfaf"
132
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with(origin_url).returns(create_success_result(["mailto:foo"]))
133
+ cartographer = Magellan::Cartographer.new(settings(origin_url))
134
+ cartographer.crawl
135
+ end
136
+
137
+ it "should puts out urls if the trace is enabled" do
138
+ origin_url = "http://www.google.com/adsfaf"
139
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
140
+ cartographer = Magellan::Cartographer.new(settings(origin_url).merge( {:trace=> true}))
141
+ $stdout.expects(:puts).with {|value| value.include?(origin_url)}
142
+ cartographer.crawl
143
+ end
144
+
145
+ it "should not puts if the trace is disabled" do
146
+ origin_url = "http://www.google.com/adsfaf"
147
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
148
+ cartographer = Magellan::Cartographer.new(settings(origin_url).merge( {:trace=> false}))
149
+ $stdout.expects(:puts).never
150
+ cartographer.crawl
151
+ end
152
+
153
# The explorer is stubbed so the example does not depend on a live site or
# on that site still redirecting to the expected destination.
it "should record the source and the destination url in known urls" do
  origin_url = "http://studios.thoughtworks.com/cruise"
  destination_url = "http://studios.thoughtworks.com/cruise-continuous-integration"
  redirected = Magellan::Result.new("200",origin_url,destination_url,[],"text/html")
  Magellan::Explorer.any_instance.stubs(:explore_a).with(origin_url).returns(redirected)
  cartographer = Magellan::Cartographer.new(settings(origin_url, 1))
  cartographer.crawl
  cartographer.i_have_seen_this_url_before?(origin_url).should be_true
  cartographer.i_have_seen_this_url_before?(destination_url).should be_true
end
160
+
161
+ it "should go through a entire site if layers to explore is set to -1"
162
+ it "should explore n layers into external domains"
163
+
164
+ def create_success_result(linked_resources)
165
+ create_result("200",linked_resources)
166
+ end
167
+
168
+ def settings(origin_url,depth=5,domains = [origin_url], ignored_urls=[])
169
+ {:origin_url => origin_url, :depth_to_explore => depth, :domains => domains, :ignored_urls =>ignored_urls, :links_to_explore => [["a","href"]] }
170
+ end
171
+
172
+ def create_result(status_code, linked_resources)
173
+ Magellan::Result.new(status_code,"http://www.google.com","http://www.google.com",linked_resources,"text/html")
174
+ end
175
+
176
+ end
@@ -0,0 +1,68 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'rake'
3
+
4
+ describe "Magellan ExpectedLinksTask" do
5
+
6
+ before :all do
7
+ @file_name = File.dirname(__FILE__) + "/../lib/magellan/rake/expected_links_task.rb"
8
+ @rake = Rake::Application.new
9
+ Rake.application = @rake
10
+ end
11
+
12
+ before :each do
13
+ $stdout.stubs(:putc)
14
+ load @file_name
15
+ end
16
+
17
+ after :all do
18
+ Rake.application = nil
19
+ end
20
+
21
+ it "should create a rake task" do
22
+ Magellan::Rake::ExpectedLinksTask.new
23
+ tasks.include?("magellan:check_links").should be_true
24
+ end
25
+
26
+ it "should explore when task is invoked" do
27
+ Magellan::Rake::ExpectedLinksTask.new("some_task") do |t|
28
+ t.explore_depth = 1
29
+ t.patterns_and_expected_links = []
30
+ t.origin_url = "http://localhost:8080"
31
+ end
32
+ $stdout.expects(:puts)
33
+ Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
34
+ @rake.invoke_task("some_task")
35
+ end
36
+
37
+
38
+ it "should notify a expected link tracker when a task is invoked" do
39
+ Magellan::Rake::ExpectedLinksTask.new("invoke_expected_link_tracker") do |t|
40
+ t.explore_depth = 1
41
+ t.patterns_and_expected_links = []
42
+ t.origin_url = "http://localhost:8080"
43
+ end
44
+ $stdout.expects(:puts)
45
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
46
+ Magellan::ExpectedLinksTracker.any_instance.expects(:update).once
47
+ @rake.invoke_task("invoke_expected_link_tracker")
48
+ end
49
+
50
+ it "should fail the rake task if expected links did not exist or rules did not evaluate to be true" do
51
+ Magellan::Rake::ExpectedLinksTask.new("exception_raising_task") do |t|
52
+ t.explore_depth = 1
53
+ t.patterns_and_expected_links = [[/.*/,'/about_us.html']]
54
+ t.origin_url = "http://canrailsscale.com"
55
+ end
56
+ $stderr.expects(:puts)
57
+ Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://canrailsscale.com").returns(create_result("http://canrailsscale.com","200"))
58
+ lambda {@rake.invoke_task("exception_raising_task")}.should raise_error
59
+ end
60
+
61
+ def create_result(url,status_code)
62
+ Magellan::Explorer.create_result(url,url,status_code, [],"text/html")
63
+ end
64
+
65
+ def tasks
66
+ @rake.tasks.collect{|task| task.name }
67
+ end
68
+ end
@@ -0,0 +1,87 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::ExpectedLinksTracker do
5
+
6
+ it "should create a error message contianing the offending url and " do
7
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
8
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
9
+ tracker.errors.first.should include('/fozo')
10
+ tracker.errors.first.should include('/about_us.html')
11
+ end
12
+
13
+ it "should be able specify all resource should link to something" do
14
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
15
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
16
+ tracker.has_errors?.should be_false
17
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
18
+ tracker.has_errors?.should be_true
19
+ end
20
+
21
+ it "should only apply rules if they apply to source url" do
22
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
23
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
24
+ tracker.has_errors?.should be_false
25
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_fail_us.html'],"text/html"))
26
+ tracker.has_errors?.should be_true
27
+ end
28
+
29
+ it "should only apply rules if they apply to destination url" do
30
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
31
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
32
+ tracker.has_errors?.should be_false
33
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/foo.html',['/about_fail_us.html'],"text/html"))
34
+ tracker.has_errors?.should be_true
35
+ end
36
+
37
+ it "should know if a expectation was never met" do
38
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
39
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
40
+ tracker.unmet_expecations?.should be_true
41
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/foo.html',['/about_fail_us.html'],"text/html"))
42
+ tracker.unmet_expecations?.should be_false
43
+ end
44
+
45
+ it "should provide a meaningfull error message around unmet expectations" do
46
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
47
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
48
+ tracker.unmet_expecations_messages.should include(/foo\.html/.to_s)
49
+ end
50
+
51
+ it "should return failed if there are unmet expectations" do
52
+ tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
53
+ tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
54
+ tracker.failed?.should be_true
55
+ tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_us.html'],"text/html"))
56
+ tracker.failed?.should be_false
57
+ end
58
+
59
+ it "should return failed if there are failed expectations" do
60
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
61
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
62
+ tracker.failed?.should be_false
63
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
64
+ tracker.failed?.should be_true
65
+ end
66
+
67
+ it "should ignore the result if it is not a html content type" do
68
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
69
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
70
+ tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"application/javascript"))
71
+ tracker.failed?.should be_false
72
+ end
73
+
74
+ it "should update the observer with a pass" do
75
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
76
+ tracker.add_observer(Magellan::Logger.new)
77
+ $stdout.expects(:putc).with('.')
78
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
79
+ end
80
+ it "should update the observer with a pass" do
81
+ tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
82
+ tracker.add_observer(Magellan::Logger.new)
83
+ $stdout.expects(:putc).with('F')
84
+ tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/fail_about_us.html'],"text/html"))
85
+ end
86
+
87
+ end
@@ -0,0 +1,72 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Explorer do # NOTE(review): apart from the timeout example nothing is stubbed here, so these examples appear to hit live sites - confirm
5
+
6
+ it "should find other js resources" do
7
+ result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
8
+ result.first.absolute_linked_resources.should include('http://pagead2.googlesyndication.com/pagead/show_ads.js')
9
+ end
10
+
11
+ it "should report a 505 status code when the request times out" do # get is forced to raise Timeout::Error
12
+ WWW::Mechanize.any_instance.expects(:get).raises(Timeout::Error)
13
+ result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
14
+ result.first.status_code.should eql('505')
15
+ result.first.url.should eql('http://canrailsscale.com/')
16
+ end
17
+
18
+ it "should have one result for one url" do
19
+ result = Magellan::Explorer.new(['http://www.google.com/'],links_to_explore).explore
20
+ result.size.should eql(1)
21
+ end
22
+
23
+ it "should have two results for two urls" do
24
+ result = Magellan::Explorer.new(['http://www.google.com/','http://www.apple.com/'],links_to_explore).explore
25
+ result.size.should eql(2)
26
+ end
27
+
28
+ it "should find other pages to explore via a href" do
29
+ result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
30
+ result.first.absolute_linked_resources.should include('http://video.google.com/?hl=en&tab=wv')
31
+ end
32
+
33
+ it "should translate relative urls to absolute ones" do
34
+ result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
35
+ result.first.absolute_linked_resources.should include('http://www.google.com/intl/en/about.html')
36
+ end
37
+
38
+ it "should report non successful status codes" do
39
+ result = Magellan::Explorer.new('http://www.google.com/dfkjaslfkjaslfkj.html',links_to_explore).explore
40
+ result.first.status_code.should eql("404")
41
+ end
42
+
43
+ it "should not get any links if it not a text/xhtml file" do # the explored url is a javascript file
44
+ result = Magellan::Explorer.new("http://jqueryjs.googlecode.com/files/jquery-1.3.2.min.js",links_to_explore).explore
45
+ result.first.absolute_linked_resources.should be_empty
46
+ end
47
+
48
+ it "should update url if redirected" do
49
+ result = Magellan::Explorer.new("http://www.thoughtworks.com/mingle",links_to_explore).explore
50
+ result.first.destination_url.should eql("http://studios.thoughtworks.com/mingle-agile-project-management")
51
+ end
52
+
53
+ it "should return source url as desintation url if a error occurs" do
54
+ result = Magellan::Explorer.new("http://www.google.com/dfkjaslfkjaslfkj.html",links_to_explore).explore
55
+ result.first.destination_url.should eql("http://www.google.com/dfkjaslfkjaslfkj.html")
56
+ end
57
+
58
+ it "should be able to explore a url" do # no assertion: only checks that explore_a does not raise
59
+ Magellan::Explorer.new('',links_to_explore).explore_a("http://www.yahoo.com")
60
+ end
61
+
62
+ it "should be able to go from http to https" do
63
+ result = Magellan::Explorer.new("http://mail.yahoo.com",links_to_explore).explore
64
+ result.first.destination_url.starts_with?("https://").should be_true
65
+ end
66
+
67
+ it "should be able to crawl ftp based links" # pending example: no block given
68
+
69
+ def links_to_explore # [tag, attribute] pairs passed to every Explorer in this spec
70
+ [["a","href"],["script","src"],["img","src"]]
71
+ end
72
+ end
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Logger do
5
+ it "should put a . for a pass" do
6
+ logger = Magellan::Logger.new
7
+ $stdout.expects(:putc).with('.')
8
+ logger.update(Time.now,true)
9
+ end
10
+ it "should put a F for a fail" do
11
+ logger = Magellan::Logger.new
12
+ $stdout.expects(:putc).with('F')
13
+ logger.update(Time.now,false)
14
+ end
15
+ end
@@ -0,0 +1,44 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe "WWW::Mechanize::Page Extensions" do
5
+ LINKS = [["a","href"],["script","src"],["img","src"]]
6
+
7
+ it "should not return nil for script tags without src attritubes" do
8
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo>something</script>")
9
+ doc.links_to_other_documents(LINKS).should be_empty
10
+ end
11
+
12
+ it "should find links based on script tags with src attritubes" do
13
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo src='foozor'>something</script>")
14
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
15
+ links_to_other_documents.size.should eql(1)
16
+ links_to_other_documents.first.to_s.should eql("foozor")
17
+ end
18
+
19
+ it "should be able to get two script sources" do
20
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<body><script class=foo src='foozor'>something</script><script class=foo src='fdsajfkajf'>something</script></body>")
21
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
22
+ links_to_other_documents.size.should eql(2)
23
+ end
24
+
25
+ it "should find links based on a tags with href attritubes" do
26
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo href='bozo'>something</a>")
27
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
28
+ links_to_other_documents.size.should eql(1)
29
+ links_to_other_documents.first.to_s.should eql("bozo")
30
+ end
31
+
32
+ it "should find links based on img tags with src attritubes" do
33
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<img class=foo src='ohno' alt='whatever' />")
34
+ links_to_other_documents = doc.links_to_other_documents(LINKS)
35
+ links_to_other_documents.size.should eql(1)
36
+ links_to_other_documents.first.to_s.should eql("ohno")
37
+ end
38
+
39
+ it "should links based on a tags with href attritubes" do
40
+ doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo>something</a>")
41
+ doc.links_to_other_documents(LINKS).should be_empty
42
+ end
43
+
44
+ end
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe Magellan::Result do
5
+
6
+ it "should not remove fragments when converting to absolute urls" do
7
+ results = Magellan::Result.new("200","http://www.google.com/index.html","http://www.google.com/index.html",["/index.html#foo"],"foo")
8
+ results.absolute_linked_resources.should include("http://www.google.com/index.html#foo")
9
+ end
10
+
11
+ it "should use destination_url to build new absolute urls" do
12
+ results = Magellan::Result.new("200","http://www.google.com/bob.html","http://www.foo.com/bob.html",["/index.html"],"foo")
13
+ results.absolute_linked_resources.should include("http://www.foo.com/index.html")
14
+ end
15
+
16
+
17
+ end
@@ -0,0 +1,11 @@
1
+ require 'rubygems'
2
+ require 'spec'
3
+ require 'mocha'
4
+ require File.dirname(__FILE__) + '/../config/vendorized_gems'
5
+
6
+ lib_path = File.expand_path("#{File.dirname(__FILE__)}/../lib")
7
+ $LOAD_PATH.unshift lib_path unless $LOAD_PATH.include?(lib_path)
8
+
9
+ Spec::Runner.configure do |config|
10
+ config.mock_with :mocha
11
+ end
@@ -0,0 +1,67 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+ require 'magellan'
3
+
4
+ describe "String Extensions" do
5
+
6
+ it "should convert relative urls to absolute" do
7
+ input = '/Test_Automation_Framework/chrome/common/js/trac.js'
8
+ input.to_absolute_url('http://www.google.com').should eql('http://www.google.com/Test_Automation_Framework/chrome/common/js/trac.js')
9
+ end
10
+
11
+ it "should remove any relative path from original url" do
12
+ input = '/foo/trac.js'
13
+ input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
14
+ end
15
+
16
+ it "should merge urls correctly with dots" do
17
+ input = '../foo/trac.js'
18
+ input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
19
+ end
20
+
21
+ it "should do nothing to absolute http urls" do
22
+ input = 'http://www.apple.com'
23
+ input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
24
+ end
25
+
26
+ it "should not put double slashes when converting absolute to relative" do
27
+ input = "/intl/en/about.html"
28
+ input.to_absolute_url('http://www.google.com/').should eql('http://www.google.com/intl/en/about.html')
29
+ end
30
+
31
+ it "should do nothing to absolute https urls" do
32
+ input = 'https://www.apple.com'
33
+ input.to_absolute_url('http://www.google.com').should eql('https://www.apple.com')
34
+ end
35
+
36
+ it "should translate relative https urls to absolute" do
37
+ input = "/intl/en/about.html"
38
+ input.to_absolute_url('https://www.google.com/').should eql('https://www.google.com/intl/en/about.html')
39
+ end
40
+
41
+ it "should translate relative urls to absolute ones" do
42
+ "/intl/en/about.html".to_absolute_url("http://www.google.com").should eql('http://www.google.com/intl/en/about.html')
43
+ end
44
+
45
+ it "should not translate absolute urls" do
46
+ "http://video.google.com/foo/about.html".to_absolute_url("http://www.google.com").should eql("http://video.google.com/foo/about.html")
47
+ end
48
+
49
+ it "should return string itself if uri parse fails" do
50
+ "something not a url".to_absolute_url("http://www.google.com").should eql("something not a url")
51
+ end
52
+
53
+ it "should chomp the fragment portion off the url" do
54
+ "http://video.google.com/foo/about.html#sdkfjskajflsajf".remove_fragment.should eql("http://video.google.com/foo/about.html")
55
+ end
56
+
57
+ it "should strip spaces off of the input url" do
58
+ input = ' http://www.apple.com'
59
+ input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
60
+ end
61
+
62
+ it "should correctly join urls that are stricktly query params" do
63
+ input = '?foo=bar'
64
+ input.to_absolute_url('http://www.google.com/index.html?foo=zoro').should eql('http://www.google.com/index.html?foo=bar')
65
+ end
66
+
67
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: magellan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nolan Evans
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-04-06 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: activesupport
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: TODO
36
+ email: nolane@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ files:
44
+ - VERSION.yml
45
+ - lib/magellan
46
+ - lib/magellan/broken_link_tracker.rb
47
+ - lib/magellan/cartographer.rb
48
+ - lib/magellan/expected_links_tracker.rb
49
+ - lib/magellan/explorer.rb
50
+ - lib/magellan/extensions
51
+ - lib/magellan/extensions/array.rb
52
+ - lib/magellan/extensions/mechanize_page.rb
53
+ - lib/magellan/extensions/string.rb
54
+ - lib/magellan/logger.rb
55
+ - lib/magellan/rake
56
+ - lib/magellan/rake/base_magellan_task.rb
57
+ - lib/magellan/rake/broken_link_task.rb
58
+ - lib/magellan/rake/expected_links_task.rb
59
+ - lib/magellan/result.rb
60
+ - lib/magellan.rb
61
+ - spec/array_spec.rb
62
+ - spec/broken_link_task_spec.rb
63
+ - spec/broken_link_tracker_spec.rb
64
+ - spec/cartographer_spec.rb
65
+ - spec/expected_links_task_spec.rb
66
+ - spec/expected_links_tracker_spec.rb
67
+ - spec/explorer_spec.rb
68
+ - spec/logger_spec.rb
69
+ - spec/mechanize_page_spec.rb
70
+ - spec/result_spec.rb
71
+ - spec/spec_helper.rb
72
+ - spec/string_extensions_spec.rb
73
+ - README
74
+ has_rdoc: true
75
+ homepage: http://github.com/nolman/magellan
76
+ post_install_message:
77
+ rdoc_options:
78
+ - --inline-source
79
+ - --charset=UTF-8
80
+ require_paths:
81
+ - lib
82
+ required_ruby_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: "0"
87
+ version:
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: "0"
93
+ version:
94
+ requirements: []
95
+
96
+ rubyforge_project: magellan
97
+ rubygems_version: 1.3.1
98
+ signing_key:
99
+ specification_version: 2
100
+ summary: A web testing framework that embraces the discoverable nature of the web
101
+ test_files: []
102
+