magellan 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +11 -0
- data/VERSION.yml +4 -0
- data/lib/magellan.rb +13 -0
- data/lib/magellan/broken_link_tracker.rb +30 -0
- data/lib/magellan/cartographer.rb +70 -0
- data/lib/magellan/expected_links_tracker.rb +55 -0
- data/lib/magellan/explorer.rb +45 -0
- data/lib/magellan/extensions/array.rb +10 -0
- data/lib/magellan/extensions/mechanize_page.rb +10 -0
- data/lib/magellan/extensions/string.rb +21 -0
- data/lib/magellan/logger.rb +8 -0
- data/lib/magellan/rake/base_magellan_task.rb +40 -0
- data/lib/magellan/rake/broken_link_task.rb +33 -0
- data/lib/magellan/rake/expected_links_task.rb +35 -0
- data/lib/magellan/result.rb +20 -0
- data/spec/array_spec.rb +15 -0
- data/spec/broken_link_task_spec.rb +64 -0
- data/spec/broken_link_tracker_spec.rb +67 -0
- data/spec/cartographer_spec.rb +176 -0
- data/spec/expected_links_task_spec.rb +68 -0
- data/spec/expected_links_tracker_spec.rb +87 -0
- data/spec/explorer_spec.rb +72 -0
- data/spec/logger_spec.rb +15 -0
- data/spec/mechanize_page_spec.rb +44 -0
- data/spec/result_spec.rb +17 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/string_extensions_spec.rb +67 -0
- metadata +102 -0
    
        data/README
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
| 1 | 
            +
            Magellan: (alpha)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Currently the supported functionality is a rake task that crawls your website and finds any broken a[@href], img[@src], or script[@src] links.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Magellan::Rake::BrokenLinkTask.new do |t|
         | 
| 6 | 
            +
              t.origin_url = "http://localhost:3000/"
         | 
| 7 | 
            +
              t.explore_depth = 100
         | 
| 8 | 
            +
            end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            Assumptions:
         | 
| 11 | 
            +
            This tool works best if you follow the rules of unobtrusive JavaScript and properly set the HTTP status code header.
         | 
    
        data/VERSION.yml
    ADDED
    
    
    
        data/lib/magellan.rb
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            require 'magellan/cartographer'
         | 
| 2 | 
            +
            require 'magellan/explorer'
         | 
| 3 | 
            +
            require 'magellan/result'
         | 
| 4 | 
            +
            require 'magellan/broken_link_tracker'
         | 
| 5 | 
            +
            require 'magellan/expected_links_tracker'
         | 
| 6 | 
            +
            require 'magellan/logger'
         | 
| 7 | 
            +
            require 'magellan/extensions/string'
         | 
| 8 | 
            +
            require 'magellan/extensions/array'
         | 
| 9 | 
            +
            require 'magellan/extensions/mechanize_page'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Top-level namespace of the library; this file only defines the version constant.
            module Magellan
         | 
| 12 | 
            +
              # NOTE(review): the package header reports 0.1.0 while this constant says 0.0.1 — confirm which is authoritative.
              VERSION = '0.0.1'
         | 
| 13 | 
            +
            end
         | 
| @@ -0,0 +1,30 @@ | |
| 1 | 
            +
            module Magellan
              # Observer that collects every crawled result whose status code starts
              # with "4" or "5" and remembers which page first linked to each resource.
              #
              # The tracker is itself observable: after each #update it notifies its own
              # observers with (Time, passed), where passed is true for a healthy result.
              class BrokenLinkTracker
                include Observable

                # Leading digits of the status codes that count as broken.
                FAILURE_PREFIXES = %w[4 5].freeze

                # Results recorded as broken, in the order they were reported.
                attr_reader :broken_links

                def initialize
                  @broken_links = []
                  @first_linked_from = {}
                end

                # Observer callback invoked once per crawled result.
                #
                # time   - timestamp supplied by the notifier (unused).
                # result - object responding to status_code, url and
                #          absolute_linked_resources.
                def update(time, result)
                  # to_s lets integer status codes through; start_with? is core Ruby,
                  # so no ActiveSupport alias is needed.
                  failed = result.status_code.to_s.start_with?(*FAILURE_PREFIXES)
                  @broken_links << result if failed
                  changed
                  notify_observers(Time.now, !failed)
                  result.absolute_linked_resources.each do |linked_resource|
                    # Only the first referrer is kept for each linked resource.
                    @first_linked_from[linked_resource] = result.url unless @first_linked_from.key?(linked_resource)
                  end
                end

                # True once at least one broken result has been recorded.
                def failed?
                  !@broken_links.empty?
                end

                # One line per broken link: its URL, the first page seen linking to it
                # (blank when unknown) and the status code that was returned.
                def failure_message
                  @broken_links.map do |broken_link|
                    "#{broken_link.url} first linked from: #{@first_linked_from[broken_link.url]} returned: #{broken_link.status_code}"
                  end.join("\n")
                end
              end
            end
         | 
| @@ -0,0 +1,70 @@ | |
| 1 | 
            +
            require 'activesupport'
         | 
| 2 | 
            +
            require 'observer'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module Magellan
              # Crawls a site breadth-first from an origin URL, notifying observers with
              # (Time, result) for every explored URL.
              #
              # Expected settings keys: :origin_url, :ignored_urls, :domains,
              # :depth_to_explore, :links_to_explore and :trace.
              class Cartographer
                include Observable

                # Maximum number of URLs handed to a single Explorer.
                URLS_PER_BATCH = 40

                def initialize(settings)
                  @origin_url = settings[:origin_url]
                  # Work on a copy: visited URLs are appended to this list, and the
                  # caller's ignore list must not grow as a side effect. A missing
                  # setting is treated as "nothing ignored".
                  @known_urls = (settings[:ignored_urls] || []).dup
                  @domains = settings[:domains].map { |domain| URI.parse(domain) }
                  @depth_to_explore = settings[:depth_to_explore]
                  @links_we_want_to_explore = settings[:links_to_explore]
                  @trace = settings[:trace]
                end

                # Starts the crawl at the origin URL (depth 1).
                def crawl
                  recursive_explore([@origin_url], 1)
                end

                # Explores the given URLs, reports each result to the observers, then
                # recurses into the not-yet-seen links that belong to a tracked domain.
                # Does nothing once depth exceeds the configured limit.
                def recursive_explore(urls, depth)
                  return unless i_am_not_too_deep?(depth)

                  $stdout.puts "exploring:\n#{urls.join("\n")}" if @trace
                  results = Explorer.new(urls, @links_we_want_to_explore).explore
                  results.each do |result|
                    changed
                    notify_observers(Time.now, result)
                    @known_urls << result.url.remove_fragment
                    @known_urls << result.destination_url.remove_fragment
                    # Observers see the javascript: links; the crawl itself must not follow them.
                    remove_javascript_and_print_warning result
                  end

                  #TODO: handle any other url parsing error
                  next_urls = results.map { |result| result.absolute_linked_resources }.flatten.uniq
                  next_urls = next_urls.select { |url| a_domain_we_care_about?(url) }
                  next_urls = next_urls.reject { |url| i_have_seen_this_url_before?(url) }
                  # each_slice is the core equivalent of the project's Array#chunk helper.
                  next_urls.each_slice(URLS_PER_BATCH) do |batch|
                    recursive_explore(batch, depth + 1)
                  end
                end

                # True when the URL (ignoring its fragment) was ignored or already explored.
                def i_have_seen_this_url_before?(url)
                  @known_urls.include?(url.remove_fragment)
                end

                def i_am_not_too_deep?(depth)
                  depth <= @depth_to_explore
                end

                # True when the URL's host equals the host of one of the configured
                # domains. URLs that URI cannot parse fall back to a prefix comparison
                # after dropping a leading http:// or https:// scheme.
                def a_domain_we_care_about?(url)
                  host = URI.parse(url).host
                  @domains.any? { |domain| domain.host == host }
                rescue URI::Error
                  without_scheme = url.sub(%r{\Ahttps?://}, '')
                  @domains.any? { |domain| without_scheme.start_with?(domain.host) }
                end

                # Removes javascript: pseudo-links from the result's linked resources.
                # Despite the name, no warning is printed yet.
                def remove_javascript_and_print_warning(result)
                  #TODO: put this in the logger
                  result.linked_resources.delete_if do |linked_resource|
                    linked_resource.downcase.start_with?("javascript:")
                  end
                end

              end
            end
         | 
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            module Magellan
              # Observer that checks, for every crawled HTML page whose URL matches a
              # pattern, that the page links to the expectation paired with that pattern.
              #
              # Fails when a matching page lacks its expected link, or when a pattern
              # was never matched by any crawled page.
              class ExpectedLinksTracker
                include Observable
                attr_reader :errors

                # expected_patterns - list of [pattern, expected_link] pairs. A Hash of
                #                     pattern => expected_link and nil (no expectations)
                #                     are accepted as well.
                def initialize(expected_patterns)
                  @errors = []
                  # to_a maps nil to [] and a Hash to [pattern, expectation] pairs, so the
                  # pair-based logic below works for every accepted input.
                  @expected_patterns = expected_patterns.to_a
                  @evaluated_expectations = {}
                end

                # Observer callback: evaluates every applicable expectation against an
                # HTML result and notifies this tracker's observers with (Time, passed)
                # once per expectation. Non-HTML results are ignored.
                def update(time, result)
                  return unless result.html_content?

                  patterns_that_apply(result).each do |_pattern, expectation|
                    passed = result.linked_resources.include?(expectation)
                    changed
                    notify_observers(Time.now, passed)
                    @errors << "#{result.url} did not contain a link to #{expectation}" unless passed
                  end
                end

                # Returns the [pattern, expectation] pairs whose pattern matches the
                # result's url or destination_url, marking each of them as evaluated.
                def patterns_that_apply(result)
                  applicable = @expected_patterns.select do |pattern, _expectation|
                    result.url =~ pattern || result.destination_url =~ pattern
                  end
                  applicable.each { |expected_pattern| @evaluated_expectations[expected_pattern] = nil }
                  applicable
                end

                def has_errors?
                  !@errors.empty?
                end

                def unmet_expecations?
                  !unmet_expecations.empty?
                end

                def failed?
                  unmet_expecations? || has_errors?
                end

                # Lines for never-evaluated patterns followed by the recorded errors.
                def failure_message
                  unmet_expecations_messages << errors.join("\n")
                end

                def unmet_expecations_messages
                  message = "\n\n"
                  unmet_expecations.each { |pattern, _unmet_expecation| message << "#{pattern} was never evaluted during the crawl\n" }
                  message
                end

                # Pairs whose pattern never matched any crawled HTML page.
                def unmet_expecations
                  @expected_patterns - @evaluated_expectations.keys
                end

                # Correctly spelled aliases; the original names are kept for existing callers.
                alias_method :unmet_expectations?, :unmet_expecations?
                alias_method :unmet_expectations_messages, :unmet_expecations_messages
                alias_method :unmet_expectations, :unmet_expecations
              end
            end
         | 
| @@ -0,0 +1,45 @@ | |
| 1 | 
            +
            require 'hpricot'
         | 
| 2 | 
            +
            require 'open-uri'
         | 
| 3 | 
            +
            require 'ostruct'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Magellan
              # Fetches a batch of URLs, one thread per URL, and turns every response
              # (or failure) into a Result.
              class Explorer
                # Content type recorded when the response does not provide one.
                UNKNOWN_CONTENT = "unknown"

                # urls  - URLs to fetch.
                # links - [tag, attribute] pairs handed to links_to_other_documents for
                #         HTML responses.
                def initialize(urls,links)
                  @links = links
                  @urls = urls
                end

                # Fetches every URL concurrently and returns the results in the same
                # order as the URLs were given.
                def explore
                  requests = @urls.map { |url| Thread.new(url) { |target| explore_a(target) } }
                  requests.map(&:value)
                end

                # Fetches a single URL and builds its Result. Links are only collected
                # from text/html responses. A response-code error yields a Result with
                # that code; a timeout yields a Result with status "505".
                def explore_a(url)
                  agent = WWW::Mechanize.new
                  agent.user_agent = "Ruby/#{RUBY_VERSION}"
                  doc = agent.get(url)
                  destination_url = doc.uri.to_s
                  status_code = doc.code
                  #TODO: clean this up, this is very hacky, I would rather pass in a hpricot doc to create a result
                  # A document without content_type, or with a nil one, is reported as
                  # UNKNOWN_CONTENT instead of crashing on the prefix check.
                  content_type = (doc.content_type if doc.respond_to?(:content_type)) || UNKNOWN_CONTENT
                  links = content_type.start_with?("text/html") ? doc.links_to_other_documents(@links) : []
                  Explorer.create_result(url, destination_url, status_code, links, content_type)
                rescue WWW::Mechanize::ResponseCodeError => the_error
                  Explorer.create_result(url, url, the_error.response_code, [],UNKNOWN_CONTENT)
                rescue Timeout::Error
                  # NOTE(review): 505 is "HTTP Version Not Supported"; 504 would be the
                  # conventional timeout code. Kept as-is because callers may rely on it.
                  Explorer.create_result(url, url, "505", [],UNKNOWN_CONTENT)
                end

                # Builds a Result, converting every link to a String.
                def self.create_result(url,destination_url,status_code,links,content_type)
                  Result.new(status_code,url,destination_url,links.map(&:to_s),content_type)
                end
              end
            end
         | 
| @@ -0,0 +1,10 @@ | |
| 1 | 
            +
            require 'mechanize'
         | 
| 2 | 
            +
            class WWW::Mechanize::Page
              # Collects, into one flat list, the attribute values found for each
              # entry of links_to_other_resources. The first element of an entry is
              # the tag to search for, the last element is the attribute to read.
              def links_to_other_documents(links_to_other_resources)
                values_per_entry = links_to_other_resources.map do |tag_and_attribute|
                  get_attributes(tag_and_attribute.first, tag_and_attribute.last)
                end
                values_per_entry.flatten
              end

              # Returns the non-nil values of +attribute+ on every element that
              # matches +tag+ in this page.
              def get_attributes(tag,attribute)
                all_values = (self / tag).map do |element|
                  element.attributes[attribute]
                end
                all_values.compact
              end
            end
         | 
| @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            require 'activesupport'
         | 
| 2 | 
            +
            require 'open-uri'
         | 
| 3 | 
            +
            class String
              # Resolves this (possibly relative) link against origin_url and returns
              # the absolute URL as a String. Surrounding whitespace is ignored.
              #
              # When the link cannot be resolved (for example because it is not a
              # valid URI) the receiver itself is returned unchanged.
              def to_absolute_url(origin_url)
                stripped = strip
                if stripped.start_with?('?')
                  #BUG in URI.join?  URI.join('http://www.google.com/index.html?foo=b','?foo=a') # => http://www.google.com/?foo=a
                  # Query-only links are therefore appended by hand. Both the old query
                  # and any fragment of the origin are dropped first, otherwise the new
                  # query would end up inside the fragment.
                  origin_url.gsub(/[?#].*/, '') + stripped
                else
                  URI.join(origin_url, stripped).to_s
                end
              rescue StandardError
                # Best effort: an unresolvable link is passed through as-is.
                self
              end

              # Returns a copy with everything from the first '#' onwards removed.
              def remove_fragment
                gsub(/#.*/, '')
              end
            end
         | 
| @@ -0,0 +1,40 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            #TODO: this is not a good place to use a template method - violates Liskov substitution principle
         | 
| 3 | 
            +
            module Magellan
              module Rake
                # Shared skeleton for the Magellan rake tasks. Subclasses supply
                # description, links_to_explore, create_observer and success_message;
                # this class wires them into a task that crawls from origin_url.
                class BaseMagellanTask < ::Rake::TaskLib
                  attr_accessor :origin_url, :explore_depth, :ignored_urls

                  # Stores the task name, lets the caller configure the task through
                  # the optional block, then registers the rake task.
                  def initialize(name)
                    @name = name
                    @ignored_urls = []
                    yield self if block_given?
                    define
                  end

                  # Registers the rake task. When run, the task crawls the site with the
                  # subclass' observer attached; it prints the failure message to STDERR
                  # and exits with status 1 on failure, otherwise prints the success message.
                  def define
                    desc description
                    task @name do
                      cartographer = Magellan::Cartographer.new(
                        :origin_url => origin_url,
                        :depth_to_explore => explore_depth,
                        :domains => [origin_url],
                        :ignored_urls => ignored_urls,
                        :links_to_explore => links_to_explore,
                        :trace => ENV['TRACE']
                      )
                      observer = create_observer
                      observer.add_observer(Magellan::Logger.new)
                      cartographer.add_observer(observer)
                      cartographer.crawl

                      crawl_passed = !observer.failed?
                      if crawl_passed
                        $stdout.puts "\n" + success_message
                      else
                        STDERR.puts observer.failure_message
                        exit 1
                      end
                    end
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,33 @@ | |
| 1 | 
            +
            require 'rake'
         | 
| 2 | 
            +
            require 'rake/tasklib'
         | 
| 3 | 
            +
            require 'magellan'
         | 
| 4 | 
            +
            require 'magellan/rake/base_magellan_task'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module Magellan
              module Rake
                # Rake task (default name "magellan:explore") that crawls a site and
                # reports broken links via a BrokenLinkTracker.
                class BrokenLinkTask < BaseMagellanTask
                  def initialize(name = "magellan:explore")
                    super
                  end

                  # Shown by rake as the task description.
                  def description
                    "explore #{@origin_url} for broken links"
                  end

                  # [tag, attribute] pairs whose values are followed during the crawl.
                  def links_to_explore
                    [%w[a href], %w[script src], %w[img src]]
                  end

                  # Observer that records the broken links found by the crawl.
                  def create_observer
                    Magellan::BrokenLinkTracker.new
                  end

                  # Printed when the crawl finishes without failures.
                  def success_message
                    "No broken links were found!"
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,35 @@ | |
| 1 | 
            +
            require 'rake'
         | 
| 2 | 
            +
            require 'rake/tasklib'
         | 
| 3 | 
            +
            require 'magellan'
         | 
| 4 | 
            +
            require 'magellan/rake/base_magellan_task'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module Magellan
              module Rake
                # Rake task (default name "magellan:check_links") that crawls a site
                # and verifies expected links via an ExpectedLinksTracker.
                class ExpectedLinksTask < BaseMagellanTask
                  # Patterns and the links expected on matching pages; passed to the tracker.
                  attr_accessor :patterns_and_expected_links

                  def initialize(name = "magellan:check_links")
                    super
                  end

                  # Observer that evaluates the configured expectations.
                  def create_observer
                    Magellan::ExpectedLinksTracker.new(@patterns_and_expected_links)
                  end

                  # Only anchor hrefs are followed for this task.
                  def links_to_explore
                    [%w[a href]]
                  end

                  # Shown by rake as the task description.
                  def description
                    "Explore #{@origin_url} and find check if all given patterns are matched"
                  end

                  # Printed when the crawl finishes without failures.
                  def success_message
                    "All expected links found!"
                  end
                end
              end
            end
         | 
| @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            module Magellan
              # Outcome of exploring one URL: the status code, the requested and final
              # URLs, the links found in the document and its content type.
              class Result
                attr_reader :status_code, :url, :destination_url, :linked_resources, :content_type

                def initialize(status_code,url,destination_url,linked_resources,content_type)
                  @status_code = status_code
                  @url = url
                  @destination_url = destination_url
                  @linked_resources = linked_resources
                  @content_type = content_type
                end

                # Linked resources converted to strings and resolved against the
                # destination URL (via String#to_absolute_url); nil entries are dropped.
                def absolute_linked_resources
                  linked_resources.map { |linked_resource| linked_resource.to_s.to_absolute_url(destination_url) }.compact
                end

                # True when the content type starts with "text/html". A missing (nil)
                # content type counts as non-HTML instead of raising.
                def html_content?
                  @content_type.to_s.start_with?("text/html")
                end
              end
            end
         | 
    
        data/spec/array_spec.rb
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for the project's Array#chunk(max_size) extension.
            describe "Array Extensions" do
              it "should be able to break up an array into chunks with a max size" do
                [1,2,3,4,5].chunk(3).size.should eql(2)
                [1,2,3,4,5].chunk(3).first.should eql([1,2,3])
                # The final chunk holds whatever is left over.
                [1,2,3,4,5].chunk(3).last.should eql([4,5])
              end

              # Previously shared its description with the example above, which made
              # failures indistinguishable in the spec output.
              it "should put every element in its own chunk when the max size is one" do
                [1,2,3,4,5].chunk(1).size.should eql(5)
                [1,2,3,4,5].chunk(1).first.should eql([1])
              end
            end
         | 
| @@ -0,0 +1,64 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'rake'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::Rake::BrokenLinkTask. A dedicated Rake::Application
            # is installed for the whole group and cleared again afterwards.
            describe "Magellan BrokenLinkTask" do

              before :all do
                @file_name = "#{File.dirname(__FILE__)}/../lib/magellan/rake/broken_link_task.rb"
                @rake = Rake::Application.new
                Rake.application = @rake
              end

              before :each do
                # Re-load the task definition file before every example.
                load @file_name
                # Swallow anything written to stdout through putc.
                $stdout.stubs(:putc)
              end

              after :all do
                Rake.application = nil
              end

              it "should create a rake task" do
                Magellan::Rake::BrokenLinkTask.new
                task_defined = tasks.include?("magellan:explore")
                task_defined.should be_true
              end

              it "should explore when task is invoked" do
                Magellan::Rake::BrokenLinkTask.new("invoke_task") do |task|
                  task.explore_depth = 1
                  task.origin_url = "http://localhost:8080"
                end
                url = "http://localhost:8080"
                ok_result = create_result(url, "200")
                Magellan::Explorer.any_instance.expects(:explore_a).once.with(url).returns(ok_result)
                $stdout.expects(:puts) # passed message
                @rake.invoke_task("invoke_task")
              end

              it "should raise exception when broken links are found" do
                Magellan::Rake::BrokenLinkTask.new("exception_task") do |task|
                  task.explore_depth = 1
                  task.origin_url = "http://canrailsscale.com"
                end
                $stderr.expects(:puts)
                url = "http://canrailsscale.com"
                Magellan::Explorer.any_instance.stubs(:explore_a).once.with(url).returns(create_result(url, "500"))
                invocation = lambda { @rake.invoke_task("exception_task") }
                invocation.should raise_error
              end

              it "should attach logger" do
                Magellan::Rake::BrokenLinkTask.new("logger_test") do |task|
                  task.explore_depth = 1
                  task.origin_url = "http://canrailsscale.com"
                end
                $stderr.stubs(:puts)
                Magellan::Logger.any_instance.expects(:update)
                url = "http://canrailsscale.com"
                Magellan::Explorer.any_instance.stubs(:explore_a).once.with(url).returns(create_result(url, "500"))
                invocation = lambda { @rake.invoke_task("logger_test") }
                invocation.should raise_error
              end

              # Builds an Explorer result whose two url arguments are both +url+, with
              # no linked resources and "foo" as the last argument.
              def create_result(url, status_code)
                no_links = []
                Magellan::Explorer.create_result(url, url, status_code, no_links, "foo")
              end

              # Names of every task currently defined on the spec's rake application.
              def tasks
                @rake.tasks.map { |rake_task| rake_task.name }
              end
            end
         | 
| @@ -0,0 +1,67 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::BrokenLinkTracker: results are pushed in through
            # #update, and results with 4xx/5xx status codes end up in #broken_links.
            describe Magellan::BrokenLinkTracker do

              it "should not report broken links if there are none" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.update(Time.now, create_success_result('http://www.foo.com', ['jalskdjflakjsf']))
                tracker.failed?.should be_false
              end

              it "should record links by absolute_url" do
                tracker = Magellan::BrokenLinkTracker.new
                # foople.html links to /apples.html, which then comes back as a 404 ...
                tracker.update(Time.now, create_success_result('http://www.bozo.com/foople.html', ['/apples.html']))
                tracker.update(Time.now, create_result('http://www.bozo.com/apples.html', "404", []))
                # ... so the message is expected to name the page holding the link.
                tracker.failure_message.should include("http://www.bozo.com/foople.html")
              end

              it "should only record broken links errors" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.update(Time.now, create_success_result('http://www.foo.com', ['http://www.google.com']))
                tracker.update(Time.now, create_result('http://www.foo.com/404', "404", []))
                tracker.failed?.should be_true
                tracker.broken_links.size.should eql(1)
              end

              it "should record 4** errors" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.update(Time.now, create_result('http://www.foo.com/404', "404", []))
                tracker.broken_links.first.status_code.should eql('404')
              end

              it "have url and status code in the error message" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.update(Time.now, create_result('broke url', "404", []))
                message = tracker.failure_message
                message.should include('broke url')
                message.should include("404")
              end

              it "should record 5** errors" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.update(Time.now, create_result('fooz', "500", []))
                tracker.broken_links.first.status_code.should eql('500')
              end

              it "should update the observer with a pass" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.add_observer(Magellan::Logger.new)
                $stdout.expects(:putc).with('.')
                tracker.update(Time.now, Magellan::Result.new('200', '/zoro', '/zoro', ['/about_us.html'], "text/html"))
              end

              # This example checks the failure marker ('F' for a 404); it used to
              # carry the same "with a pass" description as the example above.
              it "should update the observer with a fail" do
                tracker = Magellan::BrokenLinkTracker.new
                tracker.add_observer(Magellan::Logger.new)
                $stdout.expects(:putc).with('F')
                tracker.update(Time.now, Magellan::Result.new('404', '/zoro', '/zoro', ['/fail_about_us.html'], "text/html"))
              end

              # Builds a "200" result for +url+ that links to +linked_resources+.
              def create_success_result(url, linked_resources)
                create_result(url, "200", linked_resources)
              end

              # Builds a Magellan::Result with the given status code, +url+ for both
              # url arguments, the given links and "foo" as the last argument.
              def create_result(url, status_code, linked_resources)
                Magellan::Result.new(status_code, url, url, linked_resources, "foo")
              end
            end
         | 
| @@ -0,0 +1,176 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::Cartographer#crawl. Unless noted otherwise, Explorer
            # is mocked so each example dictates which urls get explored and which
            # links every exploration reports back.
            describe Magellan::Cartographer do

              it "should not visit the same url more then once" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
                cartographer_for(origin_url).crawl
              end

              it "should not visit the origin url more then once if it finds a link with a finishing /" do
                pending
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/']))
                cartographer_for(origin_url).crawl
              end

              it "should try to explore urls in the domain we care about that have non ascii characters in them" do
                origin_url = "http://www.reddit.com"
                in_domain_url = "http://www.reddit.com/r/science/comments/87dk7/cold_fusion_is_a_pipe_dream_but_μcatalyzed_cool/"
                found_links = [in_domain_url, "http://www.domainwedontcareabout.com/μ"]
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(found_links))
                any_explorer.expects(:explore_a).once.with(in_domain_url).returns(create_success_result([]))
                cartographer_for(origin_url).crawl
              end

              it "should not visit the same url more then once if they differ by fragment id" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com#foo']))
                cartographer_for(origin_url).crawl
              end

              it "should notify observers when a result comes in" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com']))
                cartographer = cartographer_for(origin_url)
                observer = Object.new
                observer.expects(:update)
                cartographer.add_observer(observer)
                cartographer.crawl
              end

              it "should notify observers everytime a result comes in" do
                origin_url = "http://www.google.com"
                found_links = ['http://www.google.com/foo.html', 'http://www.google.com/bar.html']
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(found_links))
                any_explorer.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
                any_explorer.expects(:explore_a).with('http://www.google.com/bar.html').returns(create_success_result([]))
                cartographer = cartographer_for(origin_url)
                observer = Object.new
                # One notification for the origin plus one per linked page.
                observer.expects(:update).times(3)
                cartographer.add_observer(observer)
                cartographer.crawl
              end

              it "should explore other linked resources" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
                any_explorer.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
                cartographer_for(origin_url).crawl
              end

              it "should not explore ignored urls" do
                origin_url = "http://www.google.com"
                found_links = ['http://www.google.com/foo.html', 'http://www.google.com/ignoreme.html']
                any_explorer.expects(:explore_a).with(origin_url).returns(create_success_result(found_links))
                any_explorer.expects(:explore_a).with('http://www.google.com/foo.html').returns(create_success_result([]))
                ignored = ['http://www.google.com/ignoreme.html']
                cartographer_for(origin_url, 3, [origin_url], ignored).crawl
              end

              it "should not explore the same url more then once" do
                origin_url = "http://www.google.com"
                found_links = ['http://www.google.com/foo.html', 'http://www.google.com/foo.html']
                any_explorer.expects(:explore_a).with(origin_url).returns(create_success_result(found_links))
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result([]))
                cartographer_for(origin_url).crawl
              end

              it "should be able to specify crawlable domains" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.foo.com']))
                any_explorer.expects(:explore_a).once.with('http://www.foo.com').returns(create_success_result(['http://www.bar.com']))
                allowed_domains = ['http://www.google.com', 'http://www.foo.com']
                cartographer_for(origin_url, 5, allowed_domains).crawl
              end

              it "should explore relative links" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['/foo2.html']))
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result([]))
                cartographer_for(origin_url).crawl
              end

              it "should go n layers deep into a site" do
                origin_url = "http://www.google.com"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(['http://www.google.com/foo.html']))
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo.html').returns(create_success_result(['http://www.google.com/foo2.html']))
                # foo3.html is reported but no exploration of it is expected at depth 3.
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo2.html').returns(create_success_result(['http://www.google.com/foo3.html']))
                cartographer_for(origin_url, 3).crawl
              end

              it "should use host to determine if we are in a allowed domain" do
                origin_url = "http://www.google.com/jskfjlsajfd"
                cartographer = cartographer_for(origin_url)
                cared_about = cartographer.a_domain_we_care_about?("http://www.google.com/index.html")
                cared_about.should be_true
              end

              it "should not explore js urls and print warnings if they are found, obtrusive javascript is bad mmkay" do
                origin_url = "http://www.google.com"
                found_links = ["javascript:bookmarksite('ThoughtWorks Studios', 'http://studios.thoughtworks.com')", 'http://www.google.com/foo']
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(found_links))
                any_explorer.expects(:explore_a).once.with('http://www.google.com/foo').returns(create_success_result([]))
                cartographer_for(origin_url).crawl
              end

              # Presumably the markup behind the next example (note the trailing comma
              # inside the href):
              #<a alex.hal9000@gmail.com="" href="mailto:PWang@thoughtworks.com,">PWang@thoughtworks.com, alex.hal9000@gmail.com</a>

              it "should not die on checking the domain on invalid urls" do
                origin_url = "http://www.google.com/adsfaf"
                cartographer = cartographer_for(origin_url)
                cared_about = cartographer.a_domain_we_care_about?("mailto:PWang@thoughtworks.com,")
                cared_about.should be_false
              end

              it "should not explore mailto urls" do
                origin_url = "http://www.google.com/adsfaf"
                any_explorer.expects(:explore_a).once.with(origin_url).returns(create_success_result(["mailto:foo"]))
                cartographer_for(origin_url).crawl
              end

              it "should puts out urls if the trace is enabled" do
                origin_url = "http://www.google.com/adsfaf"
                any_explorer.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
                traced_settings = settings(origin_url).merge(:trace => true)
                cartographer = Magellan::Cartographer.new(traced_settings)
                $stdout.expects(:puts).with { |line| line.include?(origin_url) }
                cartographer.crawl
              end

              it "should not puts if the trace is disabled" do
                origin_url = "http://www.google.com/adsfaf"
                any_explorer.stubs(:explore_a).once.with(origin_url).returns(create_success_result([]))
                untraced_settings = settings(origin_url).merge(:trace => false)
                cartographer = Magellan::Cartographer.new(untraced_settings)
                $stdout.expects(:puts).never
                cartographer.crawl
              end

              # NOTE(review): no Explorer stub or expectation is set up in this example,
              # so it appears to crawl the live site -- confirm whether that is intended.
              it "should record the source and the destination url in known urls" do
                origin_url = "http://studios.thoughtworks.com/cruise"
                cartographer = cartographer_for(origin_url, 1)
                cartographer.crawl
                cartographer.i_have_seen_this_url_before?(origin_url).should be_true
                cartographer.i_have_seen_this_url_before?("http://studios.thoughtworks.com/cruise-continuous-integration").should be_true
              end

              # Examples without a body: declared but not implemented yet.
              it "should go through a entire site if layers to explore is set to -1"
              it "should explore n layers into external domains"

              # The mock object standing in for every Magellan::Explorer instance.
              def any_explorer
                Magellan::Explorer.any_instance
              end

              # Builds a Cartographer from #settings called with the same arguments.
              def cartographer_for(*settings_args)
                Magellan::Cartographer.new(settings(*settings_args))
              end

              # A "200" result that reports +linked_resources+ as its links.
              def create_success_result(linked_resources)
                create_result("200", linked_resources)
              end

              # Settings hash handed to Cartographer.new; by default the origin url is
              # the only entry in :domains and nothing is ignored.
              def settings(origin_url,depth=5,domains = [origin_url], ignored_urls=[])
                {
                  :origin_url       => origin_url,
                  :depth_to_explore => depth,
                  :domains          => domains,
                  :ignored_urls     => ignored_urls,
                  :links_to_explore => [["a", "href"]]
                }
              end

              # A text/html result whose two url arguments are both http://www.google.com.
              def create_result(status_code, linked_resources)
                page_url = "http://www.google.com"
                Magellan::Result.new(status_code, page_url, "http://www.google.com", linked_resources, "text/html")
              end

            end
         | 
| @@ -0,0 +1,68 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'rake'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe "Magellan ExpectedLinksTask" do
         | 
| 5 | 
            +
             | 
| 6 | 
            +
              before :all do
         | 
| 7 | 
            +
                @file_name = File.dirname(__FILE__)  + "/../lib/magellan/rake/expected_links_task.rb"
         | 
| 8 | 
            +
                @rake = Rake::Application.new
         | 
| 9 | 
            +
                Rake.application = @rake
         | 
| 10 | 
            +
              end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              before :each do
         | 
| 13 | 
            +
                $stdout.stubs(:putc)
         | 
| 14 | 
            +
                load @file_name
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              after :all do
         | 
| 18 | 
            +
                Rake.application = nil
         | 
| 19 | 
            +
              end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
              it "should create a rake task" do
         | 
| 22 | 
            +
                Magellan::Rake::ExpectedLinksTask.new
         | 
| 23 | 
            +
                tasks.include?("magellan:check_links").should be_true
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              it "should explore when task is invoked" do
         | 
| 27 | 
            +
                Magellan::Rake::ExpectedLinksTask.new("some_task") do |t|
         | 
| 28 | 
            +
                  t.explore_depth = 1
         | 
| 29 | 
            +
                  t.patterns_and_expected_links = []
         | 
| 30 | 
            +
                  t.origin_url = "http://localhost:8080"
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
                $stdout.expects(:puts)
         | 
| 33 | 
            +
                Magellan::Explorer.any_instance.expects(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
         | 
| 34 | 
            +
                @rake.invoke_task("some_task")
         | 
| 35 | 
            +
              end
         | 
| 36 | 
            +
              
         | 
| 37 | 
            +
              
         | 
| 38 | 
            +
              it "should notify a expected link tracker when a task is invoked" do
         | 
| 39 | 
            +
                Magellan::Rake::ExpectedLinksTask.new("invoke_expected_link_tracker") do |t|
         | 
| 40 | 
            +
                  t.explore_depth = 1
         | 
| 41 | 
            +
                  t.patterns_and_expected_links = []
         | 
| 42 | 
            +
                  t.origin_url = "http://localhost:8080"
         | 
| 43 | 
            +
                end
         | 
| 44 | 
            +
                $stdout.expects(:puts)
         | 
| 45 | 
            +
                Magellan::Explorer.any_instance.stubs(:explore_a).once.with("http://localhost:8080").returns(create_result("http://localhost:8080","200"))
         | 
| 46 | 
            +
                Magellan::ExpectedLinksTracker.any_instance.expects(:update).once
         | 
| 47 | 
            +
                @rake.invoke_task("invoke_expected_link_tracker")
         | 
| 48 | 
            +
              end
         | 
| 49 | 
            +
              
         | 
| 50 | 
            +
              # The canned result carries no links, so the catch-all rule expecting
              # '/about_us.html' cannot be satisfied and the task must raise.
              it "should fail the rake task if expected links did not exist or rules did not evaluate to be true" do
                Magellan::Rake::ExpectedLinksTask.new("exception_raising_task") do |task|
                  task.explore_depth = 1
                  task.patterns_and_expected_links = [[/.*/,'/about_us.html']]
                  task.origin_url = "http://canrailsscale.com"
                end

                $stderr.expects(:puts)

                # Canned result so the origin url is never actually fetched.
                explored = create_result("http://canrailsscale.com", "200")
                Magellan::Explorer.any_instance.
                  stubs(:explore_a).once.
                  with("http://canrailsscale.com").
                  returns(explored)

                invocation = lambda { @rake.invoke_task("exception_raising_task") }
                invocation.should raise_error
              end
         | 
| 60 | 
            +
              
         | 
| 61 | 
            +
              # Builds a result through Magellan::Explorer.create_result, passing the
              # same url twice, an empty link list and a "text/html" content type.
              # NOTE(review): presumably the two url arguments are the source and
              # destination urls -- confirm against Magellan::Explorer.
              def create_result(url,status_code)
                no_links = []
                Magellan::Explorer.create_result(url, url, status_code, no_links, "text/html")
              end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
              # Names of all tasks currently known to @rake.
              def tasks
                @rake.tasks.map do |registered|
                  registered.name
                end
              end
         | 
| 68 | 
            +
            end
         | 
| @@ -0,0 +1,87 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::ExpectedLinksTracker. The tracker is built from a list
            # of [url_pattern, expected_link] rules and is fed crawl results via #update.
            # Results are built as
            #   Magellan::Result.new(status_code, url, destination_url, links, content_type)
            describe Magellan::ExpectedLinksTracker do

              it "should create an error message containing the offending url and the missing expected link" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
                tracker.errors.first.should include('/fozo')
                tracker.errors.first.should include('/about_us.html')
              end

              it "should be able specify all resource should link to something" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
                tracker.has_errors?.should be_false
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.has_errors?.should be_true
              end

              # The rule is only checked once the pattern matches the result's url.
              it "should only apply rules if they apply to source url" do
                tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.has_errors?.should be_false
                tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.has_errors?.should be_true
              end

              # Same as above, but the pattern matches the destination url instead.
              it "should only apply rules if they apply to destination url" do
                tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.has_errors?.should be_false
                tracker.update(Time.now,Magellan::Result.new('200','/zooo','/foo.html',['/about_fail_us.html'],"text/html"))
                tracker.has_errors?.should be_true
              end

              # An expectation counts as "met" once any result matches its pattern,
              # even when that result does not contain the expected link.
              it "should know if a expectation was never met" do
                tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.unmet_expecations?.should be_true
                tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/foo.html',['/about_fail_us.html'],"text/html"))
                tracker.unmet_expecations?.should be_false
              end

              it "should provide a meaningful error message around unmet expectations" do
                tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.unmet_expecations_messages.should include(/foo\.html/.to_s)
              end

              it "should return failed if there are unmet expectations" do
                tracker = Magellan::ExpectedLinksTracker.new([[/foo\.html/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zooo','/zoro',['/about_fail_us.html'],"text/html"))
                tracker.failed?.should be_true
                tracker.update(Time.now,Magellan::Result.new('200','/foo.html','/zoro',['/about_us.html'],"text/html"))
                tracker.failed?.should be_false
              end

              it "should return failed if there are failed expectations" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
                tracker.failed?.should be_false
                tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"text/html"))
                tracker.failed?.should be_true
              end

              # The second result has no links at all but is javascript, so it must
              # not be counted as a failure.
              it "should ignore the result if it is not a html content type" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
                tracker.update(Time.now,Magellan::Result.new('200','/fozo',"/bar",[],"application/javascript"))
                tracker.failed?.should be_false
              end

              it "should update the observer with a pass" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.add_observer(Magellan::Logger.new)
                $stdout.expects(:putc).with('.')
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/about_us.html'],"text/html"))
              end

              # Previously shared its description with the passing example above,
              # which made the two indistinguishable in spec output.
              it "should update the observer with a fail" do
                tracker = Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
                tracker.add_observer(Magellan::Logger.new)
                $stdout.expects(:putc).with('F')
                tracker.update(Time.now,Magellan::Result.new('200','/zoro','/zoro',['/fail_about_us.html'],"text/html"))
              end

            end
         | 
| @@ -0,0 +1,72 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::Explorer.
            # NOTE(review): apart from the timeout example, nothing here stubs the HTTP
            # layer, so these examples appear to request the public sites named in the
            # urls and depend on their current content -- confirm before relying on
            # them in CI.
            describe Magellan::Explorer do

              it "should find other js resources" do
                result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
                result.first.absolute_linked_resources.should include('http://pagead2.googlesyndication.com/pagead/show_ads.js')
              end

              # A Timeout::Error raised by Mechanize is reported as a result with
              # status code '505' for the original url rather than propagating.
              it "should report a 505 status code when the request times out" do
                WWW::Mechanize.any_instance.expects(:get).raises(Timeout::Error)
                result = Magellan::Explorer.new(['http://canrailsscale.com/'],links_to_explore).explore
                result.first.status_code.should eql('505')
                result.first.url.should eql('http://canrailsscale.com/')
              end

              it "should have one result for one url" do
                result = Magellan::Explorer.new(['http://www.google.com/'],links_to_explore).explore
                result.size.should eql(1)
              end

              it "should have two results for two urls" do
                result = Magellan::Explorer.new(['http://www.google.com/','http://www.apple.com/'],links_to_explore).explore
                result.size.should eql(2)
              end

              it "should find other pages to explore via a href" do
                result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
                result.first.absolute_linked_resources.should include('http://video.google.com/?hl=en&tab=wv')
              end

              it "should translate relative urls to absolute ones" do
                result = Magellan::Explorer.new('http://www.google.com/',links_to_explore).explore
                result.first.absolute_linked_resources.should include('http://www.google.com/intl/en/about.html')
              end

              it "should report non successful status codes" do
                result = Magellan::Explorer.new('http://www.google.com/dfkjaslfkjaslfkj.html',links_to_explore).explore
                result.first.status_code.should eql("404")
              end

              it "should not get any links if it not a text/xhtml file" do
                result = Magellan::Explorer.new("http://jqueryjs.googlecode.com/files/jquery-1.3.2.min.js",links_to_explore).explore
                result.first.absolute_linked_resources.should be_empty
              end

              it "should update url if redirected" do
                result = Magellan::Explorer.new("http://www.thoughtworks.com/mingle",links_to_explore).explore
                result.first.destination_url.should eql("http://studios.thoughtworks.com/mingle-agile-project-management")
              end

              it "should return source url as destination url if a error occurs" do
                result = Magellan::Explorer.new("http://www.google.com/dfkjaslfkjaslfkj.html",links_to_explore).explore
                result.first.destination_url.should eql("http://www.google.com/dfkjaslfkjaslfkj.html")
              end

              # Smoke test only: there is no assertion, so it passes as long as
              # explore_a does not raise.
              it "should be able to explore a url" do
                Magellan::Explorer.new('',links_to_explore).explore_a("http://www.yahoo.com")
              end

              it "should be able to go from http to https" do
                result = Magellan::Explorer.new("http://mail.yahoo.com",links_to_explore).explore
                result.first.destination_url.starts_with?("https://").should be_true
              end

              # No block is given, so the runner reports this example as pending.
              it "should be able to crawl ftp based links"

              # [tag, attribute] pairs handed to the explorer as the links to follow.
              def links_to_explore
                [["a","href"],["script","src"],["img","src"]]
              end
            end
         | 
    
        data/spec/logger_spec.rb
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Magellan::Logger#update writes one progress character to $stdout per
            # call: '.' when the second argument is true, 'F' when it is false.
            describe Magellan::Logger do
              before(:each) do
                @logger = Magellan::Logger.new
              end

              it "should put a . for a pass" do
                $stdout.expects(:putc).with('.')
                @logger.update(Time.now, true)
              end

              it "should put a F for a fail" do
                $stdout.expects(:putc).with('F')
                @logger.update(Time.now, false)
              end
            end
         | 
| @@ -0,0 +1,44 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for the links_to_other_documents extension on WWW::Mechanize::Page,
            # which is given [tag, attribute] pairs and returns the matching attribute
            # values found in the page.
            describe "WWW::Mechanize::Page Extensions" do
              # [tag, attribute] pairs passed to links_to_other_documents.
              # NOTE(review): a constant assigned inside a describe block is probably
              # defined at the top level rather than scoped to this group -- confirm
              # before reusing the name LINKS in another spec file.
              LINKS = [["a","href"],["script","src"],["img","src"]]

              it "should not return nil for script tags without src attributes" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo>something</script>")
                doc.links_to_other_documents(LINKS).should be_empty
              end

              it "should find links based on script tags with src attributes" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<script class=foo src='foozor'>something</script>")
                links_to_other_documents = doc.links_to_other_documents(LINKS)
                links_to_other_documents.size.should eql(1)
                links_to_other_documents.first.to_s.should eql("foozor")
              end

              it "should be able to get two script sources" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<body><script class=foo src='foozor'>something</script><script class=foo src='fdsajfkajf'>something</script></body>")
                links_to_other_documents = doc.links_to_other_documents(LINKS)
                links_to_other_documents.size.should eql(2)
              end

              it "should find links based on a tags with href attributes" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo href='bozo'>something</a>")
                links_to_other_documents = doc.links_to_other_documents(LINKS)
                links_to_other_documents.size.should eql(1)
                links_to_other_documents.first.to_s.should eql("bozo")
              end

              it "should find links based on img tags with src attributes" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<img class=foo src='ohno' alt='whatever' />")
                links_to_other_documents = doc.links_to_other_documents(LINKS)
                links_to_other_documents.size.should eql(1)
                links_to_other_documents.first.to_s.should eql("ohno")
              end

              # The anchor has no href, so nothing should be reported for it.
              it "should not find links based on a tags without href attributes" do
                doc = WWW::Mechanize::Page.new(nil,{'content-type' => "text/html"},"<a class=foo>something</a>")
                doc.links_to_other_documents(LINKS).should be_empty
              end

            end
         | 
    
        data/spec/result_spec.rb
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for Magellan::Result#absolute_linked_resources. Results are built as
            #   Magellan::Result.new(status_code, url, destination_url, links, content_type)
            describe Magellan::Result do

              it "should not remove fragments when converting to absolute urls" do
                result = Magellan::Result.new(
                  "200",
                  "http://www.google.com/index.html",
                  "http://www.google.com/index.html",
                  ["/index.html#foo"],
                  "foo"
                )
                absolute_links = result.absolute_linked_resources
                absolute_links.should include("http://www.google.com/index.html#foo")
              end

              # url and destination_url are on different hosts here; the relative
              # link must be resolved against the destination host.
              it "should use destination_url to build new absolute urls" do
                result = Magellan::Result.new(
                  "200",
                  "http://www.google.com/bob.html",
                  "http://www.foo.com/bob.html",
                  ["/index.html"],
                  "foo"
                )
                absolute_links = result.absolute_linked_resources
                absolute_links.should include("http://www.foo.com/index.html")
              end

            end
         | 
    
        data/spec/spec_helper.rb
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
| 1 | 
            +
            require 'rubygems'
         | 
| 2 | 
            +
            require 'spec'
         | 
| 3 | 
            +
            require 'mocha'
         | 
| 4 | 
            +
            require File.dirname(__FILE__) + '/../config/vendorized_gems'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Put the project's lib/ directory (a sibling of this file's directory) at
            # the front of the load path, unless it is already listed.
            lib_path = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
            $LOAD_PATH.unshift(lib_path) if !$LOAD_PATH.include?(lib_path)
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Tell the spec runner to use Mocha as its mocking framework.
            Spec::Runner.configure { |config| config.mock_with :mocha }
         | 
| @@ -0,0 +1,67 @@ | |
| 1 | 
            +
            require File.dirname(__FILE__) + '/spec_helper'
         | 
| 2 | 
            +
            require 'magellan'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Specs for the String extensions: #to_absolute_url(base_url) resolves the
            # receiver against base_url, and #remove_fragment drops a trailing
            # "#fragment".
            describe "String Extensions" do

              it "should convert relative urls to absolute" do
                input = '/Test_Automation_Framework/chrome/common/js/trac.js'
                input.to_absolute_url('http://www.google.com').should eql('http://www.google.com/Test_Automation_Framework/chrome/common/js/trac.js')
              end

              it "should remove any relative path from original url" do
                input = '/foo/trac.js'
                input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
              end

              it "should merge urls correctly with dots" do
                input = '../foo/trac.js'
                input.to_absolute_url('http://www.google.com/something/index.html').should eql('http://www.google.com/foo/trac.js')
              end

              it "should do nothing to absolute http urls" do
                input = 'http://www.apple.com'
                input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
              end

              # The base url ends in "/" and the input starts with "/"; the result
              # must contain only one slash at the join.
              it "should not put double slashes when converting relative to absolute" do
                input = "/intl/en/about.html"
                input.to_absolute_url('http://www.google.com/').should eql('http://www.google.com/intl/en/about.html')
              end

              it "should do nothing to absolute https urls" do
                input = 'https://www.apple.com'
                input.to_absolute_url('http://www.google.com').should eql('https://www.apple.com')
              end

              it "should translate relative https urls to absolute" do
                input = "/intl/en/about.html"
                input.to_absolute_url('https://www.google.com/').should eql('https://www.google.com/intl/en/about.html')
              end

              it "should translate relative urls to absolute ones" do
                "/intl/en/about.html".to_absolute_url("http://www.google.com").should eql('http://www.google.com/intl/en/about.html')
              end

              it "should not translate absolute urls" do
                "http://video.google.com/foo/about.html".to_absolute_url("http://www.google.com").should eql("http://video.google.com/foo/about.html")
              end

              it "should return string itself if uri parse fails" do
                "something not a url".to_absolute_url("http://www.google.com").should eql("something not a url")
              end

              it "should chomp the fragment portion off the url" do
                "http://video.google.com/foo/about.html#sdkfjskajflsajf".remove_fragment.should eql("http://video.google.com/foo/about.html")
              end

              it "should strip spaces off of the input url" do
                input = ' http://www.apple.com'
                input.to_absolute_url('http://www.google.com').should eql('http://www.apple.com')
              end

              # A query-only input replaces the base url's query string.
              it "should correctly join urls that are strictly query params" do
                input = '?foo=bar'
                input.to_absolute_url('http://www.google.com/index.html?foo=zoro').should eql('http://www.google.com/index.html?foo=bar')
              end

            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,102 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: magellan
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              version: 0.1.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors: 
         | 
| 7 | 
            +
            - Nolan Evans
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            date: 2009-04-06 00:00:00 -07:00
         | 
| 13 | 
            +
            default_executable: 
         | 
| 14 | 
            +
            dependencies: 
         | 
| 15 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 16 | 
            +
              name: mechanize
         | 
| 17 | 
            +
              type: :runtime
         | 
| 18 | 
            +
              version_requirement: 
         | 
| 19 | 
            +
              version_requirements: !ruby/object:Gem::Requirement 
         | 
| 20 | 
            +
                requirements: 
         | 
| 21 | 
            +
                - - ">="
         | 
| 22 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 23 | 
            +
                    version: "0"
         | 
| 24 | 
            +
                version: 
         | 
| 25 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 26 | 
            +
              name: activesupport
         | 
| 27 | 
            +
              type: :runtime
         | 
| 28 | 
            +
              version_requirement: 
         | 
| 29 | 
            +
              version_requirements: !ruby/object:Gem::Requirement 
         | 
| 30 | 
            +
                requirements: 
         | 
| 31 | 
            +
                - - ">="
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 33 | 
            +
                    version: "0"
         | 
| 34 | 
            +
                version: 
         | 
| 35 | 
            +
            description: TODO
         | 
| 36 | 
            +
            email: nolane@gmail.com
         | 
| 37 | 
            +
            executables: []
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            extensions: []
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            extra_rdoc_files: 
         | 
| 42 | 
            +
            - README
         | 
| 43 | 
            +
            files: 
         | 
| 44 | 
            +
            - VERSION.yml
         | 
| 45 | 
            +
            - lib/magellan
         | 
| 46 | 
            +
            - lib/magellan/broken_link_tracker.rb
         | 
| 47 | 
            +
            - lib/magellan/cartographer.rb
         | 
| 48 | 
            +
            - lib/magellan/expected_links_tracker.rb
         | 
| 49 | 
            +
            - lib/magellan/explorer.rb
         | 
| 50 | 
            +
            - lib/magellan/extensions
         | 
| 51 | 
            +
            - lib/magellan/extensions/array.rb
         | 
| 52 | 
            +
            - lib/magellan/extensions/mechanize_page.rb
         | 
| 53 | 
            +
            - lib/magellan/extensions/string.rb
         | 
| 54 | 
            +
            - lib/magellan/logger.rb
         | 
| 55 | 
            +
            - lib/magellan/rake
         | 
| 56 | 
            +
            - lib/magellan/rake/base_magellan_task.rb
         | 
| 57 | 
            +
            - lib/magellan/rake/broken_link_task.rb
         | 
| 58 | 
            +
            - lib/magellan/rake/expected_links_task.rb
         | 
| 59 | 
            +
            - lib/magellan/result.rb
         | 
| 60 | 
            +
            - lib/magellan.rb
         | 
| 61 | 
            +
            - spec/array_spec.rb
         | 
| 62 | 
            +
            - spec/broken_link_task_spec.rb
         | 
| 63 | 
            +
            - spec/broken_link_tracker_spec.rb
         | 
| 64 | 
            +
            - spec/cartographer_spec.rb
         | 
| 65 | 
            +
            - spec/expected_links_task_spec.rb
         | 
| 66 | 
            +
            - spec/expected_links_tracker_spec.rb
         | 
| 67 | 
            +
            - spec/explorer_spec.rb
         | 
| 68 | 
            +
            - spec/logger_spec.rb
         | 
| 69 | 
            +
            - spec/mechanize_page_spec.rb
         | 
| 70 | 
            +
            - spec/result_spec.rb
         | 
| 71 | 
            +
            - spec/spec_helper.rb
         | 
| 72 | 
            +
            - spec/string_extensions_spec.rb
         | 
| 73 | 
            +
            - README
         | 
| 74 | 
            +
            has_rdoc: true
         | 
| 75 | 
            +
            homepage: http://github.com/nolman/magellan
         | 
| 76 | 
            +
            post_install_message: 
         | 
| 77 | 
            +
            rdoc_options: 
         | 
| 78 | 
            +
            - --inline-source
         | 
| 79 | 
            +
            - --charset=UTF-8
         | 
| 80 | 
            +
            require_paths: 
         | 
| 81 | 
            +
            - lib
         | 
| 82 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 83 | 
            +
              requirements: 
         | 
| 84 | 
            +
              - - ">="
         | 
| 85 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 86 | 
            +
                  version: "0"
         | 
| 87 | 
            +
              version: 
         | 
| 88 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 89 | 
            +
              requirements: 
         | 
| 90 | 
            +
              - - ">="
         | 
| 91 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 92 | 
            +
                  version: "0"
         | 
| 93 | 
            +
              version: 
         | 
| 94 | 
            +
            requirements: []
         | 
| 95 | 
            +
             | 
| 96 | 
            +
            rubyforge_project: magellan
         | 
| 97 | 
            +
            rubygems_version: 1.3.1
         | 
| 98 | 
            +
            signing_key: 
         | 
| 99 | 
            +
            specification_version: 2
         | 
| 100 | 
            +
            summary: A web testing framework that embraces the discoverable nature of the web
         | 
| 101 | 
            +
            test_files: []
         | 
| 102 | 
            +
             |