RubyGems - magellan - Versions diffs - 0.1.2 → 0.1.3 - Mend

magellan 0.1.2 → 0.1.3

Files changed (16) hide show

data/README.rdoc +69 -0
data/VERSION.yml +2 -2
data/lib/magellan/broken_link_tracker.rb +10 -3
data/lib/magellan/cartographer.rb +25 -9
data/lib/magellan/expected_links_tracker.rb +15 -5
data/lib/magellan/explorer.rb +5 -5
data/lib/magellan/extensions/array.rb +4 -0
data/lib/magellan/extensions/mechanize_page.rb +2 -2
data/lib/magellan/extensions/string.rb +9 -1
data/lib/magellan/logger.rb +6 -4
data/lib/magellan/rake/base_magellan_task.rb +11 -5
data/lib/magellan/rake/broken_link_task.rb +10 -3
data/lib/magellan/rake/expected_links_task.rb +12 -3
data/lib/magellan/result.rb +15 -3
metadata +4 -4
data/README +0 -11

data/README.rdoc ADDED

@@ -0,0 +1,69 @@
+= magellan
+  http://rubyforge.org/projects/magellan/
+  http://github.com/nolman/magellan/tree/master
+== DESCRIPTION:
+Magellan is a web testing tool that embraces the discoverable nature of the web.
+== INSTALL:
+ $ [sudo] gem install magellan
+== GETTING STARTED:
+There are two supported rake tasks. The first is the Broken Link Task, which will explore a site for any //script[@src], //img[@src] and //a[@href] that return http status codes of 4** or 5**.
+In your Rakefile add:
+  require 'magellan/rake/broken_link_task'
+  Magellan::Rake::BrokenLinkTask.new("digg") do |t|
+    t.origin_url = "http://digg.com/"
+    t.explore_depth = 3
+  end
+This will crawl any links within the same domain as the origin_url to a depth of 3.  Treating the origin_url as a depth of 1, that means we will crawl all links that are linked within 2 pages of http://digg.com.
+The second rake task is one that will explore your site and ensure that given links exist.
+  require 'magellan/rake/expected_links_task'
+  Magellan::Rake::ExpectedLinksTask.new("gap") do |t|
+    t.origin_url = "http://www.gap.com/"
+    t.explore_depth = 2
+    t.patterns_and_expected_links = [[/.*/,'http://www.oldnavy.com'],[/http:\/\/[^\/]*\/\z/,'/browse/division.do?cid=5643']]
+  end
+The patterns and expected links setting is an array of tuples of regex, string.  If the current url matches the regex the task will look for the associated url string in the document.  This task by default only crawls //a[@href]'s.
+== ASSUMPTIONS:
+This tool works best if you follow the practices of unobtrusive javascript, and properly make use of http status codes.
+== DEPENDENCIES:
+ * ruby 1.8.6
+ * mechanize[http://mechanize.rubyforge.org/]
+ * activesupport[http://as.rubyonrails.org/]
+== SUPPORT:
+General help forum is located at:
+ * http://rubyforge.org/forum/forum.php?forum_id=31224
+Mailing list:
+ * http://rubyforge.org/mailman/listinfo/magellan-users
+Bug tracker:
+ * http://rubyforge.org/tracker/?atid=31199&group_id=8055
+== AUTHOR:
+Nolan Evans
+http://www.nolanevans.com
+nolane at gmail dot com

data/VERSION.yml CHANGED

@@ -1,4 +1,4 @@
 ---
-:minor: 1
-:patch: 2
+:patch: 3
 :major: 0
+:minor: 1

data/lib/magellan/broken_link_tracker.rb CHANGED

@@ -1,14 +1,18 @@
 module Magellan
+  # The class that will track all broken links, urls that return 4** or 5** http status codes.
   class BrokenLinkTracker
     include Observable
+    # All results containing 4** or 5** http status codes
     attr_reader :broken_links
+    # Create a new broken link tracker
     def initialize
       @broken_links = []
       @first_linked_from = {}
     end
+    # Receives the updates that come in via an observable subject: the time the result arrived and the Magellan::Result itself.
     def update(time,result)
       failed = result.status_code.starts_with?("5") || result.status_code.starts_with?("4")
       @broken_links << result if failed
@@ -19,16 +23,19 @@ module Magellan
       end
     end
-    def failed?
+    # Are there any broken links?
+    def failed?
       !@broken_links.empty?
     end
+    # A text message of all failures
     def failure_message
       @broken_links.map{|broken_link| broken_link_message(broken_link)}.join("\n")
     end
-    def broken_link_message(broken_link)
-      "#{broken_link.url} first linked from: #{@first_linked_from[broken_link.url]} returned: #{broken_link.status_code}"
+    # Generate the failure message for a Magellan::Result
+    def broken_link_message(result)
+      "#{result.url} first linked from: #{@first_linked_from[result.url]} returned: #{result.status_code}"
     end
   end
 end

data/lib/magellan/cartographer.rb CHANGED

@@ -2,9 +2,23 @@ require 'activesupport'
 require 'observer'
 module Magellan
+  # An instance of the Cartographer class maps a set of domains from a given starting url
+  # every time a new response is received the cartographer updates any observers listening to it
+  # to subscribe to the updates:
+  # cartographer = Cartographer.new({})
+  # cartographer.add_observer(some_observer_instance)
+  #
+  # Your observer instance should implement an update(time,result) method that takes in the current time and a Magellan::Result from the crawl
   class Cartographer
     include Observable
+    # Create a new Cartographer with a hash of settings:
+    # [:origin_url] - where to start exploring
+    # [:ignored_urls] - an array of absolute urls to not explore
+    # [:domains] - domains we should crawl
+    # [:depth_to_explore] - how deep to explore
+    # [:links_we_want_to_explore] - the kind of resources we will follow ex: //a[@href]
+    # [:trace] - enable a step by step trace
     def initialize(settings)
       @origin_url = settings[:origin_url]
       @known_urls = settings[:ignored_urls]
@@ -14,13 +28,15 @@ module Magellan
       @trace = settings[:trace]
     end
+    # Start recursively exploring the site at the origin url you specify.
     def crawl
       recursive_explore([@origin_url],1)
     end
+    # Recursively explore a list of urls until you reach a given depth or run out of known urls
     def recursive_explore(urls,depth)
       if i_am_not_too_deep?(depth)
-        $stdout.puts "exploring:\n#{urls.join("\n")}" if @trace
+        $stdout.puts "\nexploring:\n#{urls.join("\n")}" if @trace
         results = Explorer.new(urls,@links_we_want_to_explore).explore
         results.each do |result|
           changed
@@ -41,14 +57,17 @@ module Magellan
       end
     end
-    def i_have_seen_this_url_before?(url)
+    # Has the cartographer seen this url before?
+    def i_have_seen_this_url_before?(url)
       @known_urls.include?(url.remove_fragment)
     end
+    # Should we keep exploring this depth?
     def i_am_not_too_deep?(depth)
       depth <= @depth_to_explore
     end
+    # Is a given url in a domain that we care about?
     def a_domain_we_care_about?(url)
       begin
         !@domains.select { |domain| URI.parse(url).host == domain.host }.empty?
@@ -57,13 +76,10 @@ module Magellan
       end
     end
+    # Remove the javascript links from the set of links on the page.
     def remove_javascript_and_print_warning(result)
-      result.linked_resources.delete_if do |linked_resource|
-        starts_with_javascript = linked_resource.downcase.starts_with?("javascript:")
-        #TODO: put this in the logger
-        #$stderr.puts "Found obtrusive javascript: #{linked_resource} on page #{result.url}" if starts_with_javascript
-        starts_with_javascript
-      end
+      #TODO: put this in the logger
+      result.linked_resources.delete_if { |linked_resource| linked_resource.downcase.starts_with?("javascript:") }
     end
   end

data/lib/magellan/expected_links_tracker.rb CHANGED

@@ -1,14 +1,21 @@
 module Magellan
+  # The observer that will listen to all results and compare them to a list of rules about expected urls.
   class ExpectedLinksTracker
     include Observable
+    # An array of failed expectations
     attr_reader :errors
+    # Create a new expected links tracker.
+    # An array of tuples of the url pattern and expected link is a required argument.
+    # Example:
+    # Magellan::ExpectedLinksTracker.new([[/.*/,'/about_us.html']])
     def initialize(expected_patterns)
       @errors = []
       @expected_patterns = expected_patterns
       @evaluated_expectations = {}
     end
+    # Receives the updates that come in via an observable subject: the time the result arrived and the Magellan::Result itself.
     def update(time,result)
       if result.html_content?
         patterns_that_apply(result).each do |pattern,expectation|
@@ -21,34 +28,37 @@ module Magellan
       end
     end
-    def patterns_that_apply(result)
+    def patterns_that_apply(result) # :nodoc:
       res = @expected_patterns.select{|pattern,expecation| result.url =~ pattern || result.destination_url =~ pattern}
       res.each { |expected_pattern| @evaluated_expectations[expected_pattern] = nil }
       res
     end
-    def has_errors?
+    def has_errors? # :nodoc:
       !@errors.empty?
     end
-    def unmet_expecations?
+    def unmet_expecations? # :nodoc:
       !unmet_expecations.empty?
     end
+    # Are there expected urls that have not been found yet, or pages that have been found with missing links?
     def failed?
       unmet_expecations? || has_errors?
     end
+    # A string summary of all failure messages
     def failure_message
       unmet_expecations_messages << errors.join("\n")
     end
-    def unmet_expecations_messages
+    def unmet_expecations_messages # :nodoc:
       message = ""
       unmet_expecations.each {|pattern,unmet_expecation| message << "#{pattern} was never evaluted during the crawl\n"}
       message
     end
+    # Expectations that have never been evaluated
     def unmet_expecations
       @expected_patterns - @evaluated_expectations.keys
     end

data/lib/magellan/explorer.rb CHANGED

@@ -3,14 +3,14 @@ require 'open-uri'
 require 'ostruct'
 module Magellan
-  class Explorer
+  class Explorer # :nodoc:
     UNKNOWN_CONTENT = "unknown"
-    def initialize(urls,links)
+    def initialize(urls,links) # :nodoc:
       @links = links
       @urls = urls
     end
-    def explore
+    def explore # :nodoc:
       reqs = []
       @urls.each do |url|
         reqs.push Thread.new { explore_a(url) }
@@ -18,7 +18,7 @@ module Magellan
       reqs.collect { |req| req.value }
     end
-    def explore_a(url)
+    def explore_a(url) # :nodoc:
       begin
         agent = WWW::Mechanize.new
         agent.user_agent = "Ruby/#{RUBY_VERSION}"
@@ -38,7 +38,7 @@ module Magellan
       end
     end
-    def self.create_result(url,destination_url,status_code,links,content_type)
+    def self.create_result(url,destination_url,status_code,links,content_type) # :nodoc:
       Result.new(status_code,url,destination_url,links.map{|link| link.to_s},content_type)
     end
   end

data/lib/magellan/extensions/array.rb CHANGED

@@ -1,4 +1,8 @@
 class Array
+  # Break down an array into chunks of a given max size.
+  # Example:
+  #  [1,2,3,4].chunk(3)    # => [[1,2,3],[4]]
+  #  [1,2,3,4].chunk(2)    # => [[1,2],[3,4]]
   def chunk(max_size)
     result = []
     number_of_chunks = (self.size.to_f / max_size).ceil

data/lib/magellan/extensions/mechanize_page.rb CHANGED

@@ -1,10 +1,10 @@
 require 'mechanize'
 class WWW::Mechanize::Page
-  def links_to_other_documents(links_to_other_resources)
+  def links_to_other_documents(links_to_other_resources) # :nodoc:
     links_to_other_resources.map {|links_to_other_resource| get_attributes(links_to_other_resource.first,links_to_other_resource.last)}.flatten
   end
-  def get_attributes(tag,attribute)
+  def get_attributes(tag,attribute) # :nodoc:
     (self/tag).map{|alink| alink.attributes[attribute]}.compact
   end
 end

data/lib/magellan/extensions/string.rb CHANGED

@@ -1,7 +1,12 @@
 require 'activesupport'
 require 'open-uri'
 class String
-  def to_absolute_url(origin_url)
+  # Converts a relative url to an absolute url
+  # Example:
+  #  '/foo.html'.to_absolute_url('http://www.google.com/index.html?foo=b')    # => http://www.google.com/foo.html
+  #  '?foo=a'.to_absolute_url('http://www.google.com/index.html?foo=b')    # => http://www.google.com/index.html?foo=a
+  def to_absolute_url(origin_url) # :nodoc:
     begin
       #BUG in URI.join?  URI.join('http://www.google.com/index.html?foo=b','?foo=a') # => http://www.google.com/?foo=a
       stripped = self.strip
@@ -15,6 +20,9 @@ class String
     end
   end
+  # Removes a fragment from a URL
+  # Example:
+  #  '/foo.html#fsajfksafd'.remove_fragment    # => /foo.html
   def remove_fragment
     self.gsub(/#.*/,'')
   end

data/lib/magellan/logger.rb CHANGED

@@ -1,14 +1,16 @@
-module Magellan
-  class Logger
-    def initialize(file_name=nil)
+module Magellan
+  class Logger # :nodoc:
+    def initialize(file_name=nil) # :nodoc:
       @file_name = file_name
       File.open(@file_name, 'a') {} if @file_name
     end
-    def update(time,passed,message)
+    def update(time,passed,message)  # :nodoc:
       $stdout.putc(passed ? '.' : 'F')
       $stdout.flush
       File.open(@file_name, 'a') {|f| f.write(message + "\n") } if @file_name && !passed
     end
   end
 end

data/lib/magellan/rake/base_magellan_task.rb CHANGED

@@ -1,23 +1,30 @@
-#TODO: this is not a good place to use a template method - violates Liskov substitution principle
 module Magellan
   module Rake
+    # The base magellan rake task, defines most attributes associated with running a magellan task
+    #TODO: this is not a good place to use a template method - violates Liskov substitution principle
     class BaseMagellanTask < ::Rake::TaskLib
+      # The url to start the crawl at
       attr_accessor :origin_url
+      # How deep to explore
       attr_accessor :explore_depth
+      # An array of urls to not crawl
       attr_accessor :ignored_urls
+      # The kind of links you would like
       attr_accessor :links_to_explore
+      # The success message for the task, this is set by the broken link and expected links task.
       attr_accessor :success_message
+      # If this is set the logger will log out failures to a file that you specify here, you can tail this log
+      # while the crawl is running so you can see what is failing
       attr_accessor :failure_log
-      def initialize(name)
+      def initialize(name) # :nodoc:
         @ignored_urls = []
         @name=name
         yield self if block_given?
         define
       end
-      def define
+      def define # :nodoc:
         desc description
         task @name do
           settings = {:origin_url => origin_url, :depth_to_explore => explore_depth, :domains => [origin_url],
@@ -37,7 +44,6 @@ module Magellan
       end
     end
   end
 end

data/lib/magellan/rake/broken_link_task.rb CHANGED

@@ -5,19 +5,26 @@ require 'magellan/rake/base_magellan_task'
 module Magellan
   module Rake
+    # Example:
+    # require 'magellan/rake/broken_link_task'
+    # Magellan::Rake::BrokenLinkTask.new("digg") do |t|
+    #   t.origin_url = "http://digg.com/"
+    #   t.explore_depth = 3
+    # end
     class BrokenLinkTask < BaseMagellanTask
+      # Defines a new task, using the name +name+.
       def initialize(name="magellan:explore")
         @links_to_explore = [["a","href"],["script","src"],["img","src"]]
         @success_message = "No broken links were found!"
         super(name)
       end
-      def create_observer
+      def create_observer # :nodoc:
         Magellan::BrokenLinkTracker.new
       end
-      def description
+      def description # :nodoc:
         "explore #{@origin_url} for broken links"
       end
     end

data/lib/magellan/rake/expected_links_task.rb CHANGED

@@ -5,21 +5,30 @@ require 'magellan/rake/base_magellan_task'
 module Magellan
   module Rake
+    # Example:
+    # Magellan::Rake::ExpectedLinksTask.new("digg") do |t|
+    #   t.origin_url = "http://digg.com/"
+    #   t.explore_depth = 2
+    #   t.patterns_and_expected_links = YAML.load_file("digg.yml")
+    # end
     class ExpectedLinksTask < BaseMagellanTask
+      # Tuple of patterns and expected links at a given pattern
+      # Example:
+      # patterns_and_expected_links = [[/.*/,'/about_us.html']] # => this says all pages should have a link to the about us page.
       attr_accessor :patterns_and_expected_links
+      # Defines a new task, using the name +name+.
       def initialize(name="magellan:check_links")
         @success_message = "All expected links found!"
         @links_to_explore = [["a","href"]]
         super(name)
       end
-      def description
+      def description # :nodoc:
         "Explore #{@origin_url} and find check if all given patterns are matched"
       end
-      def create_observer
+      def create_observer # :nodoc:
         Magellan::ExpectedLinksTracker.new(@patterns_and_expected_links)
       end
     end

data/lib/magellan/result.rb CHANGED

@@ -1,7 +1,17 @@
 module Magellan
+  # The resulting data from crawling a url
   class Result
-    attr_reader :status_code,:url,:destination_url,:linked_resources
-    def initialize(status_code,url,destination_url,linked_resources,content_type)
+    # The http status code returned by the request for the specified url
+    attr_reader :status_code
+    # The original URL requested
+    attr_reader :url
+    # The destination URL after following redirects
+    attr_reader :destination_url
+    # Relative linked resources (based off of the kinds of links you are looking for)
+    attr_reader :linked_resources
+    # create a new result, with the status code, url, destination url, linked resources and content type, see attr_readers for more information about these fields
+    def initialize(status_code,url,destination_url,linked_resources,content_type) # :nodoc:
       @status_code = status_code
       @url = url
       @destination_url = destination_url
@@ -9,10 +19,12 @@ module Magellan
       @content_type = content_type
     end
-    def absolute_linked_resources
+    # Absolute links to resources
+    def absolute_linked_resources # :nodoc:
       absolute_links = linked_resources.map { |linked_resource| linked_resource.to_s.to_absolute_url(destination_url) }.compact
     end
+    # Was the document text/html
     def html_content?
       @content_type.starts_with?("text/html")
     end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: magellan
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Nolan Evans
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-04-07 00:00:00 -07:00
+date: 2009-04-09 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -39,8 +39,9 @@ executables: []
 extensions: []
 extra_rdoc_files:
-- README
+- README.rdoc
 files:
+- README.rdoc
 - VERSION.yml
 - lib/magellan
 - lib/magellan/broken_link_tracker.rb
@@ -70,7 +71,6 @@ files:
 - spec/result_spec.rb
 - spec/spec_helper.rb
 - spec/string_extensions_spec.rb
-- README
 has_rdoc: true
 homepage: http://github.com/nolman/magellan
 post_install_message:

data/README DELETED

@@ -1,11 +0,0 @@
-Magellan: (alpha)
-Currently the supported functionality is a rake task that crawl your website and find any broken a[@href], img[@src], or script[@src] links.
-Magellan::Rake::Task.new do |t|
-  t.origin_url = "http://localhost:3000/"
-  t.explore_depth = 100
-end
-Assumptions:
-This tool works best if you follow the rules of unobtrusive javascript and property set the http status code header.