wgit 0.10.8 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -1
- data/CODE_OF_CONDUCT.md +1 -1
- data/CONTRIBUTING.md +2 -2
- data/README.md +24 -20
- data/bin/wgit +75 -19
- data/lib/wgit/assertable.rb +33 -6
- data/lib/wgit/core_ext.rb +1 -1
- data/lib/wgit/crawler.rb +102 -37
- data/lib/wgit/database/adapters/in_memory.rb +204 -0
- data/lib/wgit/database/adapters/mongo_db.rb +627 -0
- data/lib/wgit/database/database.rb +18 -651
- data/lib/wgit/database/database_adapter.rb +147 -0
- data/lib/wgit/document.rb +222 -98
- data/lib/wgit/document_extractors.rb +16 -10
- data/lib/wgit/dsl.rb +74 -81
- data/lib/wgit/html_to_text.rb +277 -0
- data/lib/wgit/indexer.rb +184 -71
- data/lib/wgit/logger.rb +2 -2
- data/lib/wgit/model.rb +164 -0
- data/lib/wgit/response.rb +25 -13
- data/lib/wgit/robots_parser.rb +193 -0
- data/lib/wgit/url.rb +150 -90
- data/lib/wgit/utils.rb +200 -37
- data/lib/wgit/version.rb +1 -1
- data/lib/wgit.rb +18 -13
- metadata +56 -43
- data/lib/wgit/database/model.rb +0 -60
data/lib/wgit/indexer.rb
CHANGED
@@ -1,12 +1,23 @@
 # frozen_string_literal: true
 
+require_relative 'assertable'
 require_relative 'crawler'
-require_relative 'database/
+require_relative 'database/database_adapter'
 
 module Wgit
   # Class which crawls and saves the Documents to a database. Can be thought of
-  # as a combination of Wgit::Crawler and Wgit::Database.
+  # as a combination of Wgit::Crawler and Wgit::Database::DatabaseAdapter.
   class Indexer
+    include Assertable
+
+    # The ENV var used to omit and ignore robots.txt parsing during an index.
+    # Applies to all index_* methods if set in the ENV.
+    WGIT_IGNORE_ROBOTS_TXT = "WGIT_IGNORE_ROBOTS_TXT".freeze
+
+    # The block return value used to skip saving a crawled document to the
+    # database. Applies to all index_* methods that take a block.
+    SKIP_UPSERT = :skip.freeze
+
     # The crawler used to index the WWW.
     attr_reader :crawler
 
@@ -15,10 +26,13 @@ module Wgit
 
     # Initialize the Indexer.
     #
-    # @param database [Wgit::Database] The database instance
-    # initialized and connected) used
-    # @param crawler [Wgit::Crawler] The crawler instance used
+    # @param database [Wgit::Database::DatabaseAdapter] The database instance
+    # (already initialized and connected) used for indexing.
+    # @param crawler [Wgit::Crawler] The crawler instance used for indexing.
     def initialize(database = Wgit::Database.new, crawler = Wgit::Crawler.new)
+      assert_type(database, Wgit::Database::DatabaseAdapter)
+      assert_type(crawler, Wgit::Crawler)
+
       @db = database
       @crawler = crawler
     end
@@ -26,33 +40,38 @@ module Wgit
     # Retrieves uncrawled url's from the database and recursively crawls each
     # site storing their internal pages into the database and adding their
     # external url's to be crawled later on. Logs info on the crawl using
-    # Wgit.logger as it goes along.
+    # Wgit.logger as it goes along. This method will honour all site's
+    # robots.txt and 'noindex' requests.
     #
     # @param max_sites [Integer] The number of separate and whole
     # websites to be crawled before the method exits. Defaults to -1 which
-    # means the crawl will occur until manually stopped (Ctrl+C
+    # means the crawl will occur until manually stopped (Ctrl+C), the
+    # max_data has been reached, or it runs out of external urls to index.
     # @param max_data [Integer] The maximum amount of bytes that will be
     # scraped from the web (default is 1GB). Note, that this value is used to
     # determine when to stop crawling; it's not a guarantee of the max data
     # that will be obtained.
-
+    # @param max_urls_per_iteration [Integer] The maximum number of uncrawled
+    # urls to index for each iteration, before checking max_sites and
+    # max_data, possibly ending the crawl.
+    def index_www(max_sites: -1, max_data: 1_048_576_000, max_urls_per_iteration: 10)
       if max_sites.negative?
         Wgit.logger.info("Indexing until the database has been filled or it \
-runs out of urls to crawl (which might be never)
+runs out of urls to crawl (which might be never)")
       end
       site_count = 0
 
       while keep_crawling?(site_count, max_sites, max_data)
         Wgit.logger.info("Current database size: #{@db.size}")
 
-        uncrawled_urls = @db.uncrawled_urls(limit:
+        uncrawled_urls = @db.uncrawled_urls(limit: max_urls_per_iteration)
 
         if uncrawled_urls.empty?
-          Wgit.logger.info('No urls to crawl, exiting
+          Wgit.logger.info('No urls to crawl, exiting')
 
           return
         end
-        Wgit.logger.info("Starting
+        Wgit.logger.info("Starting indexing loop for: #{uncrawled_urls.map(&:to_s)}")
 
         docs_count = 0
         urls_count = 0
@@ -60,38 +79,48 @@ runs out of urls to crawl (which might be never).")
         uncrawled_urls.each do |url|
          unless keep_crawling?(site_count, max_sites, max_data)
             Wgit.logger.info("Reached max number of sites to crawl or \
-database capacity, exiting
+database capacity, exiting")
 
             return
           end
           site_count += 1
 
+          parser = parse_robots_txt(url)
+          if parser&.no_index?
+            upsert_url_and_redirects(url)
+
+            next
+          end
+
           site_docs_count = 0
-          ext_links = @crawler.crawl_site(
-
-
-
-
-
+          ext_links = @crawler.crawl_site(
+            url, allow_paths: parser&.allow_paths, disallow_paths: parser&.disallow_paths
+          ) do |doc|
+            next if doc.empty? || no_index?(@crawler.last_response, doc)
+
+            upsert_doc(doc)
+            docs_count += 1
+            site_docs_count += 1
           end
 
-
+          upsert_url_and_redirects(url)
 
-          urls_count +=
+          urls_count += upsert_external_urls(ext_links)
         end
 
         Wgit.logger.info("Crawled and indexed documents for #{docs_count} \
-url(s)
+url(s) during this iteration")
         Wgit.logger.info("Found and saved #{urls_count} external url(s) for \
-
-
-      nil
+future iterations")
       end
+
+      nil
     end
 
     # Crawls a single website's pages and stores them into the database.
     # There is no max download limit so be careful which sites you index.
-    # Logs info on the crawl using Wgit.logger as it goes along.
+    # Logs info on the crawl using Wgit.logger as it goes along. This method
+    # will honour the site's robots.txt and 'noindex' requests.
     #
     # @param url [Wgit::Url] The base Url of the website to crawl.
     # @param insert_externals [Boolean] Whether or not to insert the website's
@@ -113,28 +142,29 @@ the next iteration.")
       url, insert_externals: false, follow: :default,
       allow_paths: nil, disallow_paths: nil
     )
-
-
-
-
-
+      parser = parse_robots_txt(url)
+      if parser&.no_index?
+        upsert_url_and_redirects(url)
+
+        return 0
+      end
+
+      allow_paths, disallow_paths = merge_paths(parser, allow_paths, disallow_paths)
+      crawl_opts = { follow:, allow_paths:, disallow_paths: }
      total_pages_indexed = 0
 
      ext_urls = @crawler.crawl_site(url, **crawl_opts) do |doc|
+        next if no_index?(@crawler.last_response, doc)
+
        result = block_given? ? yield(doc) : true
+        next if doc.empty? || result == SKIP_UPSERT
 
-
-
-        total_pages_indexed += 1
-      end
+        upsert_doc(doc)
+        total_pages_indexed += 1
      end
 
-
-
-      if insert_externals && ext_urls
-        num_inserted_urls = write_urls_to_db(ext_urls)
-        Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
-      end
+      upsert_url_and_redirects(url)
+      upsert_external_urls(ext_urls) if insert_externals && ext_urls
 
      Wgit.logger.info("Crawled and indexed #{total_pages_indexed} documents \
 for the site: #{url}")
@@ -145,6 +175,8 @@ for the site: #{url}")
     # Crawls one or more webpages and stores them into the database.
     # There is no max download limit so be careful of large pages.
     # Logs info on the crawl using Wgit.logger as it goes along.
+    # This method will honour the site's robots.txt and 'noindex' requests
+    # in relation to the given urls.
     #
     # @param urls [*Wgit::Url] The webpage Url's to crawl.
     # @param insert_externals [Boolean] Whether or not to insert the webpages
@@ -157,7 +189,7 @@ for the site: #{url}")
     def index_urls(*urls, insert_externals: false, &block)
       raise 'You must provide at least one Url' if urls.empty?
 
-      opts = { insert_externals:
+      opts = { insert_externals: }
       Wgit::Utils.each(urls) { |url| index_url(url, **opts, &block) }
 
       nil
@@ -166,6 +198,8 @@ for the site: #{url}")
     # Crawls a single webpage and stores it into the database.
     # There is no max download limit so be careful of large pages.
     # Logs info on the crawl using Wgit.logger as it goes along.
+    # This method will honour the site's robots.txt and 'noindex' requests
+    # in relation to the given url.
     #
     # @param url [Wgit::Url] The webpage Url to crawl.
     # @param insert_externals [Boolean] Whether or not to insert the webpages
@@ -175,18 +209,26 @@ for the site: #{url}")
     # manipulation. Return nil or false from the block to prevent the
     # document from being saved into the database.
     def index_url(url, insert_externals: false)
+      parser = parse_robots_txt(url)
+      if parser && (parser.no_index? || contains_path?(parser.disallow_paths, url))
+        upsert_url_and_redirects(url)
+
+        return
+      end
+
       document = @crawler.crawl_url(url) do |doc|
+        break if no_index?(@crawler.last_response, doc)
+
         result = block_given? ? yield(doc) : true
-
+        break if doc.empty? || result == SKIP_UPSERT
+
+        upsert_doc(doc)
       end
 
-
+      upsert_url_and_redirects(url)
 
       ext_urls = document&.external_links
-      if insert_externals && ext_urls
-        num_inserted_urls = write_urls_to_db(ext_urls)
-        Wgit.logger.info("Found and saved #{num_inserted_urls} external url(s)")
-      end
+      upsert_external_urls(ext_urls) if insert_externals && ext_urls
 
       nil
     end
@@ -210,10 +252,11 @@ for the site: #{url}")
     end
 
     # Write the doc to the DB. Note that the unique url index on the documents
-    # collection deliberately prevents duplicate inserts.
+    # collection deliberately prevents duplicate inserts. If the document
+    # already exists, then it will be updated in the DB.
     #
     # @param doc [Wgit::Document] The document to write to the DB.
-    def
+    def upsert_doc(doc)
       if @db.upsert(doc)
         Wgit.logger.info("Saved document for url: #{doc.url}")
       else
@@ -221,35 +264,105 @@ for the site: #{url}")
       end
     end
 
-    #
-    # collection deliberately prevents duplicate inserts.
+    # Upsert the url and its redirects, setting all to crawled = true.
     #
-    # @param
-    # @return [Integer] The number of
-    def
-
+    # @param url [Wgit::Url] The url to write to the DB.
+    # @return [Integer] The number of upserted urls (url + redirect urls).
+    def upsert_url_and_redirects(url)
+      url.crawled = true unless url.crawled?
 
-
+      # Upsert the url and any url redirects, setting them as crawled also.
+      @db.bulk_upsert(url.redirects_journey)
+    end
 
-
-
-
-
-
+    # Write the external urls to the DB. For any external url, its origin will
+    # be inserted e.g. if the external url is http://example.com/contact then
+    # http://example.com will be inserted into the database. Note that the
+    # unique url index on the urls collection deliberately prevents duplicate
+    # inserts.
+    #
+    # @param urls [Array<Wgit::Url>] The external urls to write to the DB.
+    # @return [Integer] The number of upserted urls.
+    def upsert_external_urls(urls)
+      urls = urls
+        .reject(&:invalid?)
+        .map(&:to_origin)
+        .uniq
+      return 0 if urls.empty?
+
+      count = @db.bulk_upsert(urls)
+      Wgit.logger.info("Saved #{count} external urls")
+
+      count
+    end
+
+    private
+
+    # Crawls and parses robots.txt file (if found). Returns the parser or nil.
+    def parse_robots_txt(url)
+      return nil if ENV[WGIT_IGNORE_ROBOTS_TXT]
+
+      robots_url = url.to_origin.join('/robots.txt')
 
-
-        count += 1
+      Wgit.logger.info("Crawling for robots.txt: #{robots_url}")
 
-
-
-
+      doc = @crawler.crawl_url(robots_url)
+      return nil if !@crawler.last_response.ok? || doc.empty?
+
+      parser = Wgit::RobotsParser.new(doc.content)
+
+      Wgit.logger.info("robots.txt allow paths: #{parser.allow_paths}")
+      Wgit.logger.info("robots.txt disallow paths: #{parser.disallow_paths}")
+      if parser.no_index?
+        Wgit.logger.info('robots.txt has banned wgit indexing, skipping')
       end
 
-
+      parser
+    end
+
+    # Takes the user defined allow/disallow_paths and merges robots paths
+    # into them. The allow/disallow_paths vars each can be of type nil, String,
+    # Enumerable<String>.
+    def merge_paths(parser, allow_paths, disallow_paths)
+      return allow_paths, disallow_paths unless parser&.rules?
+
+      allow = allow_paths || []
+      allow = [allow] unless allow.is_a?(Enumerable)
+
+      disallow = disallow_paths || []
+      disallow = [disallow] unless disallow.is_a?(Enumerable)
+
+      allow.concat(parser.allow_paths)
+      disallow.concat(parser.disallow_paths)
+
+      [allow, disallow]
+    end
+
+    # Returns true if url is included in the given paths.
+    def contains_path?(paths, url)
+      paths.any? { |path| Wgit::Url.new(path).to_path == url.to_path }
+    end
+
+    # Returns if the last_response or doc #no_index? is true or not.
+    def no_index?(last_response, doc)
+      return false if ENV[WGIT_IGNORE_ROBOTS_TXT]
+
+      url = last_response.url.to_s
+      if last_response.no_index?
+        Wgit.logger.info("Skipping page due to no-index response header: #{url}")
+        return true
+      end
+
+      if doc&.no_index?
+        Wgit.logger.info("Skipping page due to no-index HTML meta tag: #{url}")
+        return true
+      end
+
+      false
    end
 
-
-
-
+    alias_method :database, :db
+    alias_method :index, :index_urls
+    alias_method :index_r, :index_site
   end
 end
data/lib/wgit/logger.rb
CHANGED
@@ -2,7 +2,7 @@
 
 # FYI: The default logger is set at the bottom of this file.
 
-require
+require "logger"
 
 module Wgit
   # The Logger instance used by Wgit. Set your own custom logger after
@@ -28,7 +28,7 @@ module Wgit
   #
   # @return [Logger] The default Logger instance.
   def self.default_logger
-    logger = Logger.new(
+    logger = Logger.new($stdout, progname: "wgit", level: :info)
     logger.formatter = proc do |_severity, _datetime, progname, msg|
       "[#{progname}] #{msg}\n"
     end
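
The comment in the second hunk ("Set your own custom logger after...") implies a `Wgit.logger=` writer; a brief hedged sketch of swapping the default `$stdout` logger:

```ruby
# Hedged sketch: replace the default "[wgit] <msg>" $stdout logger.
# Assumes the Wgit.logger= writer implied by the comment in the hunk above.
require 'wgit'
require 'logger'

Wgit.logger = Logger.new('wgit.log', progname: 'wgit', level: Logger::DEBUG)
Wgit.logger.info('crawl starting') # now written to wgit.log
```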
data/lib/wgit/model.rb
ADDED
@@ -0,0 +1,164 @@
+# frozen_string_literal: true
+
+require_relative "./utils"
+
+module Wgit
+  # Module used to build the Database collection objects, forming a data model.
+  # The models produced are Hash like and therefore DB agnostic. Each model
+  # will contain a unique field used for searching and avoiding duplicates,
+  # this is typically a `url` field. Also contained in the model are the
+  # search fields used in Database and Document #search calls.
+  module Model
+    # The default search fields used in Database and Document #search calls.
+    # The number of matches for each field is multiplied by the field weight,
+    # the total is the search score, used to sort the search results.
+    # Call Wgit::Model.set_default_search_fields` to revert to default.
+    DEFAULT_SEARCH_FIELDS = {
+      title: 2,
+      description: 2,
+      keywords: 2,
+      text: 1
+    }.freeze
+
+    # The search fields used in Database and Document #search calls.
+    # The number of matches for each field is multiplied by the field weight,
+    # the total is the search score, used to sort the search results.
+    # Call Wgit::Model.set_default_search_fields` to revert to default.
+    @search_fields = DEFAULT_SEARCH_FIELDS
+
+    # Whether or not to include the Document#html in the #document model.
+    @include_doc_html = false
+
+    # Whether or not to include the Document#score in the #document model.
+    @include_doc_score = false
+
+    class << self
+      # The search fields used in Database and Document #search calls.
+      # A custom setter method is also provided for changing these fields.
+      attr_reader :search_fields
+
+      # Whether or not to include the Document#html in the #document model.
+      attr_accessor :include_doc_html
+
+      # Whether or not to include the Document#score in the #document model.
+      attr_accessor :include_doc_score
+    end
+
+    # Sets the search fields used in Database and Document #search calls.
+    #
+    # You can pass the fields as an Array of Symbols which gives each field a
+    # weight of 1 meaning all fields are considered of equal value. Or you can
+    # pass a Hash of Symbol => Int and specify the weights yourself, allowing
+    # you to customise the search rankings.
+    #
+    # Use like:
+    # ```
+    # Wgit::Model.set_search_fields [:title, :text], db
+    # => { title: 1, text: 1 }
+    # Wgit::Model.set_search_fields {title: 2, text: 1}, db
+    # => { title: 2, text: 1 }
+    # ```
+    #
+    # If the given db (database) param responds to #search_fields= then it will
+    # be called and given the fields to set. This should perform whatever the
+    # database adapter needs in order to search using the given fields e.g.
+    # creating a search index. Calling the DB enables the search_fields to be
+    # set globally within Wgit by one method call, this one.
+    #
+    # @param fields [Array<Symbol>, Hash<Symbol, Integer>] The field names or
+    #   the field names with their coresponding search weights.
+    # @param db [Wgit::Database::DatabaseAdapter] A connected db instance. If
+    #   db responds to #search_fields=, it will be called and given the fields.
+    # @raise [StandardError] If fields is of an incorrect type.
+    # @return [Hash<Symbol, Integer>] The fields and their weights.
+    def self.set_search_fields(fields, db = nil)
+      # We need a Hash of fields => weights (Symbols => Integers).
+      case fields
+      when Array # of Strings/Symbols.
+        fields = fields.map { |field| [field.to_sym, 1] }
+      when Hash # of Strings/Symbols and Integers.
+        fields = fields.map { |field, weight| [field.to_sym, weight.to_i] }
+      else
+        raise "fields must be an Array or Hash, not a #{fields.class}"
+      end
+
+      @search_fields = fields.to_h
+      db.search_fields = @search_fields if db.respond_to?(:search_fields=)
+
+      @search_fields
+    end
+
+    # Sets the search fields used in Database and Document #search calls.
+    #
+    # If the given db (database) param responds to #search_fields= then it will
+    # be called and given the fields to set. This should perform whatever the
+    # database adapter needs in order to search using the given fields e.g.
+    # creating a search index. Calling the DB enables the search_fields to be
+    # set globally within Wgit by one method call, this one.
+    #
+    # @param db [Wgit::Database::DatabaseAdapter] A connected db instance. If
+    #   db responds to #search_fields=, it will be called and given the fields.
+    # @return [Hash<Symbol, Integer>] The fields and their weights.
+    def self.set_default_search_fields(db = nil)
+      set_search_fields(DEFAULT_SEARCH_FIELDS, db)
+    end
+
+    # The data model for a Wgit::Url collection object and for an embedded
+    # 'url' inside a Wgit::Document collection object.
+    #
+    # The unique field for this model is `model['url']`.
+    #
+    # @param url [Wgit::Url] The Url data object.
+    # @return [Hash] The URL model ready for DB insertion.
+    def self.url(url)
+      raise "url must respond_to? :to_h" unless url.respond_to?(:to_h)
+
+      model = url.to_h
+      select_bson_types(model)
+    end
+
+    # The data model for a Wgit::Document collection object.
+    #
+    # The unique field for this model is `model['url']['url']`.
+    #
+    # @param doc [Wgit::Document] The Document data object.
+    # @return [Hash] The Document model ready for DB insertion.
+    def self.document(doc)
+      raise "doc must respond_to? :to_h" unless doc.respond_to?(:to_h)
+
+      model = doc.to_h(
+        include_html: @include_doc_html, include_score: @include_doc_score
+      )
+      model["url"] = url(doc.url) # Expand Url String into full object.
+
+      select_bson_types(model)
+    end
+
+    # Common fields when inserting a record into the DB.
+    #
+    # @return [Hash] Insertion fields common to all models.
+    def self.common_insert_data
+      {
+        date_added: Wgit::Utils.time_stamp,
+        date_modified: Wgit::Utils.time_stamp
+      }
+    end
+
+    # Common fields when updating a record in the DB.
+    #
+    # @return [Hash] Update fields common to all models.
+    def self.common_update_data
+      {
+        date_modified: Wgit::Utils.time_stamp
+      }
+    end
+
+    # Returns the model having removed non bson types (for use with MongoDB).
+    #
+    # @param model_hash [Hash] The model Hash to sanitize.
+    # @return [Hash] The model Hash with non bson types removed.
+    def self.select_bson_types(model_hash)
+      model_hash.select { |_k, v| v.respond_to?(:bson_type) }
+    end
+  end
+end
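
The `set_search_fields` comment block above already shows the intended call styles; for completeness, a hedged sketch of tuning the model from application code, following the methods added in this new file:

```ruby
# Hedged sketch based on the Wgit::Model API added above.
require 'wgit'

# An Array gives every field a weight of 1...
Wgit::Model.set_search_fields(%i[title text])        # => { title: 1, text: 1 }

# ...while a Hash sets custom weights. Pass a connected DatabaseAdapter as the
# second argument if it should react (via #search_fields=), e.g. to rebuild a
# search index.
Wgit::Model.set_search_fields({ title: 3, text: 1 }) # => { title: 3, text: 1 }

Wgit::Model.include_doc_html = true   # include Document#html in the document model
Wgit::Model.set_default_search_fields # revert to DEFAULT_SEARCH_FIELDS
```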
data/lib/wgit/response.rb
CHANGED
@@ -27,18 +27,25 @@ module Wgit
 
     # Defaults some values and returns a "blank" Wgit::Response object.
     def initialize
-      @body =
+      @body = ""
       @headers = {}
       @redirections = {}
       @total_time = 0.0
     end
 
+    # Overrides String#inspect to shorten the printed output of a Response.
+    #
+    # @return [String] A short textual representation of this Response.
+    def inspect
+      "#<Wgit::Response url=\"#{@url}\" status=#{status}>"
+    end
+
     # Adds time to @total_time (incrementally).
     #
     # @param time [Float] The time to add to @total_time.
     # @return [Float] @total_time's new value.
     def add_total_time(time)
-      @total_time +=
+      @total_time += time || 0.0
     end
 
     # Sets the HTML response body.
@@ -46,7 +53,7 @@ module Wgit
     # @param str [String] The new HTML body.
     # @return [String] @body's new value.
     def body=(str)
-      @body =
+      @body = str || ""
     end
 
     # Returns the HTML response body or nil (if it's empty).
@@ -74,10 +81,7 @@ module Wgit
         return
       end
 
-      @headers = headers.
-        k = k.downcase.gsub('-', '_').to_sym
-        [k, v]
-      end.to_h
+      @headers = headers.transform_keys { |k| k.downcase.gsub("-", "_").to_sym }
     end
 
     # Returns whether or not the response is 404 Not Found.
@@ -134,11 +138,19 @@ module Wgit
       @status.positive?
     end
 
-
-
-
-
-
-
+    # Returns whether or not Wgit is banned from indexing this site.
+    #
+    # @return [Boolean] True if Wgit should not index this site, false
+    #   otherwise.
+    def no_index?
+      headers.fetch(:x_robots_tag, "").downcase.strip == "noindex"
+    end
+
+    alias_method :code, :status
+    alias_method :content, :body
+    alias_method :crawl_duration, :total_time
+    alias_method :to_s, :body
+    alias_method :redirects, :redirections
+    alias_method :length, :size
   end
 end