RubyGems - wgit - Versions diffs - 0.10.8 → 0.12.0 - Mend

wgit 0.10.8 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +72 -1
data/CODE_OF_CONDUCT.md +1 -1
data/CONTRIBUTING.md +2 -2
data/README.md +24 -20
data/bin/wgit +75 -19
data/lib/wgit/assertable.rb +33 -6
data/lib/wgit/core_ext.rb +1 -1
data/lib/wgit/crawler.rb +102 -37
data/lib/wgit/database/adapters/in_memory.rb +204 -0
data/lib/wgit/database/adapters/mongo_db.rb +627 -0
data/lib/wgit/database/database.rb +18 -651
data/lib/wgit/database/database_adapter.rb +147 -0
data/lib/wgit/document.rb +222 -98
data/lib/wgit/document_extractors.rb +16 -10
data/lib/wgit/dsl.rb +74 -81
data/lib/wgit/html_to_text.rb +277 -0
data/lib/wgit/indexer.rb +184 -71
data/lib/wgit/logger.rb +2 -2
data/lib/wgit/model.rb +164 -0
data/lib/wgit/response.rb +25 -13
data/lib/wgit/robots_parser.rb +193 -0
data/lib/wgit/url.rb +150 -90
data/lib/wgit/utils.rb +200 -37
data/lib/wgit/version.rb +1 -1
data/lib/wgit.rb +18 -13
metadata +56 -43
data/lib/wgit/database/model.rb +0 -60

data/lib/wgit/robots_parser.rb ADDED Viewed

@@ -0,0 +1,193 @@
+# frozen_string_literal: true
+module Wgit
+  # The RobotsParser class handles parsing and processing of a web servers
+  # robots.txt file.
+  class RobotsParser
+    include Wgit::Assertable
+    # Key representing the start of a comment.
+    KEY_COMMENT    = "#"
+    # Key value separator used in robots.txt files.
+    KEY_SEPARATOR  = ":"
+    # Key representing a user agent.
+    KEY_USER_AGENT = "User-agent"
+    # Key representing an allow URL rule.
+    KEY_ALLOW      = "Allow"
+    # Key representing a disallow URL rule.
+    KEY_DISALLOW   = "Disallow"
+    # Value representing the Wgit user agent.
+    USER_AGENT_WGIT = :wgit
+    # Value representing any user agent including Wgit.
+    USER_AGENT_ANY  = :*
+    # Value representing any and all paths.
+    PATHS_ALL = %w[/ *].freeze
+    # Hash containing the user-agent allow/disallow URL rules. Looks like:
+    #   allow_paths:    ["/"]
+    #   disallow_paths: ["/accounts", ...]
+    attr_reader :rules
+    # Initializes and returns a Wgit::RobotsParser instance having parsed the
+    # robot.txt contents.
+    #
+    # @param contents [String, #to_s] The contents of the robots.txt file to be
+    #   parsed.
+    def initialize(contents)
+      @rules = {
+        allow_paths: Set.new,
+        disallow_paths: Set.new
+      }
+      assert_respond_to(contents, :to_s)
+      parse(contents.to_s)
+    end
+    # Overrides String#inspect to shorten the printed output of a Parser.
+    #
+    # @return [String] A short textual representation of this Parser.
+    def inspect
+      "#<Wgit::RobotsParser has_rules=#{rules?} no_index=#{no_index?}>"
+    end
+    # Returns the allow paths/rules for this parser's robots.txt contents.
+    #
+    # @return [Array<String>] The allow paths/rules to follow.
+    def allow_paths
+      @rules[:allow_paths].to_a
+    end
+    # Returns the disallow paths/rules for this parser's robots.txt contents.
+    #
+    # @return [Array<String>] The disallow paths/rules to follow.
+    def disallow_paths
+      @rules[:disallow_paths].to_a
+    end
+    # Returns whether or not there are rules applying to Wgit.
+    #
+    # @return [Boolean] True if there are rules for Wgit to follow, false
+    #   otherwise.
+    def rules?
+      allow_rules? || disallow_rules?
+    end
+    # Returns whether or not there are allow rules applying to Wgit.
+    #
+    # @return [Boolean] True if there are allow rules for Wgit to follow,
+    #   false otherwise.
+    def allow_rules?
+      @rules[:allow_paths].any?
+    end
+    # Returns whether or not there are disallow rules applying to Wgit.
+    #
+    # @return [Boolean] True if there are disallow rules for Wgit to follow,
+    #   false otherwise.
+    def disallow_rules?
+      @rules[:disallow_paths].any?
+    end
+    # Returns whether or not Wgit is banned from indexing this site.
+    #
+    # @return [Boolean] True if Wgit should not index this site, false
+    #   otherwise.
+    def no_index?
+      @rules[:disallow_paths].any? { |path| PATHS_ALL.include?(path) }
+    end
+    private
+    # Parses the file contents and sets @rules.
+    def parse(contents)
+      user_agents = []
+      new_block = false
+      contents.split("\n").each do |line|
+        line.strip!
+        next if line.empty? || line.start_with?(KEY_COMMENT)
+        # A user agent block is denoted by N User-agent's followed by N
+        # Allow/Disallow's. After which a new block is formed from scratch.
+        if start_with_any_case?(line, KEY_USER_AGENT)
+          if new_block
+            user_agents = []
+            new_block = false
+          end
+          user_agents << remove_key(line, KEY_USER_AGENT).downcase.to_sym
+        else
+          new_block = true
+        end
+        if start_with_any_case?(line, KEY_ALLOW)
+          append_allow_rule(user_agents, line)
+        elsif start_with_any_case?(line, KEY_DISALLOW)
+          append_disallow_rule(user_agents, line)
+        elsif !start_with_any_case?(line, KEY_USER_AGENT)
+          Wgit.logger.debug("Skipping unsupported robots.txt line: #{line}")
+        end
+      end
+    end
+    # Implements start_with? but case insensitive.
+    def start_with_any_case?(str, prefix)
+      str.downcase.start_with?(prefix.downcase)
+    end
+    # Returns line with key removed (if present). Otherwise line is returned
+    # as given.
+    def remove_key(line, key)
+      return line unless start_with_any_case?(line, key)
+      return line unless line.count(KEY_SEPARATOR) == 1
+      segs = line.split(KEY_SEPARATOR)
+      return "" if segs.size == 1
+      segs.last.strip
+    end
+    # Don't append * or /, as this means all paths, which is the same as no
+    # allow_paths when passed to Wgit::Crawler.
+    def append_allow_rule(user_agents, line)
+      return unless wgit_user_agent?(user_agents)
+      path = remove_key(line, KEY_ALLOW)
+      path = parse_special_syntax(path)
+      return if PATHS_ALL.include?(path)
+      @rules[:allow_paths] << path
+    end
+    def append_disallow_rule(user_agents, line)
+      return unless wgit_user_agent?(user_agents)
+      path = remove_key(line, KEY_DISALLOW)
+      path = parse_special_syntax(path)
+      @rules[:disallow_paths] << path
+    end
+    def wgit_user_agent?(user_agents)
+      user_agents.any? do |agent|
+        [USER_AGENT_ANY, USER_AGENT_WGIT].include?(agent.downcase)
+      end
+    end
+    def parse_special_syntax(path)
+      # Remove $ e.g. "/blah$" becomes "/blah"
+      path = path.gsub("$", "")
+      # Remove any inline comments e.g. "/blah # comment" becomes "/blah"
+      path = path.split(" #{KEY_COMMENT}").first if path.include?(" #{KEY_COMMENT}")
+      # Replace an empty path with * e.g. "Allow: " becomes "Allow: *"
+      path = "*" if path.empty?
+      path
+    end
+    alias_method :paths, :rules
+    alias_method :banned?, :no_index?
+  end
+end

data/lib/wgit/url.rb CHANGED Viewed

@@ -1,9 +1,9 @@
 # frozen_string_literal: true
-require_relative 'utils'
-require_relative 'assertable'
-require 'uri'
-require 'addressable/uri'
+require_relative "utils"
+require_relative "assertable"
+require "uri"
+require "addressable/uri"
 module Wgit
   # Class modeling/serialising a web based HTTP URL.
@@ -28,6 +28,9 @@ module Wgit
     # The duration of the crawl for this Url (in seconds).
     attr_accessor :crawl_duration
+    # Record the redirects from the initial Url to the final Url.
+    attr_reader :redirects
     # Initializes a new instance of Wgit::Url which models a web based
     # HTTP URL.
     #
@@ -53,16 +56,18 @@ module Wgit
         obj = url_or_obj
         assert_respond_to(obj, :fetch)
-        url            = obj.fetch('url') # Should always be present.
-        crawled        = obj.fetch('crawled', false)
-        date_crawled   = obj.fetch('date_crawled', nil)
-        crawl_duration = obj.fetch('crawl_duration', nil)
+        url            = obj.fetch("url") # Should always be present.
+        crawled        = obj.fetch("crawled", false)
+        date_crawled   = obj.fetch("date_crawled", nil)
+        crawl_duration = obj.fetch("crawl_duration", nil)
+        redirects      = obj.fetch("redirects", {})
       end
       @uri            = Addressable::URI.parse(url)
       @crawled        = crawled
       @date_crawled   = date_crawled
       @crawl_duration = crawl_duration
+      @redirects      = redirects || {}
       super(url)
     end
@@ -84,7 +89,7 @@ module Wgit
     # @raise [StandardError] If obj.is_a?(String) is false.
     # @return [Wgit::Url] A Wgit::Url instance.
     def self.parse(obj)
-      raise 'Can only parse if obj#is_a?(String)' unless obj.is_a?(String)
+      raise "Can only parse if obj#is_a?(String)" unless obj.is_a?(String)
       # Return a Wgit::Url as is to avoid losing state e.g. date_crawled etc.
       obj.is_a?(Wgit::Url) ? obj : new(obj)
@@ -107,16 +112,6 @@ Addressable::URI::InvalidURIError")
       nil
     end
-    # Sets the @crawled instance var, also setting @date_crawled for
-    # convenience.
-    #
-    # @param bool [Boolean] True if this Url has been crawled, false otherwise.
-    # @return [Boolean] The value of bool having been set.
-    def crawled=(bool)
-      @crawled      = bool
-      @date_crawled = bool ? Wgit::Utils.time_stamp : nil
-    end
     # Overrides String#inspect to distinguish this Url from a String.
     #
     # @return [String] A short textual representation of this Url.
@@ -134,6 +129,71 @@ Addressable::URI::InvalidURIError")
       super(new_url)
     end
+    # Overrides String#concat which oddly returns a Wgit::Url object, and
+    # instead returns a String. Therefore this method works the same as if
+    # you call String#concat, or its alias String#+, which is desired for
+    # this method. If you want to join two Urls, use Wgit::Url#join method.
+    #
+    # @param other [String] The String to concat onto this one.
+    # @return [String] The new concatted String, not a Wgit::Url.
+    def concat(other)
+      to_s.concat(other.to_s)
+    end
+    # Sets the @crawled instance var, also setting @date_crawled for
+    # convenience.
+    #
+    # @param bool [Boolean] True if this Url has been crawled, false otherwise.
+    # @return [Boolean] The value of bool having been set.
+    def crawled=(bool)
+      @crawled      = bool
+      @date_crawled = bool ? Wgit::Utils.time_stamp : nil
+    end
+    # Sets the @redirects instance var, mapping any Strings into Wgit::Urls.
+    #
+    # @param redirects [Hash] The redirects Hash to set for this Url.
+    def redirects=(redirects)
+      assert_type(redirects, Hash)
+      map_to_url = proc do |url|
+        Wgit::Url.new(url.to_s, crawled: @crawled, date_crawled: @date_crawled)
+      end
+      @redirects = redirects
+                   .map { |from, to| [map_to_url.call(from), map_to_url.call(to)] }
+                   .to_h
+    end
+    # Returns the Wgit::Url's starting with the originally requested Url to be
+    # crawled, followed by each redirected to Url, finishing with the final
+    # crawled Url e.g.
+    #
+    # Example Url redirects journey (dictated by the webserver):
+    #
+    # ```
+    # http://example.com   => 301 to https://example.com
+    # https://example.com  => 301 to https://example.com/
+    # https://example.com/ => 200 OK (no more redirects, crawl complete)
+    # ```
+    #
+    # Would return an Array of Wgit::Url's in the form of:
+    #
+    # ```
+    # %w(
+    #   http://example.com
+    #   https://example.com
+    #   https://example.com/
+    # )
+    # ```
+    #
+    # @return [Array<Wgit::Url>] Each redirected to Url's finishing with the
+    #   final (successfully) crawled Url. If no redirects took place, then just
+    #   the originally requested Url is returned inside the Array.
+    def redirects_journey
+      [redirects.keys, self].flatten
+    end
     # Returns true if self is a relative Url; false if absolute.
     #
     # An absolute URL must have a scheme prefix e.g.
@@ -167,10 +227,10 @@ Addressable::URI::InvalidURIError")
     def relative?(opts = {})
       defaults = { origin: nil, host: nil, domain: nil, brand: nil }
       opts = defaults.merge(opts)
-      raise 'Url (self) cannot be empty' if empty?
+      raise "Url (self) cannot be empty" if empty?
       return false if scheme_relative?
-      return true if @uri.relative?
+      return true  if @uri.relative?
       # Self is absolute but may be relative to the opts param e.g. host.
       opts.select! { |_k, v| v }
@@ -226,22 +286,23 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
       !valid?
     end
-    # Concats self and other together before returning a new Url. Self is not
-    # modified.
+    # Joins self and other together before returning a new Url. Self is not
+    # modified. Some magic occurs depending on what is being joined, see
+    # the source code for more information.
     #
-    # @param other [Wgit::Url, String] The other to concat to the end of self.
+    # @param other [Wgit::Url, String] The other (relative) Url to join to the
+    #   end of self.
     # @return [Wgit::Url] self + separator + other, separator depends on other.
-    def concat(other)
+    def join(other)
       other = Wgit::Url.new(other)
-      raise 'other must be relative' unless other.relative?
+      raise "other must be relative" unless other.relative?
       other = other.omit_leading_slash
-      separator = %w[# ? .].include?(other[0]) ? '' : '/'
-      # We use to_s below to call String#+, not Wgit::Url#+ (alias for concat).
-      concatted = omit_trailing_slash.to_s + separator.to_s + other.to_s
+      separator = %w[# ? .].include?(other[0]) ? "" : "/"
+      separator = "" if end_with?("/")
+      joined = self + separator + other
-      Wgit::Url.new(concatted)
+      Wgit::Url.new(joined)
     end
     # Normalizes/escapes self and returns a new Wgit::Url. Self isn't modified.
@@ -257,7 +318,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # If self is absolute then it's returned as is, making this method
     # idempotent. The doc's `<base>` element is used if present, otherwise
-    # `doc.url` is used as the base; which is concatted with self.
+    # `doc.url` is used as the base; which is joined with self.
     #
     # Typically used to build an absolute link obtained from a document.
     #
@@ -267,19 +328,19 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     #   link.make_absolute(doc) # => "http://example.com/favicon.png"
     #
-    # @param doc [Wgit::Document] The doc whose base Url is concatted with
+    # @param doc [Wgit::Document] The doc whose base Url is joined with
     #   self.
     # @raise [StandardError] If doc isn't a Wgit::Document or if `doc.base_url`
     #   raises an Exception.
     # @return [Wgit::Url] Self in absolute form.
     def make_absolute(doc)
       assert_type(doc, Wgit::Document)
-      raise 'Cannot make absolute when Document @url is not valid' \
+      raise "Cannot make absolute when Document @url is not valid" \
       unless doc.url.valid?
       return prefix_scheme(doc.url.to_scheme&.to_sym) if scheme_relative?
-      absolute? ? self : doc.base_url(link: self).concat(self)
+      absolute? ? self : doc.base_url(link: self).join(self)
     end
     # Returns self having prefixed a scheme/protocol. Doesn't modify receiver.
@@ -294,7 +355,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
       return self if absolute? && !scheme_relative?
-      separator = scheme_relative? ? '' : '//'
+      separator = scheme_relative? ? "" : "//"
       Wgit::Url.new("#{scheme}:#{separator}#{self}")
     end
@@ -303,8 +364,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Hash] self's instance vars as a Hash.
     def to_h
-      h = Wgit::Utils.to_h(self, ignore: ['@uri'])
-      Hash[h.to_a.insert(0, ['url', self])] # Insert url at position 0.
+      h = Wgit::Utils.to_h(self, ignore: ["@uri"])
+      Hash[h.to_a.insert(0, ["url", to_s])] # Insert url at position 0.
     end
     # Returns a normalised URI object for this URL.
@@ -379,7 +440,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
       dot_domain = ".#{to_domain}"
       return nil unless include?(dot_domain)
-      sub_domain = to_host.sub(dot_domain, '')
+      sub_domain = to_host.sub(dot_domain, "")
       Wgit::Url.new(sub_domain)
     end
@@ -389,7 +450,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     # @return [Wgit::Url, nil] Containing just the brand or nil.
     def to_brand
       domain = to_domain
-      domain ? Wgit::Url.new(domain.split('.').first) : nil
+      domain ? Wgit::Url.new(domain.split(".").first) : nil
     end
     # Returns only the base of this URL e.g. the protocol scheme and host
@@ -425,9 +486,9 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     def to_path
       path = @uri.path
       return nil if path.nil? || path.empty?
-      return Wgit::Url.new('/') if path == '/'
+      return Wgit::Url.new("/") if path == "/"
-      Wgit::Url.new(path).omit_slashes
+      Wgit::Url.new(path).omit_leading_slash
     end
     # Returns the endpoint of this URL e.g. the bit after the host with any
@@ -439,7 +500,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #   an endpoint, / is returned.
     def to_endpoint
       endpoint = @uri.path
-      endpoint = '/' + endpoint unless endpoint.start_with?('/')
+      endpoint = "/#{endpoint}" unless endpoint.start_with?("/")
       Wgit::Url.new(endpoint)
     end
@@ -463,8 +524,8 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
       query_str = to_query
       return {} unless query_str
-      query_str.split('&').each_with_object({}) do |param, hash|
-        k, v = param.split('=')
+      query_str.split("&").each_with_object({}) do |param, hash|
+        k, v = param.split("=")
         k = k.to_sym if symbolize_keys
         hash[k] = v
       end
@@ -484,10 +545,10 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Wgit::Url, nil] Containing just the extension string or nil.
     def to_extension
-      path = to_path
+      path = to_path&.omit_trailing_slash
       return nil unless path
-      segs = path.split('.')
+      segs = path.split(".")
       segs.length > 1 ? Wgit::Url.new(segs.last) : nil
     end
@@ -530,7 +591,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Wgit::Url] Self without a leading slash.
     def omit_leading_slash
-      start_with?('/') ? Wgit::Url.new(self[1..-1]) : self
+      start_with?("/") ? Wgit::Url.new(self[1..]) : self
     end
     # Returns a new Wgit::Url containing self without a trailing slash. Is
@@ -539,7 +600,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Wgit::Url] Self without a trailing slash.
     def omit_trailing_slash
-      end_with?('/') ? Wgit::Url.new(chop) : self
+      end_with?("/") ? Wgit::Url.new(chop) : self
     end
     # Returns a new Wgit::Url containing self without a leading or trailing
@@ -560,11 +621,11 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     # @return [Wgit::Url] Self containing everything after the base.
     def omit_base
       base_url = to_base
-      omit_base = base_url ? gsub(base_url, '') : self
+      omit_base = base_url ? gsub(base_url, "") : self
-      return self if ['', '/'].include?(omit_base)
+      return self if ["", "/"].include?(omit_base)
-      Wgit::Url.new(omit_base).omit_slashes
+      Wgit::Url.new(omit_base).omit_leading_slash
     end
     # Returns a new Wgit::Url with the origin (base + port) removed e.g. Given
@@ -575,11 +636,11 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     # @return [Wgit::Url] Self containing everything after the origin.
     def omit_origin
       origin = to_origin
-      omit_origin = origin ? gsub(origin, '') : self
+      omit_origin = origin ? gsub(origin, "") : self
-      return self if ['', '/'].include?(omit_origin)
+      return self if ["", "/"].include?(omit_origin)
-      Wgit::Url.new(omit_origin).omit_slashes
+      Wgit::Url.new(omit_origin).omit_leading_slash
     end
     # Returns a new Wgit::Url with the query string portion removed e.g. Given
@@ -591,7 +652,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     # @return [Wgit::Url] Self with the query string portion removed.
     def omit_query
       query = to_query
-      omit_query_string = query ? gsub("?#{query}", '') : self
+      omit_query_string = query ? gsub("?#{query}", "") : self
       Wgit::Url.new(omit_query_string)
     end
@@ -606,7 +667,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     # @return [Wgit::Url] Self with the fragment portion removed.
     def omit_fragment
       fragment = to_fragment
-      omit_fragment = fragment ? gsub("##{fragment}", '') : self
+      omit_fragment = fragment ? gsub("##{fragment}", "") : self
       Wgit::Url.new(omit_fragment)
     end
@@ -616,7 +677,7 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Boolean] True if self is a query string, false otherwise.
     def query?
-      start_with?('?')
+      start_with?("?")
     end
     # Returns true if self is a URL fragment e.g. #top etc. Note this
@@ -624,14 +685,14 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Boolean] True if self is a fragment, false otherwise.
     def fragment?
-      start_with?('#')
+      start_with?("#")
     end
     # Returns true if self equals '/' a.k.a. index.
     #
     # @return [Boolean] True if self equals '/', false otherwise.
     def index?
-      self == '/'
+      self == "/"
     end
     # Returns true if self starts with '//' a.k.a a scheme/protocol relative
@@ -639,35 +700,34 @@ protocol scheme and domain (e.g. http://example.com): #{url}"
     #
     # @return [Boolean] True if self starts with '//', false otherwise.
     def scheme_relative?
-      start_with?('//')
-    end
-    alias +                   concat
-    alias crawled?            crawled
-    alias is_relative?        relative?
-    alias is_absolute?        absolute?
-    alias is_valid?           valid?
-    alias is_query?           query?
-    alias is_fragment?        fragment?
-    alias is_index?           index?
-    alias is_scheme_relative? scheme_relative?
-    alias uri                 to_uri
-    alias url                 to_url
-    alias scheme              to_scheme
-    alias host                to_host
-    alias port                to_port
-    alias domain              to_domain
-    alias brand               to_brand
-    alias base                to_base
-    alias origin              to_origin
-    alias path                to_path
-    alias endpoint            to_endpoint
-    alias query               to_query
-    alias query_hash          to_query_hash
-    alias fragment            to_fragment
-    alias extension           to_extension
-    alias user                to_user
-    alias password            to_password
-    alias sub_domain          to_sub_domain
+      start_with?("//")
+    end
+    alias_method :crawled?,            :crawled
+    alias_method :is_relative?,        :relative?
+    alias_method :is_absolute?,        :absolute?
+    alias_method :is_valid?,           :valid?
+    alias_method :is_query?,           :query?
+    alias_method :is_fragment?,        :fragment?
+    alias_method :is_index?,           :index?
+    alias_method :is_scheme_relative?, :scheme_relative?
+    alias_method :uri,                 :to_uri
+    alias_method :url,                 :to_url
+    alias_method :scheme,              :to_scheme
+    alias_method :host,                :to_host
+    alias_method :port,                :to_port
+    alias_method :domain,              :to_domain
+    alias_method :brand,               :to_brand
+    alias_method :base,                :to_base
+    alias_method :origin,              :to_origin
+    alias_method :path,                :to_path
+    alias_method :endpoint,            :to_endpoint
+    alias_method :query,               :to_query
+    alias_method :query_hash,          :to_query_hash
+    alias_method :fragment,            :to_fragment
+    alias_method :extension,           :to_extension
+    alias_method :user,                :to_user
+    alias_method :password,            :to_password
+    alias_method :sub_domain,          :to_sub_domain
   end
 end