RubyGems - rails-deprecated_sanitizer-no-registration - Versions diffs - 1.0.4 - Mend

rails-deprecated_sanitizer-no-registration 1.0.4

Files changed (22) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +19 -0
data/LICENSE +22 -0
data/README.md +17 -0
data/lib/rails-deprecated_sanitizer.rb +1 -0
data/lib/rails/deprecated_sanitizer.rb +25 -0
data/lib/rails/deprecated_sanitizer/html-scanner.rb +21 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/document.rb +68 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/node.rb +532 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/sanitizer.rb +188 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/selector.rb +830 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/tokenizer.rb +107 -0
data/lib/rails/deprecated_sanitizer/html-scanner/html/version.rb +11 -0
data/lib/rails/deprecated_sanitizer/version.rb +5 -0
data/test/cdata_node_test.rb +16 -0
data/test/document_test.rb +149 -0
data/test/node_test.rb +90 -0
data/test/tag_node_test.rb +244 -0
data/test/test_helper.rb +13 -0
data/test/text_node_test.rb +51 -0
data/test/tokenizer_test.rb +132 -0
metadata +112 -0

data/lib/rails/deprecated_sanitizer/html-scanner/html/sanitizer.rb ADDED Viewed

@@ -0,0 +1,188 @@
+require 'set'
+require 'cgi'
+require 'active_support/core_ext/module/attribute_accessors'
+module HTMLDeprecated
+  class Sanitizer
+    def sanitize(text, options = {})
+      validate_options(options)
+      return text unless sanitizeable?(text)
+      tokenize(text, options).join
+    end
+    def sanitizeable?(text)
+      !(text.nil? || text.empty? || !text.index("<"))
+    end
+  protected
+    def tokenize(text, options)
+      tokenizer = HTMLDeprecated::Tokenizer.new(text)
+      result = []
+      while token = tokenizer.next
+        node = Node.parse(nil, 0, 0, token, false)
+        process_node node, result, options
+      end
+      result
+    end
+    def process_node(node, result, options)
+      result << node.to_s
+    end
+    def validate_options(options)
+      if options[:tags] && !options[:tags].is_a?(Enumerable)
+        raise ArgumentError, "You should pass :tags as an Enumerable"
+      end
+      if options[:attributes] && !options[:attributes].is_a?(Enumerable)
+        raise ArgumentError, "You should pass :attributes as an Enumerable"
+      end
+    end
+  end
+  class FullSanitizer < Sanitizer
+    def sanitize(text, options = {})
+      result = super
+      # strip any comments, and if they have a newline at the end (ie. line with
+      # only a comment) strip that too
+      result = result.gsub(/<!--(.*?)-->[\n]?/m, "") if (result && result =~ /<!--(.*?)-->[\n]?/m)
+      # Recurse - handle all dirty nested tags
+      result == text ? result : sanitize(result, options)
+    end
+    def process_node(node, result, options)
+      result << node.to_s if node.class == HTMLDeprecated::Text
+    end
+  end
+  class LinkSanitizer < FullSanitizer
+    cattr_accessor :included_tags, :instance_writer => false
+    self.included_tags = Set.new(%w(a href))
+    def sanitizeable?(text)
+      !(text.nil? || text.empty? || !((text.index("<a") || text.index("<href")) && text.index(">")))
+    end
+  protected
+    def process_node(node, result, options)
+      result << node.to_s unless node.is_a?(HTMLDeprecated::Tag) && included_tags.include?(node.name)
+    end
+  end
+  class WhiteListSanitizer < Sanitizer
+    [:protocol_separator, :uri_attributes, :allowed_attributes, :allowed_tags, :allowed_protocols, :bad_tags,
+     :allowed_css_properties, :allowed_css_keywords, :shorthand_css_properties].each do |attr|
+      class_attribute attr, :instance_writer => false
+    end
+    # A regular expression of the valid characters used to separate protocols like
+    # the ':' in 'http://foo.com'. NOTE(review): &#x70 is the entity for "p", not ":" (&#x3a) -- confirm it is intended.
+    self.protocol_separator     = /:|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i
+    # Specifies a Set of HTML attributes that can have URIs.
+    self.uri_attributes         = Set.new(%w(href src cite action longdesc xlink:href lowsrc))
+    # Specifies a Set of 'bad' tags that the #sanitize helper will remove completely, as opposed
+    # to just escaping harmless tags like &lt;font&gt;
+    self.bad_tags               = Set.new(%w(script))
+    # Specifies the default Set of tags that the #sanitize helper will allow unscathed.
+    self.allowed_tags           = Set.new(%w(strong em b i p code pre tt samp kbd var sub
+      sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
+      acronym a img blockquote del ins))
+    # Specifies the default Set of html attributes that the #sanitize helper will leave
+    # in the allowed tag.
+    self.allowed_attributes     = Set.new(%w(href src width height alt cite datetime title class name xml:lang abbr))
+    # Specifies the default Set of protocols that the #sanitize helper will accept in URI attributes.
+    self.allowed_protocols      = Set.new(%w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto
+      feed svn urn aim rsync tag ssh sftp rtsp afs))
+    # Specifies the default Set of acceptable css properties that #sanitize and #sanitize_css will accept.
+    self.allowed_css_properties = Set.new(%w(azimuth background-color border-bottom-color border-collapse
+      border-color border-left-color border-right-color border-top-color clear color cursor direction display
+      elevation float font font-family font-size font-style font-variant font-weight height letter-spacing line-height
+      overflow pause pause-after pause-before pitch pitch-range richness speak speak-header speak-numeral speak-punctuation
+      speech-rate stress text-align text-decoration text-indent unicode-bidi vertical-align voice-family volume white-space
+      width))
+    # Specifies the default Set of acceptable css keywords that #sanitize and #sanitize_css will accept.
+    self.allowed_css_keywords   = Set.new(%w(auto aqua black block blue bold both bottom brown center
+      collapse dashed dotted fuchsia gray green !important italic left lime maroon medium none navy normal
+      nowrap olive pointer purple red right solid silver teal top transparent underline white yellow))
+    # Specifies the default Set of allowed shorthand css properties for the #sanitize and #sanitize_css helpers.
+    self.shorthand_css_properties = Set.new(%w(background border margin padding))
+    # Sanitizes a block of css code. Used by #sanitize when it comes across a style attribute
+    def sanitize_css(style)
+      # disallow urls
+      style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
+      # gauntlet
+      if style !~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/ ||
+          style !~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
+        return ''
+      end
+      clean = []
+      style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
+        if allowed_css_properties.include?(prop.downcase)
+          clean <<  prop + ': ' + val + ';'
+        elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
+          unless val.split().any? do |keyword|
+            !allowed_css_keywords.include?(keyword) &&
+              keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
+          end
+            clean << prop + ': ' + val + ';'
+          end
+        end
+      end
+      clean.join(' ')
+    end
+  protected
+    def tokenize(text, options)
+      options[:parent] = []
+      options[:attributes] ||= allowed_attributes
+      options[:tags]       ||= allowed_tags
+      super
+    end
+    def process_node(node, result, options)
+      result << case node
+        when HTMLDeprecated::Tag
+          if node.closing == :close
+            options[:parent].shift
+          else
+            options[:parent].unshift node.name
+          end
+          process_attributes_for node, options
+          options[:tags].include?(node.name) ? node : nil
+        else
+          bad_tags.include?(options[:parent].first) ? nil : node.to_s.gsub(/</, "&lt;")
+      end
+    end
+    def process_attributes_for(node, options)
+      return unless node.attributes
+      node.attributes.keys.each do |attr_name|
+        value = node.attributes[attr_name].to_s
+        if !options[:attributes].include?(attr_name) || contains_bad_protocols?(attr_name, value)
+          node.attributes.delete(attr_name)
+        else
+          node.attributes[attr_name] = attr_name == 'style' ? sanitize_css(value) : CGI::escapeHTML(CGI::unescapeHTML(value))
+        end
+      end
+    end
+    def contains_bad_protocols?(attr_name, value)
+      uri_attributes.include?(attr_name) &&
+      (value =~ /(^[^\/:]*):|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A/i && !allowed_protocols.include?(value.split(protocol_separator).first.downcase.strip))
+    end
+  end
+end

data/lib/rails/deprecated_sanitizer/html-scanner/html/selector.rb ADDED Viewed

@@ -0,0 +1,830 @@
+#--
+# Copyright (c) 2006 Assaf Arkin (http://labnotes.org)
+# Under MIT and/or CC By license.
+#++
+module HTMLDeprecated
+  # Selects HTML elements using CSS 2 selectors.
+  #
+  # The +Selector+ class uses CSS selector expressions to match and select
+  # HTML elements.
+  #
+  # For example:
+  #   selector = HTML::Selector.new "form.login[action=/login]"
+  # creates a new selector that matches any +form+ element with the class
+  # +login+ and an attribute +action+ with the value <tt>/login</tt>.
+  #
+  # === Matching Elements
+  #
+  # Use the #match method to determine if an element matches the selector.
+  #
+  # For simple selectors, the method returns an array with that element,
+  # or +nil+ if the element does not match. For complex selectors (see below)
+  # the method returns an array with all matched elements, or +nil+ if no
+  # match is found.
+  #
+  # For example:
+  #   if selector.match(element)
+  #     puts "Element is a login form"
+  #   end
+  #
+  # === Selecting Elements
+  #
+  # Use the #select method to select all matching elements starting with
+  # one element and going through all children in depth-first order.
+  #
+  # This method returns an array of all matching elements, an empty array
+  # if no match is found
+  #
+  # For example:
+  #   selector = HTML::Selector.new "input[type=text]"
+  #   matches = selector.select(element)
+  #   matches.each do |match|
+  #     puts "Found text field with name #{match.attributes['name']}"
+  #   end
+  #
+  # === Expressions
+  #
+  # Selectors can match elements using any of the following criteria:
+  # * <tt>name</tt> -- Match an element based on its name (tag name).
+  #   For example, <tt>p</tt> to match a paragraph. You can use <tt>*</tt>
+  #   to match any element.
+  # * <tt>#</tt><tt>id</tt> -- Match an element based on its identifier (the
+  #   <tt>id</tt> attribute). For example, <tt>#</tt><tt>page</tt>.
+  # * <tt>.class</tt> -- Match an element based on its class name, all
+  #   class names if more than one specified.
+  # * <tt>[attr]</tt> -- Match an element that has the specified attribute.
+  # * <tt>[attr=value]</tt> -- Match an element that has the specified
+  #   attribute and value. (More operators are supported; see below.)
+  # * <tt>:pseudo-class</tt> -- Match an element based on a pseudo class,
+  #   such as <tt>:nth-child</tt> and <tt>:empty</tt>.
+  # * <tt>:not(expr)</tt> -- Match an element that does not match the
+  #   negation expression.
+  #
+  # When using a combination of the above, the element name comes first
+  # followed by identifier, class names, attributes, pseudo classes and
+  # negation in any order. Do not separate these parts with spaces!
+  # Space separation is used for descendant selectors.
+  #
+  # For example:
+  #   selector = HTML::Selector.new "form.login[action=/login]"
+  # The matched element must be of type +form+ and have the class +login+.
+  # It may have other classes, but the class +login+ is required to match.
+  # It must also have an attribute called +action+ with the value
+  # <tt>/login</tt>.
+  #
+  # This selector will match the following element:
+  #   <form class="login form" method="post" action="/login">
+  # but will not match the element:
+  #   <form method="post" action="/logout">
+  #
+  # === Attribute Values
+  #
+  # Several operators are supported for matching attributes:
+  # * <tt>name</tt> -- The element must have an attribute with that name.
+  # * <tt>name=value</tt> -- The element must have an attribute with that
+  #   name and value.
+  # * <tt>name^=value</tt> -- The attribute value must start with the
+  #   specified value.
+  # * <tt>name$=value</tt> -- The attribute value must end with the
+  #   specified value.
+  # * <tt>name*=value</tt> -- The attribute value must contain the
+  #   specified value.
+  # * <tt>name~=word</tt> -- The attribute value must contain the specified
+  #   word (space separated).
+  # * <tt>name|=word</tt> -- The attribute value must start with specified
+  #   word.
+  #
+  # For example, the following two selectors match the same element:
+  #   #my_id
+  #   [id=my_id]
+  # and so do the following two selectors:
+  #   .my_class
+  #   [class~=my_class]
+  #
+  # === Alternatives, siblings, children
+  #
+  # Complex selectors use a combination of expressions to match elements:
+  # * <tt>expr1 expr2</tt> -- Match any element against the second expression
+  #   if it has some parent element that matches the first expression.
+  # * <tt>expr1 > expr2</tt> -- Match any element against the second expression
+  #   if it is the child of an element that matches the first expression.
+  # * <tt>expr1 + expr2</tt> -- Match any element against the second expression
+  #   if it immediately follows an element that matches the first expression.
+  # * <tt>expr1 ~ expr2</tt> -- Match any element against the second expression
+  #   that comes after an element that matches the first expression.
+  # * <tt>expr1, expr2</tt> -- Match any element against the first expression,
+  #   or against the second expression.
+  #
+  # Since children and sibling selectors may match more than one element given
+  # the first element, the #match method may return more than one match.
+  #
+  # === Pseudo classes
+  #
+  # Pseudo classes were introduced in CSS 3. They are most often used to select
+  # elements in a given position:
+  # * <tt>:root</tt> -- Match the element only if it is the root element
+  #   (no parent element).
+  # * <tt>:empty</tt> -- Match the element only if it has no child elements,
+  #   and no text content.
+  # * <tt>:content(string)</tt> -- Match the element only if it has <tt>string</tt>
+  #   as its text content (ignoring leading and trailing whitespace).
+  # * <tt>:only-child</tt> -- Match the element if it is the only child (element)
+  #   of its parent element.
+  # * <tt>:only-of-type</tt> -- Match the element if it is the only child (element)
+  #   of its parent element and its type.
+  # * <tt>:first-child</tt> -- Match the element if it is the first child (element)
+  #   of its parent element.
+  # * <tt>:first-of-type</tt> -- Match the element if it is the first child (element)
+  #   of its parent element of its type.
+  # * <tt>:last-child</tt> -- Match the element if it is the last child (element)
+  #   of its parent element.
+  # * <tt>:last-of-type</tt> -- Match the element if it is the last child (element)
+  #   of its parent element of its type.
+  # * <tt>:nth-child(b)</tt> -- Match the element if it is the b-th child (element)
+  #   of its parent element. The value <tt>b</tt> specifies its index, starting with 1.
+  # * <tt>:nth-child(an+b)</tt> -- Match the element if it is the b-th child (element)
+  #   in each group of <tt>a</tt> child elements of its parent element.
+  # * <tt>:nth-child(-an+b)</tt> -- Match the element if it is the first child (element)
+  #   in each group of <tt>a</tt> child elements, up to the first <tt>b</tt> child
+  #   elements of its parent element.
+  # * <tt>:nth-child(odd)</tt> -- Match element in the odd position (i.e. first, third).
+  #   Same as <tt>:nth-child(2n+1)</tt>.
+  # * <tt>:nth-child(even)</tt> -- Match element in the even position (i.e. second,
+  #   fourth). Same as <tt>:nth-child(2n+2)</tt>.
+  # * <tt>:nth-of-type(..)</tt> -- As above, but only counts elements of its type.
+  # * <tt>:nth-last-child(..)</tt> -- As above, but counts from the last child.
+  # * <tt>:nth-last-of-type(..)</tt> -- As above, but counts from the last child and
+  #   only elements of its type.
+  # * <tt>:not(selector)</tt> -- Match the element only if the element does not
+  #   match the simple selector.
+  #
+  # As you can see, <tt>:nth-child</tt> pseudo class and its variant can get quite
+  # tricky and the CSS specification doesn't do a much better job explaining it.
+  # But after reading the examples and trying a few combinations, it's easy to
+  # figure out.
+  #
+  # For example:
+  #   table tr:nth-child(odd)
+  # Selects every second row in the table starting with the first one.
+  #
+  #   div p:nth-child(4)
+  # Selects the fourth paragraph in the +div+, but not if the +div+ contains
+  # other elements, since those are also counted.
+  #
+  #   div p:nth-of-type(4)
+  # Selects the fourth paragraph in the +div+, counting only paragraphs, and
+  # ignoring all other elements.
+  #
+  #   div p:nth-of-type(-n+4)
+  # Selects the first four paragraphs, ignoring all others.
+  #
+  # And you can always select an element that matches one set of rules but
+  # not another using <tt>:not</tt>. For example:
+  #   p:not(.post)
+  # Matches all paragraphs that do not have the class <tt>.post</tt>.
+  #
+  # === Substitution Values
+  #
+  # You can use substitution with identifiers, class names and element values.
+  # A substitution takes the form of a question mark (<tt>?</tt>) and uses the
+  # next value in the argument list following the CSS expression.
+  #
+  # The substitution value may be a string or a regular expression. All other
+  # values are converted to strings.
+  #
+  # For example:
+  #   selector = HTML::Selector.new "#?", /^\d+$/
+  # matches any element whose identifier consists of one or more digits.
+  #
+  # See http://www.w3.org/TR/css3-selectors/
+  class Selector
+    # Error class for an invalid selector. NOTE(review): the parsing code visible here raises ArgumentError instead -- confirm usage.
+    class InvalidSelectorError < StandardError #:nodoc:
+    end
+    class << self
+      # :call-seq:
+      #   Selector.for_class(cls) => selector
+      #
+      # Creates a new selector for the given class name.
+      def for_class(cls)
+        self.new([".?", cls])
+      end
+      # :call-seq:
+      #   Selector.for_id(id) => selector
+      #
+      # Creates a new selector for the given id.
+      def for_id(id)
+        self.new(["#?", id])
+      end
+    end
+    # :call-seq:
+    #   Selector.new(string, [values ...]) => selector
+    #
+    # Creates a new selector from a CSS 2 selector expression.
+    #
+    # The first argument is the selector expression. All other arguments
+    # are used for value substitution.
+    #
+    # Raises ArgumentError if the selector expression is empty or invalid.
+    def initialize(selector, *values)
+      raise ArgumentError, "CSS expression cannot be empty" if selector.empty?
+      @source = ""
+      values = values[0] if values.size == 1 && values[0].is_a?(Array)
+      # Work on a stripped copy: the parsing below consumes it with sub!, and
+      # whatever is left over at the end tells us whether parsing failed.
+      statement = selector.strip.dup
+      # Create a simple selector, along with negation; each returned entry becomes an instance variable.
+      simple_selector(statement, values).each { |name, value| instance_variable_set("@#{name}", value) }
+      @alternates = []
+      @depends = nil
+      # Alternative selector (","): the rest of the statement is parsed as another Selector.
+      if statement.sub!(/^\s*,\s*/, "")
+        second = Selector.new(statement, values)
+        @alternates << second
+        # If there are alternate selectors, we group them in the top selector.
+        if alternates = second.instance_variable_get(:@alternates)
+          second.instance_variable_set(:@alternates, [])
+          @alternates.concat alternates
+        end
+        @source << " , " << second.to_s
+      # Adjacent sibling selector ("+"): create a dependency into second selector
+      # that will match the element immediately following this one.
+      elsif statement.sub!(/^\s*\+\s*/, "")
+        second = next_selector(statement, values)
+        @depends = lambda do |element, first|
+          if element = next_element(element)
+            second.match(element, first)
+          end
+        end
+        @source << " + " << second.to_s
+      # General sibling selector ("~"): create a dependency into second selector
+      # that will match all elements following this one.
+      elsif statement.sub!(/^\s*~\s*/, "")
+        second = next_selector(statement, values)
+        @depends = lambda do |element, first|
+          matches = []
+          while element = next_element(element)
+            if subset = second.match(element, first)
+              if first && !subset.empty?
+                matches << subset.first
+                break
+              else
+                matches.concat subset
+              end
+            end
+          end
+          matches.empty? ? nil : matches
+        end
+        @source << " ~ " << second.to_s
+      # Child selector (">"): create a dependency into second selector that will
+      # match a child element of this one.
+      elsif statement.sub!(/^\s*>\s*/, "")
+        second = next_selector(statement, values)
+        @depends = lambda do |element, first|
+          matches = []
+          element.children.each do |child|
+            if child.tag? && subset = second.match(child, first)
+              if first && !subset.empty?
+                matches << subset.first
+                break
+              else
+                matches.concat subset
+              end
+            end
+          end
+          matches.empty? ? nil : matches
+        end
+        @source << " > " << second.to_s
+      # Descendant selector (whitespace): create a dependency into second selector that
+      # will match descendant elements of this one. Note, the children of a matched node are not searched.
+      elsif statement =~ /^\s+\S+/ && statement != selector
+        second = next_selector(statement, values)
+        @depends = lambda do |element, first|
+          matches = []
+          stack = element.children.reverse
+          while node = stack.pop
+            next unless node.tag?
+            if subset = second.match(node, first)
+              if first && !subset.empty?
+                matches << subset.first
+                break
+              else
+                matches.concat subset
+              end
+            elsif children = node.children
+              stack.concat children.reverse
+            end
+          end
+          matches.empty? ? nil : matches
+        end
+        @source << " " << second.to_s
+      else
+        # The last selector is where we check that we parsed
+        # all the parts.
+        unless statement.empty? || statement.strip.empty?
+          raise ArgumentError, "Invalid selector: #{statement}"
+        end
+      end
+    end
+    # :call-seq:
+    #   match(element, first?) => array or nil
+    #
+    # Matches an element against the selector.
+    #
+    # For a simple selector this method returns an array with the
+    # element if the element matches, nil otherwise.
+    #
+    # For a complex selector (sibling and descendant) this method
+    # returns an array with all matching elements, nil if no match is
+    # found.
+    #
+    # Use +first_only=true+ if you are only interested in the first element.
+    #
+    # For example:
+    #   if selector.match(element)
+    #     puts "Element is a login form"
+    #   end
+    def match(element, first_only = false)
+      # Match element if no element name or element name same as element name
+      if matched = (!@tag_name || @tag_name == element.name)
+        # No match if one of the attribute matches failed
+        for attr in @attributes
+          if element.attributes[attr[0]] !~ attr[1]
+            matched = false
+            break
+          end
+        end
+      end
+      # Pseudo class matches (nth-child, empty, etc).
+      if matched
+        for pseudo in @pseudo
+          unless pseudo.call(element)
+            matched = false
+            break
+          end
+        end
+      end
+      # Negation. Same rules as above, but we fail if a match is made.
+      if matched && @negation
+        for negation in @negation
+          if negation[:tag_name] == element.name
+            matched = false
+          else
+            for attr in negation[:attributes]
+              if element.attributes[attr[0]] =~ attr[1]
+                matched = false
+                break
+              end
+            end
+          end
+          if matched
+            for pseudo in negation[:pseudo]
+              if pseudo.call(element)
+                matched = false
+                break
+              end
+            end
+          end
+          break unless matched
+        end
+      end
+      # If element matched but depends on another element (child,
+      # sibling, etc), apply the dependent matches instead.
+      if matched && @depends
+        matches = @depends.call(element, first_only)
+      else
+        matches = matched ? [element] : nil
+      end
+      # If this selector is part of the group, try all the alternative
+      # selectors (unless first_only).
+      if !first_only || !matches
+        @alternates.each do |alternate|
+          break if matches && first_only
+          if subset = alternate.match(element, first_only)
+            if matches
+              matches.concat subset
+            else
+              matches = subset
+            end
+          end
+        end
+      end
+      matches
+    end
+    # :call-seq:
+    #   select(root) => array
+    #
+    # Selects and returns an array with all matching elements, beginning
+    # with one node and traversing through all children depth-first.
+    # Returns an empty array if no match is found.
+    #
+    # The root node may be any element in the document, or the document
+    # itself.
+    #
+    # For example:
+    #   selector = HTML::Selector.new "input[type=text]"
+    #   matches = selector.select(element)
+    #   matches.each do |match|
+    #     puts "Found text field with name #{match.attributes['name']}"
+    #   end
+    def select(root)
+      matches = []
+      stack = [root]
+      while node = stack.pop
+        if node.tag? && subset = match(node, false)
+          subset.each do |match|
+            matches << match unless matches.any? { |item| item.equal?(match) }
+          end
+        elsif children = node.children
+          stack.concat children.reverse
+        end
+      end
+      matches
+    end
+    # Similar to #select but returns the first matching element. Returns +nil+
+    # if no element matches the selector.
+    def select_first(root)
+      stack = [root]
+      while node = stack.pop
+        if node.tag? && subset = match(node, true)
+          return subset.first if !subset.empty?
+        elsif children = node.children
+          stack.concat children.reverse
+        end
+      end
+      nil
+    end
+    def to_s #:nodoc:
+      @source # the selector expression text accumulated while parsing
+    end
+    # Returns the next element after this one. Skips sibling text nodes.
+    #
+    # With the +name+ argument, returns the next element with that name,
+    # skipping other sibling elements.
+    def next_element(element, name = nil)
+      if siblings = element.parent.children
+        found = false
+        siblings.each do |node|
+          if node.equal?(element)
+            found = true
+          elsif found && node.tag?
+            return node if (name.nil? || node.name == name)
+          end
+        end
+      end
+      nil
+    end
+  protected
+    # Creates a simple selector given the statement and array of
+    # substitution values.
+    #
+    # Returns a hash with the values +tag_name+, +attributes+,
+    # +pseudo+ (classes) and +negation+.
+    #
+    # Called the first time with +can_negate+ true to allow
+    # negation. Called a second time with false since negation
+    # cannot be negated.
+    def simple_selector(statement, values, can_negate = true)
+      tag_name = nil
+      attributes = []
+      pseudo = []
+      negation = []
+      # Element name. (Note that in negation, this can come at
+      # any order, but for simplicity we allow if only first).
+      statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
+        match.strip!
+        tag_name = match.downcase unless match == "*"
+        @source << match
+        "" # Remove
+      end
+      # Get identifier, class, attribute name, pseudo or negation.
+      while true
+        # Element identifier.
+        next if statement.sub!(/^#(\?|[\w\-]+)/) do
+          id = $1
+          if id == "?"
+            id = values.shift
+          end
+          @source << "##{id}"
+          id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
+          attributes << ["id", id]
+          "" # Remove
+        end
+        # Class name.
+        next if statement.sub!(/^\.([\w\-]+)/) do
+          class_name = $1
+          @source << ".#{class_name}"
+          class_name = Regexp.new("(^|\s)#{Regexp.escape(class_name)}($|\s)") unless class_name.is_a?(Regexp)
+          attributes << ["class", class_name]
+          "" # Remove
+        end
+        # Attribute value.
+        next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^*]"|[^\]]*)\s*\]/) do
+          name, equality, value = $1, $2, $3
+          if value == "?"
+            value = values.shift
+          else
+            # Handle single and double quotes.
+            value.strip!
+            if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
+              value = value[1..-2]
+            end
+          end
+          @source << "[#{name}#{equality}'#{value}']"
+          attributes << [name.downcase.strip, attribute_match(equality, value)]
+          "" # Remove
+        end
+        # Root element only.
+        next if statement.sub!(/^:root/) do
+          pseudo << lambda do |element|
+            element.parent.nil? || !element.parent.tag?
+          end
+          @source << ":root"
+          "" # Remove
+        end
+        # Nth-child including last and of-type.
+        next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
+          reverse = $1 == "last-"
+          of_type = $2 == "of-type"
+          @source << ":nth-#{$1}#{$2}("
+          case $3
+            when "odd"
+              pseudo << nth_child(2, 1, of_type, reverse)
+              @source << "odd)"
+            when "even"
+              pseudo << nth_child(2, 2, of_type, reverse)
+              @source << "even)"
+            when /^(\d+|\?)$/  # b only
+              b = ($1 == "?" ? values.shift : $1).to_i
+              pseudo << nth_child(0, b, of_type, reverse)
+              @source << "#{b})"
+            when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
+              a = ($1 == "?" ? values.shift :
+                   $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
+              b = ($2 == "?" ? values.shift : $2).to_i
+              pseudo << nth_child(a, b, of_type, reverse)
+              @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
+            else
+              raise ArgumentError, "Invalid nth-child #{match}"
+          end
+          "" # Remove
+        end
+        # First/last child (of type).
+        next if statement.sub!(/^:(first|last)-(child|of-type)/) do
+          reverse = $1 == "last"
+          of_type = $2 == "of-type"
+          pseudo << nth_child(0, 1, of_type, reverse)
+          @source << ":#{$1}-#{$2}"
+          "" # Remove
+        end
+        # Only child (of type).
+        next if statement.sub!(/^:only-(child|of-type)/) do
+          of_type = $1 == "of-type"
+          pseudo << only_child(of_type)
+          @source << ":only-#{$1}"
+          "" # Remove
+        end
+        # Empty: no child elements or meaningful content (whitespaces
+        # are ignored).
+        next if statement.sub!(/^:empty/) do
+          pseudo << lambda do |element|
+            empty = true
+            for child in element.children
+              if child.tag? || !child.content.strip.empty?
+                empty = false
+                break
+              end
+            end
+            empty
+          end
+          @source << ":empty"
+          "" # Remove
+        end
+        # Content: match the text content of the element, stripping
+        # leading and trailing spaces.
+        next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do
+          content = $1
+          if content == "?"
+            content = values.shift
+          elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
+            content = content[1..-2]
+          end
+          @source << ":content('#{content}')"
+          content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
+          pseudo << lambda do |element|
+            text = ""
+            for child in element.children
+              unless child.tag?
+                text << child.content
+              end
+            end
+            text.strip =~ content
+          end
+          "" # Remove
+        end
+        # Negation. Create another simple selector to handle it.
+        if statement.sub!(/^:not\(\s*/, "")
+          raise ArgumentError, "Double negatives are not missing feature" unless can_negate
+          @source << ":not("
+          negation << simple_selector(statement, values, false)
+          raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
+          @source << ")"
+          next
+        end
+        # No match: moving on.
+        break
+      end
+      # Return hash. The keys are mapped to instance variables.
+      {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
+    end
+    # Create a regular expression to match an attribute value based
+    # on the equality operator (=, ^=, |=, etc).
+    def attribute_match(equality, value)
+      regexp = value.is_a?(Regexp) ? value : Regexp.escape(value.to_s)
+      case equality
+        when "=" then
+          # Match the attribute value in full
+          Regexp.new("^#{regexp}$")
+        when "~=" then
+          # Match a space-separated word within the attribute value
+          Regexp.new("(^|\s)#{regexp}($|\s)")
+        when "^="
+          # Match the beginning of the attribute value
+          Regexp.new("^#{regexp}")
+        when "$="
+          # Match the end of the attribute value
+          Regexp.new("#{regexp}$")
+        when "*="
+          # Match substring of the attribute value
+          regexp.is_a?(Regexp) ? regexp : Regexp.new(regexp)
+        when "|=" then
+          # Match the first space-separated item of the attribute value
+          Regexp.new("^#{regexp}($|\s)")
+        else
+          raise InvalidSelectorError, "Invalid operation/value" unless value.empty?
+          # Match all attributes values (existence check)
+          //
+      end
+    end
+    # Returns a lambda that can match an element against the nth-child
+    # pseudo class, given the following arguments:
+    # * +a+ -- Value of a part.
+    # * +b+ -- Value of b part.
+    # * +of_type+ -- True to test only elements of this type (of-type).
+    # * +reverse+ -- True to count in reverse order (last-).
+    def nth_child(a, b, of_type, reverse)
+      # a = 0 means select at index b, if b = 0 nothing selected
+      return lambda { |element| false } if a == 0 && b == 0
+      # a < 0 and b < 0 will never match against an index
+      return lambda { |element| false } if a < 0 && b < 0
+      b = a + b + 1 if b < 0   # b < 0 just picks last element from each group
+      b -= 1 unless b == 0  # b == 0 is same as b == 1, otherwise zero based
+      lambda do |element|
+        # Element must be inside parent element.
+        return false unless element.parent && element.parent.tag?
+        index = 0
+        # Get siblings, reverse if counting from last.
+        siblings = element.parent.children
+        siblings = siblings.reverse if reverse
+        # Match element name if of-type, otherwise ignore name.
+        name = of_type ? element.name : nil
+        found = false
+        for child in siblings
+          # Skip text nodes/comments.
+          if child.tag? && (name == nil || child.name == name)
+            if a == 0
+              # Shortcut when a == 0 no need to go past count
+              if index == b
+                found = child.equal?(element)
+                break
+              end
+            elsif a < 0
+              # Only look for first b elements
+              break if index > b
+              if child.equal?(element)
+                found = (index % a) == 0
+                break
+              end
+            else
+              # Otherwise, break if child found and count ==  an+b
+              if child.equal?(element)
+                found = (index % a) == b
+                break
+              end
+            end
+            index += 1
+          end
+        end
+        found
+      end
+    end
+    # Creates an only-child lambda. Pass +of_type+ to only look at
+    # elements of its type.
+    def only_child(of_type)
+      lambda do |element|
+        # Element must be inside parent element.
+        return false unless element.parent && element.parent.tag?
+        name = of_type ? element.name : nil
+        other = false
+        for child in element.parent.children
+          # Skip text nodes/comments.
+          if child.tag? && (name == nil || child.name == name)
+            unless child.equal?(element)
+              other = true
+              break
+            end
+          end
+        end
+        !other
+      end
+    end
+    # Called to create a dependent selector (sibling, descendant, etc).
+    # Passes the remainder of the statement that will be reduced to zero
+    # eventually, and array of substitution values.
+    #
+    # This method is called from four places, so it helps to put it here
+    # for reuse. The only logic deals with the need to detect comma
+    # separators (alternate) and apply them to the selector group of the
+    # top selector.
+    def next_selector(statement, values)
+      second = Selector.new(statement, values)
+      # If there are alternate selectors, we group them in the top selector.
+      if alternates = second.instance_variable_get(:@alternates)
+        second.instance_variable_set(:@alternates, [])
+        @alternates.concat alternates
+      end
+      second
+    end
+  end
+  # See HTMLDeprecated::Selector.new
+  def self.selector(statement, *values)
+    # Convenience constructor: forwards +statement+ and the splatted
+    # substitution +values+ to Selector.new and returns the new selector.
+    Selector.new(statement, *values)
+  end
+  class Tag
+    def select(selector, *values)
+      selector = HTMLDeprecated::Selector.new(selector, values)
+      selector.select(self)
+    end
+  end
+end