RubyGems - cocoa-xml - Versions diffs - 0.4.4 - Mend

cocoa-xml 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

data/History.rdoc +6 -0
data/Manifest.txt +20 -0
data/README.rdoc +105 -0
data/Rakefile +23 -0
data/lib/cocoa-xml.rb +48 -0
data/lib/cocoa-xml/nodeset.rb +57 -0
data/lib/cocoa-xml/nsxmldocument_extras.rb +48 -0
data/lib/cocoa-xml/nsxmlnode_extras.rb +106 -0
data/lib/cocoa-xml/version.rb +3 -0
data/lib/nokogiri/css.rb +27 -0
data/lib/nokogiri/css/generated_parser.rb +646 -0
data/lib/nokogiri/css/generated_tokenizer.rb +143 -0
data/lib/nokogiri/css/node.rb +100 -0
data/lib/nokogiri/css/parser.rb +83 -0
data/lib/nokogiri/css/parser.y +230 -0
data/lib/nokogiri/css/syntax_error.rb +7 -0
data/lib/nokogiri/css/tokenizer.rb +8 -0
data/lib/nokogiri/css/tokenizer.rex +55 -0
data/lib/nokogiri/css/xpath_visitor.rb +165 -0
data/lib/nokogiri/syntax_error.rb +4 -0
metadata +123 -0

data/lib/nokogiri/css/generated_tokenizer.rb ADDED

@@ -0,0 +1,143 @@
+#--
+# DO NOT MODIFY!!!!
+# This file is automatically generated by rex 1.0.4
+# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
+#++
+module Nokogiri
+module CSS
+  # @private -- lexer for CSS selectors generated by rex; change tokenizer.rex and regenerate rather than editing by hand
+class GeneratedTokenizer < GeneratedParser
+  require 'strscan'  # provides StringScanner
+  class ScanError < StandardError ; end  # raised by next_token when nothing matches or @state is unknown
+  attr_reader   :lineno  # line counter maintained by next_token (starts at 1)
+  attr_reader   :filename  # set by load_file only
+  attr_accessor :state  # lexer state; next_token only handles nil
+  def scan_setup(str)  # Point a fresh StringScanner at +str+ and reset line counter and state
+    @ss = StringScanner.new(str)
+    @lineno =  1
+    @state  = nil
+  end
+  def action(&block)  # Run the given block and return its value; wraps every token action below
+    yield
+  end
+  def scan_str(str)  # Load +str+ and run do_parse (not defined in this class) over its tokens
+    scan_setup(str)
+    do_parse
+  end
+  def load_file( filename )  # Remember +filename+ and load the file's contents into the scanner
+    @filename = filename
+    open(filename, "r") do |f|  # NOTE(review): this is Kernel#open, not File.open -- confirm filename is always trusted
+      scan_setup(f.read)
+    end
+  end
+  def scan_file( filename )  # Same as scan_str, but reads the input from +filename+
+    load_file(filename)
+    do_parse
+  end
+  def next_token  # Return the next [type, text] pair, or nil once the input is exhausted
+    return if @ss.eos?
+    text = @ss.peek(1)
+    @lineno  +=  1  if text == "\n"  # NOTE(review): only a newline that is the first unread character is counted
+    token = case @state
+    when nil
+      case  # the first pattern that matches wins, so clause order matters (FUNCTION before IDENT, "~=" before "~")
+      when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
+         action { [:FUNCTION, text] }
+      when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
+         action { [:IDENT, text] }
+      when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
+         action { [:HASH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
+         action { [:INCLUDES, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
+         action { [:DASHMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
+         action { [:PREFIXMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
+         action { [:SUFFIXMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
+         action { [:SUBSTRINGMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
+         action { [:NOT_EQUAL, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
+         action { [:EQUAL, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\)/))
+         action { [:RPAREN, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
+         action { [:LSQUARE, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\]/))
+         action { [:RSQUARE, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
+         action { [:PLUS, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
+         action { [:GREATER, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
+         action { [:COMMA, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
+         action { [:TILDE, text] }
+      when (text = @ss.scan(/\:not\([\s\r\n\f]*/))
+         action { [:NOT, text] }
+      when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
+         action { [:NUMBER, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
+         action { [:DOUBLESLASH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
+         action { [:SLASH, text] }
+      when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
+         action {[:UNICODE_RANGE, text] }
+      when (text = @ss.scan(/[\s\t\r\n\f]+/))
+         action { [:S, text] }
+      when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
+         action { [:STRING, text] }
+      when (text = @ss.scan(/./))  # fallback: a single remaining character is used as both token type and text
+         action { [text, text] }
+      else
+        text = @ss.string[@ss.pos .. -1]
+        raise  ScanError, "can not match: '" + text + "'"
+      end  # inner case (pattern dispatch)
+    else
+      raise  ScanError, "undefined state: '" + state.to_s + "'"
+    end  # case @state
+    token
+  end  # def next_token
+end # class GeneratedTokenizer
+end
+end

data/lib/nokogiri/css/node.rb ADDED

@@ -0,0 +1,100 @@
+module Nokogiri
+  module CSS
+    # @private
+    class Node
+      # Get the type of this node
+      attr_accessor :type
+      # Get the value of this node
+      attr_accessor :value
+      # Create a new Node with +type+ and +value+
+      def initialize type, value
+        @type = type
+        @value = value
+      end
+      # Accept +visitor+
+      def accept visitor
+        visitor.send(:"visit_#{type.to_s.downcase}", self)
+      end
+      ###
+      # Convert this CSS node to xpath with +prefix+ using +visitor+
+      def to_xpath prefix = '//', visitor = XPathVisitor.new
+        self.preprocess!
+        prefix + visitor.accept(self)
+      end
+      # Preprocess this node tree
+      def preprocess!
+        ### Deal with nth-child
+        matches = find_by_type(
+          [:CONDITIONAL_SELECTOR,
+            [:ELEMENT_NAME],
+            [:PSEUDO_CLASS,
+              [:FUNCTION]
+            ]
+          ]
+        )
+        matches.each do |match|
+          if match.value[1].value[0].value[0] =~ /^nth-(last-)?child/
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              match.value[1].value[0],
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          end
+        end
+        ### Deal with first-child, last-child
+        matches = find_by_type(
+          [:CONDITIONAL_SELECTOR,
+            [:ELEMENT_NAME], [:PSEUDO_CLASS]
+        ])
+        matches.each do |match|
+          if ['first-child', 'last-child'].include?(match.value[1].value.first)
+            which = match.value[1].value.first.gsub(/-\w*$/, '')
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              Node.new(:FUNCTION, ["#{which}("]),
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          elsif 'only-child' == match.value[1].value.first
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              Node.new(:FUNCTION, ["#{match.value[1].value.first}("]),
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          end
+        end
+        self
+      end
+      # Find a node by type using +types+
+      def find_by_type types
+        matches = []
+        matches << self if to_type == types
+        @value.each do |v|
+          matches += v.find_by_type(types) if v.respond_to?(:find_by_type)
+        end
+        matches
+      end
+      # Convert to_type
+      def to_type
+        [@type] + @value.map { |n|
+          n.to_type if n.respond_to?(:to_type)
+        }.compact
+      end
+      # Convert to array
+      def to_a
+        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
+      end
+    end
+  end
+end

data/lib/nokogiri/css/parser.rb ADDED

@@ -0,0 +1,83 @@
+require 'thread'
+module Nokogiri
+  module CSS
+    # @private
+    class Parser < GeneratedTokenizer
+      @cache_on = true
+      @cache    = {}
+      @mutex    = Mutex.new
+      class << self
+        # Turn on CSS parse caching
+        attr_accessor :cache_on
+        alias :cache_on? :cache_on
+        alias :set_cache :cache_on=
+        # Get the css selector in +string+ from the cache
+        def [] string
+          return unless @cache_on
+          @mutex.synchronize { @cache[string] }
+        end
+        # Set the css selector in +string+ in the cache to +value+
+        def []= string, value
+          return value unless @cache_on
+          @mutex.synchronize { @cache[string] = value }
+        end
+        # Clear the cache
+        def clear_cache
+          @mutex.synchronize { @cache = {} }
+        end
+        # Execute +block+ without cache
+        def without_cache &block
+          tmp = @cache_on
+          @cache_on = false
+          block.call
+          @cache_on = tmp
+        end
+        ###
+        # Parse this CSS selector in +selector+.  Returns an AST.
+        def parse selector
+          @warned ||= false
+          unless @warned
+            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
+            @warned = true
+          end
+          new.parse selector
+        end
+      end
+      # Create a new CSS parser with respect to +namespaces+
+      def initialize namespaces = {}
+        @namespaces = namespaces
+        super()
+      end
+      alias :parse :scan_str
+      # Get the xpath for +string+ using +options+
+      def xpath_for string, options={}
+        key = "#{string}#{options[:ns]}#{options[:prefix]}"
+        v = self.class[key]
+        return v if v
+        args = [
+          options[:prefix] || '//',
+          options[:visitor] || XPathVisitor.new
+        ]
+        self.class[key] = parse(string).map { |ast|
+          ast.to_xpath(*args)
+        }
+      end
+      # On CSS parser error, raise an exception
+      def on_error error_token_id, error_value, value_stack
+        after = value_stack.compact.last
+        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
+      end
+    end
+  end
+end

data/lib/nokogiri/css/parser.y ADDED

@@ -0,0 +1,230 @@
+# Racc grammar for CSS selectors (with a few XPath-like and hpricot-like
+# extensions). Rule actions build a tree of Node objects; the start rule
+# +selector+ yields a flat array with one tree per comma-separated selector.
+class Nokogiri::CSS::GeneratedParser
+token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
+token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
+token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
+rule
+  # Comma-separated list of selectors, flattened into a single array
+  selector
+    : selector COMMA simple_selector_1toN {
+        result = [val.first, val.last].flatten
+      }
+    | simple_selector_1toN { result = val.flatten }
+    ;
+  # Maps a combinator token to the node type used to join two selectors
+  combinator
+    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
+    | GREATER { result = :CHILD_SELECTOR }
+    | TILDE { result = :PRECEDING_SELECTOR }
+    | S { result = :DESCENDANT_SELECTOR }
+    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
+    | SLASH { result = :CHILD_SELECTOR }
+    ;
+  # One element with its optional conditions. When no element name is
+  # written, the conditions are attached to a '*' element.
+  simple_selector
+    : element_name hcap_0toN {
+        result =  if val[1].nil?
+                    val.first
+                  else
+                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
+                  end
+      }
+    | element_name hcap_1toN negation {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [
+            val.first,
+            Node.new(:COMBINATOR, [val[1], val.last])
+          ]
+        )
+      }
+    | element_name negation {
+        result = Node.new(:CONDITIONAL_SELECTOR, val)
+      }
+    | function
+    | function attrib {
+        result = Node.new(:CONDITIONAL_SELECTOR, val)
+      }
+    | hcap_1toN negation {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [
+            Node.new(:ELEMENT_NAME, ['*']),
+            Node.new(:COMBINATOR, val)
+          ]
+        )
+      }
+    | hcap_1toN {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [Node.new(:ELEMENT_NAME, ['*']), val.first]
+        )
+      }
+    ;
+  # Chain of simple selectors joined by combinators; right-recursive, so the
+  # leftmost combinator ends up at the root of the resulting tree
+  simple_selector_1toN
+    : simple_selector combinator simple_selector_1toN {
+        result = Node.new(val[1], [val.first, val.last])
+      }
+    | simple_selector
+    ;
+  # .name
+  class
+    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
+    ;
+  # ns|name, name or *. A bare name is prefixed with "xmlns:" when the
+  # parser's namespaces contain the key 'xmlns'.
+  element_name
+    : namespace '|' IDENT {
+        result = Node.new(:ELEMENT_NAME,
+          [[val.first, val.last].compact.join(':')]
+        )
+      }
+    | IDENT {
+        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
+        result = Node.new(:ELEMENT_NAME, [name])
+      }
+    | '*' { result = Node.new(:ELEMENT_NAME, val) }
+    ;
+  # Optional namespace prefix (may be empty)
+  namespace
+    : IDENT { result = val[0] }
+    |
+    ;
+  # [name], [name op value], [function() op value] or [number]
+  attrib
+    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
+        result = Node.new(:ATTRIBUTE_CONDITION,
+          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
+        )
+      }
+    | LSQUARE function attrib_val_0or1 RSQUARE {
+        result = Node.new(:ATTRIBUTE_CONDITION,
+          [val[1]] + (val[2] || [])
+        )
+      }
+    | LSQUARE NUMBER RSQUARE {
+        # Non standard, but hpricot supports it.
+        result = Node.new(:PSEUDO_CLASS,
+          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
+        )
+      }
+    ;
+  # name(...) call; the first value is the stripped opening token text,
+  # followed by the flattened arguments
+  function
+    : FUNCTION RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip])
+      }
+    | FUNCTION expr RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | FUNCTION an_plus_b RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | NOT expr RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | HAS selector RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    ;
+  # Function arguments: comma-separated numbers, strings and identifiers.
+  # A lone 'even' / 'odd' is turned into the equivalent an+b node.
+  expr
+    : NUMBER COMMA expr { result = [val.first, val.last] }
+    | STRING COMMA expr { result = [val.first, val.last] }
+    | IDENT COMMA expr { result = [val.first, val.last] }
+    | NUMBER
+    | STRING
+    | IDENT                             # even, odd
+      {
+        if val[0] == 'even'
+          val = ["2","n","+","0"]
+          result = Node.new(:AN_PLUS_B, val)
+        elsif val[0] == 'odd'
+          val = ["2","n","+","1"]
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          # This is not CSS standard.  It allows us to support this:
+          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
+          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
+          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
+          result = val
+        end
+      }
+    ;
+  # an+b expressions, normalised to the four values [a, 'n', '+', b].
+  # Any identifier other than 'n' (or '-n' in the short form) is rejected.
+  an_plus_b
+    : NUMBER IDENT PLUS NUMBER          # 5n+3 -5n+3
+      {
+        if val[1] == 'n'
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
+        end
+      }
+    | IDENT PLUS NUMBER {               # n+3, -n+3
+        if val[0] == 'n'
+          val.unshift("1")
+          result = Node.new(:AN_PLUS_B, val)
+        elsif val[0] == '-n'
+          val[0] = 'n'
+          val.unshift("-1")
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          # In this alternative the IDENT is val[0]; val[1] is the PLUS token.
+          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
+        end
+      }
+    | NUMBER IDENT                      # 5n, -5n
+      {
+        if val[1] == 'n'
+          val << "+"
+          val << "0"
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
+        end
+      }
+    ;
+  # :name or :name(...)
+  pseudo
+    : ':' function {
+        result = Node.new(:PSEUDO_CLASS, [val[1]])
+      }
+    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
+    ;
+  # Zero or more conditions
+  hcap_0toN
+    : hcap_1toN
+    |
+    ;
+  # One or more conditions (id, class, attribute, pseudo class); several
+  # conditions are nested pairwise inside COMBINATOR nodes
+  hcap_1toN
+    : attribute_id hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | class hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | attrib hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | pseudo hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | attribute_id
+    | class
+    | attrib
+    | pseudo
+    ;
+  # #id
+  attribute_id
+    : HASH { result = Node.new(:ID, val) }
+    ;
+  # Optional "operator value" part of an attribute condition
+  attrib_val_0or1
+    : eql_incl_dash IDENT { result = [val.first, val[1]] }
+    | eql_incl_dash STRING { result = [val.first, val[1]] }
+    |
+    ;
+  # Maps an attribute operator token to the symbol stored in the tree
+  eql_incl_dash
+    : EQUAL           { result = :equal }
+    | PREFIXMATCH     { result = :prefix_match }
+    | SUFFIXMATCH     { result = :suffix_match }
+    | SUBSTRINGMATCH  { result = :substring_match }
+    | NOT_EQUAL       { result = :not_equal }
+    | INCLUDES        { result = :includes }
+    | DASHMATCH       { result = :dash_match }
+    ;
+  # :not(<conditions>)
+  negation
+    : NOT negation_arg RPAREN {
+        result = Node.new(:NOT, [val[1]])
+      }
+    ;
+  negation_arg
+    : hcap_1toN
+    ;
+end
+---- header