RubyGems - cocoa-xml - Versions diffs - 0.4.4 - Mend

cocoa-xml 0.4.4

Files changed (21) hide show

data/History.rdoc +6 -0
data/Manifest.txt +20 -0
data/README.rdoc +105 -0
data/Rakefile +23 -0
data/lib/cocoa-xml.rb +48 -0
data/lib/cocoa-xml/nodeset.rb +57 -0
data/lib/cocoa-xml/nsxmldocument_extras.rb +48 -0
data/lib/cocoa-xml/nsxmlnode_extras.rb +106 -0
data/lib/cocoa-xml/version.rb +3 -0
data/lib/nokogiri/css.rb +27 -0
data/lib/nokogiri/css/generated_parser.rb +646 -0
data/lib/nokogiri/css/generated_tokenizer.rb +143 -0
data/lib/nokogiri/css/node.rb +100 -0
data/lib/nokogiri/css/parser.rb +83 -0
data/lib/nokogiri/css/parser.y +230 -0
data/lib/nokogiri/css/syntax_error.rb +7 -0
data/lib/nokogiri/css/tokenizer.rb +8 -0
data/lib/nokogiri/css/tokenizer.rex +55 -0
data/lib/nokogiri/css/xpath_visitor.rb +165 -0
data/lib/nokogiri/syntax_error.rb +4 -0
metadata +123 -0

data/lib/nokogiri/css/generated_tokenizer.rb ADDED

@@ -0,0 +1,143 @@
+#--
+# DO NOT MODIFY!!!!
+# This file is automatically generated by rex 1.0.4
+# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
+#++
+module Nokogiri
+module CSS
+  # @private
+class GeneratedTokenizer < GeneratedParser  # rex-generated lexer layered on the parser class; next_token below is the token source
+  require 'strscan'  # StringScanner is used by scan_setup
+  class ScanError < StandardError ; end  # raised by next_token on unmatched input or an unknown @state
+  attr_reader   :lineno  # reset to 1 by scan_setup, bumped in next_token
+  attr_reader   :filename  # only assigned by load_file
+  attr_accessor :state  # lexer state; next_token only handles nil
+  def scan_setup(str)  # point a fresh StringScanner at +str+ and reset line counter and state
+    @ss = StringScanner.new(str)
+    @lineno =  1
+    @state  = nil
+  end
+  def action(&block)  # runs the given block and returns its value; every token rule below is wrapped in it
+    yield
+  end
+  def scan_str(str)  # prime the scanner with +str+, then return the result of do_parse
+    scan_setup(str)
+    do_parse  # not defined in this class; presumably inherited from GeneratedParser
+  end
+  def load_file( filename )  # remember +filename+ and prime the scanner with the file's contents
+    @filename = filename
+    open(filename, "r") do |f|  # NOTE(review): Kernel#open runs a command when the name starts with "|"; confirm callers only pass trusted paths
+      scan_setup(f.read)
+    end
+  end
+  def scan_file( filename )  # load_file followed by do_parse
+    load_file(filename)
+    do_parse
+  end
+  def next_token  # returns the next [type, text] pair, or nil once the input is exhausted
+    return if @ss.eos?
+    text = @ss.peek(1)
+    @lineno  +=  1  if text == "\n"  # only sees a newline that is the next unread char; newlines consumed mid-match by the scans below are not counted
+    token = case @state
+    when nil
+      case  # rules are tried top to bottom; the first scan that matches wins
+      when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
+         action { [:FUNCTION, text] }  # identifier immediately followed by "(" plus any whitespace; text keeps the paren
+      when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
+         action { [:IDENT, text] }  # same identifier pattern as FUNCTION without the trailing paren
+      when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
+         action { [:HASH, text] }  # "#" followed by one or more name characters; text keeps the "#"
+      when (text = @ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
+         action { [:INCLUDES, text] }  # operator tokens from here on absorb the whitespace around them
+      when (text = @ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
+         action { [:DASHMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
+         action { [:PREFIXMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
+         action { [:SUFFIXMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
+         action { [:SUBSTRINGMATCH, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
+         action { [:NOT_EQUAL, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
+         action { [:EQUAL, text] }  # must come after the two-character operators above so they are not split
+      when (text = @ss.scan(/[\s\r\n\f]*\)/))
+         action { [:RPAREN, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
+         action { [:LSQUARE, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\]/))
+         action { [:RSQUARE, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
+         action { [:PLUS, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
+         action { [:GREATER, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
+         action { [:COMMA, text] }
+      when (text = @ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
+         action { [:TILDE, text] }
+      when (text = @ss.scan(/\:not\([\s\r\n\f]*/))
+         action { [:NOT, text] }
+      when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
+         action { [:NUMBER, text] }  # text stays a String, optional leading minus included
+      when (text = @ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
+         action { [:DOUBLESLASH, text] }  # tried before SLASH so "//" is not read as two slashes
+      when (text = @ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
+         action { [:SLASH, text] }
+      when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
+         action {[:UNICODE_RANGE, text] }
+      when (text = @ss.scan(/[\s\t\r\n\f]+/))
+         action { [:S, text] }  # whitespace not already absorbed by one of the operator rules above
+      when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
+         action { [:STRING, text] }  # text keeps the surrounding quotes
+      when (text = @ss.scan(/./))
+         action { [text, text] }  # fallback: any other single character is returned as its own token type
+      else
+        text = @ss.string[@ss.pos .. -1]
+        raise  ScanError, "can not match: '" + text + "'"
+      end  # inner case (token rules)
+    else
+      raise  ScanError, "undefined state: '" + state.to_s + "'"
+    end  # case @state
+    token
+  end  # def next_token
+end # class GeneratedTokenizer
+end
+end

data/lib/nokogiri/css/node.rb ADDED

@@ -0,0 +1,100 @@
+module Nokogiri
+  module CSS
+    # @private
+    class Node  # one node of the parsed CSS selector tree
+      # Type of this node (read/write), a Symbol such as :ELEMENT_NAME or :FUNCTION
+      attr_accessor :type
+      # Payload of this node (read/write): a collection that may mix child Nodes and plain values
+      attr_accessor :value
+      # Create a new Node with +type+ and +value+ (+value+ is iterated with each/map by the methods below)
+      def initialize type, value
+        @type = type
+        @value = value
+      end
+      # Accept +visitor+: calls visitor.visit_<downcased type>(self) and returns its result
+      def accept visitor
+        visitor.send(:"visit_#{type.to_s.downcase}", self)
+      end
+      ###
+      # Convert this CSS node to xpath with +prefix+ using +visitor+; runs preprocess! first, which mutates the tree
+      def to_xpath prefix = '//', visitor = XPathVisitor.new
+        self.preprocess!
+        prefix + visitor.accept(self)
+      end
+      # Preprocess this node tree in place, rewriting child-position pseudo classes on named elements; returns self
+      def preprocess!
+        ### Deal with nth-child / nth-last-child: the element name becomes '*' and the tag moves into a self( function
+        matches = find_by_type(
+          [:CONDITIONAL_SELECTOR,
+            [:ELEMENT_NAME],
+            [:PSEUDO_CLASS,
+              [:FUNCTION]
+            ]
+          ]
+        )
+        matches.each do |match|
+          if match.value[1].value[0].value[0] =~ /^nth-(last-)?child/  # name of the FUNCTION inside the PSEUDO_CLASS
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              match.value[1].value[0],  # the original FUNCTION node, read before match.value[1] is replaced
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          end
+        end
+        ### Deal with first-child, last-child and only-child (pseudo classes whose value is a plain name, not a FUNCTION)
+        matches = find_by_type(
+          [:CONDITIONAL_SELECTOR,
+            [:ELEMENT_NAME], [:PSEUDO_CLASS]
+        ])
+        matches.each do |match|
+          if ['first-child', 'last-child'].include?(match.value[1].value.first)
+            which = match.value[1].value.first.gsub(/-\w*$/, '')  # "first-child" -> "first", "last-child" -> "last"
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              Node.new(:FUNCTION, ["#{which}("]),
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          elsif 'only-child' == match.value[1].value.first
+            tag_name = match.value[0].value.first
+            match.value[0].value = ['*']
+            match.value[1] = Node.new(:COMBINATOR, [
+              Node.new(:FUNCTION, ["#{match.value[1].value.first}("]),  # evaluated before the assignment replaces match.value[1]
+              Node.new(:FUNCTION, ['self(', tag_name])
+            ])
+          end
+        end
+        self
+      end
+      # Find nodes by type: returns every node in this subtree (self included) whose to_type equals +types+
+      def find_by_type types
+        matches = []
+        matches << self if to_type == types
+        @value.each do |v|
+          matches += v.find_by_type(types) if v.respond_to?(:find_by_type)  # plain values are skipped
+        end
+        matches
+      end
+      # Nested type signature: [type, *signatures of child Nodes]; children without to_type are dropped
+      def to_type
+        [@type] + @value.map { |n|
+          n.to_type if n.respond_to?(:to_type)
+        }.compact
+      end
+      # Convert to array: [type, *children], children converted with to_a when they respond to it, else wrapped as [child]
+      def to_a
+        [@type] + @value.map { |n| n.respond_to?(:to_a) ? n.to_a : [n] }
+      end
+    end
+  end
+end

data/lib/nokogiri/css/parser.rb ADDED

@@ -0,0 +1,83 @@
+require 'thread'
+module Nokogiri
+  module CSS
+    # @private
+    class Parser < GeneratedTokenizer
+      @cache_on = true
+      @cache    = {}
+      @mutex    = Mutex.new
+      class << self
+        # Turn on CSS parse caching
+        attr_accessor :cache_on
+        alias :cache_on? :cache_on
+        alias :set_cache :cache_on=
+        # Get the css selector in +string+ from the cache
+        def [] string
+          return unless @cache_on
+          @mutex.synchronize { @cache[string] }
+        end
+        # Set the css selector in +string+ in the cache to +value+
+        def []= string, value
+          return value unless @cache_on
+          @mutex.synchronize { @cache[string] = value }
+        end
+        # Clear the cache
+        def clear_cache
+          @mutex.synchronize { @cache = {} }
+        end
+        # Execute +block+ without cache
+        def without_cache &block
+          tmp = @cache_on
+          @cache_on = false
+          block.call
+          @cache_on = tmp
+        end
+        ###
+        # Parse this CSS selector in +selector+.  Returns an AST.
+        def parse selector
+          @warned ||= false
+          unless @warned
+            $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse(), this will be removed August 1st or version 1.4.0 (whichever is first)')
+            @warned = true
+          end
+          new.parse selector
+        end
+      end
+      # Create a new CSS parser with respect to +namespaces+
+      def initialize namespaces = {}
+        @namespaces = namespaces
+        super()
+      end
+      alias :parse :scan_str
+      # Get the xpath for +string+ using +options+
+      def xpath_for string, options={}
+        key = "#{string}#{options[:ns]}#{options[:prefix]}"
+        v = self.class[key]
+        return v if v
+        args = [
+          options[:prefix] || '//',
+          options[:visitor] || XPathVisitor.new
+        ]
+        self.class[key] = parse(string).map { |ast|
+          ast.to_xpath(*args)
+        }
+      end
+      # On CSS parser error, raise an exception
+      def on_error error_token_id, error_value, value_stack
+        after = value_stack.compact.last
+        raise SyntaxError.new("unexpected '#{error_value}' after '#{after}'")
+      end
+    end
+  end
+end

data/lib/nokogiri/css/parser.y ADDED

@@ -0,0 +1,230 @@
+class Nokogiri::CSS::GeneratedParser  # racc grammar for CSS selectors; the actions build Node trees
+token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
+token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
+token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS  # NOTE(review): the rex tokenizer emits no HAS or LBRACE token, and emits UNICODE_RANGE which is not declared here - confirm
+rule
+  selector  # comma separated selector groups, returned as one flat Array
+    : selector COMMA simple_selector_1toN {
+        result = [val.first, val.last].flatten
+      }
+    | simple_selector_1toN { result = val.flatten }
+    ;
+  combinator  # each alternative yields the Symbol that simple_selector_1toN uses as the Node type
+    : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
+    | GREATER { result = :CHILD_SELECTOR }
+    | TILDE { result = :PRECEDING_SELECTOR }
+    | S { result = :DESCENDANT_SELECTOR }
+    | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
+    | SLASH { result = :CHILD_SELECTOR }
+    ;
+  simple_selector  # element name and/or conditions; a bare condition list is applied to the * element
+    : element_name hcap_0toN {
+        result =  if val[1].nil?
+                    val.first
+                  else
+                    Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
+                  end
+      }
+    | element_name hcap_1toN negation {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [
+            val.first,
+            Node.new(:COMBINATOR, [val[1], val.last])
+          ]
+        )
+      }
+    | element_name negation {
+        result = Node.new(:CONDITIONAL_SELECTOR, val)
+      }
+    | function
+    | function attrib {
+        result = Node.new(:CONDITIONAL_SELECTOR, val)
+      }
+    | hcap_1toN negation {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [
+            Node.new(:ELEMENT_NAME, ['*']),
+            Node.new(:COMBINATOR, val)
+          ]
+        )
+      }
+    | hcap_1toN {
+        result = Node.new(:CONDITIONAL_SELECTOR,
+          [Node.new(:ELEMENT_NAME, ['*']), val.first]
+        )
+      }
+    ;
+  simple_selector_1toN  # right recursive chain: Node(combinator symbol, [left selector, rest of chain])
+    : simple_selector combinator simple_selector_1toN {
+        result = Node.new(val[1], [val.first, val.last])
+      }
+    | simple_selector
+    ;
+  class  # a dot followed by an IDENT
+    : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
+    ;
+  element_name  # ns|name, plain name, or *
+    : namespace '|' IDENT {
+        result = Node.new(:ELEMENT_NAME,
+          [[val.first, val.last].compact.join(':')]
+        )
+      }
+    | IDENT {
+        name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
+        result = Node.new(:ELEMENT_NAME, [name])
+      }
+    | '*' { result = Node.new(:ELEMENT_NAME, val) }
+    ;
+  namespace  # optional prefix; the empty alternative presumably yields nil, which compact drops in element_name
+    : IDENT { result = val[0] }
+    |
+    ;
+  attrib  # [name], [name op value], [function op value], or the hpricot style [n]
+    : LSQUARE IDENT attrib_val_0or1 RSQUARE {
+        result = Node.new(:ATTRIBUTE_CONDITION,
+          [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
+        )
+      }
+    | LSQUARE function attrib_val_0or1 RSQUARE {
+        result = Node.new(:ATTRIBUTE_CONDITION,
+          [val[1]] + (val[2] || [])
+        )
+      }
+    | LSQUARE NUMBER RSQUARE {
+        # Non standard, but hpricot supports it.
+        result = Node.new(:PSEUDO_CLASS,
+          [Node.new(:FUNCTION, ['nth-child(', val[1]])]
+        )
+      }
+    ;
+  function  # the FUNCTION token text keeps its opening paren; strip removes whitespace matched after it
+    : FUNCTION RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip])
+      }
+    | FUNCTION expr RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | FUNCTION an_plus_b RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | NOT expr RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    | HAS selector RPAREN {
+        result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
+      }
+    ;
+  expr  # function arguments; comma separated items nest to the right and are flattened by the function rule
+    : NUMBER COMMA expr { result = [val.first, val.last] }
+    | STRING COMMA expr { result = [val.first, val.last] }
+    | IDENT COMMA expr { result = [val.first, val.last] }
+    | NUMBER
+    | STRING
+    | IDENT                             # even, odd
+      {
+        if val[0] == 'even'
+          val = ["2","n","+","0"]
+          result = Node.new(:AN_PLUS_B, val)
+        elsif val[0] == 'odd'
+          val = ["2","n","+","1"]
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          # This is not CSS standard.  It allows us to support this:
+          # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
+          # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
+          # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
+          result = val
+        end
+      }
+    ;
+  an_plus_b  # the an+b argument form; every alternative builds an AN_PLUS_B node with four parts: a, n, +, b
+    : NUMBER IDENT PLUS NUMBER          # 5n+3 -5n+3
+      {
+        if val[1] == 'n'
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
+        end
+      }
+    | IDENT PLUS NUMBER {               # n+3, -n+3
+        if val[0] == 'n'
+          val.unshift("1")
+          result = Node.new(:AN_PLUS_B, val)
+        elsif val[0] == '-n'
+          val[0] = 'n'
+          val.unshift("-1")
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          raise Racc::ParseError, "parse error on IDENT '#{val[0]}'" # val[0] is the IDENT here; val[1] is the PLUS token
+        end
+      }
+    | NUMBER IDENT                      # 5n, -5n
+      {
+        if val[1] == 'n'
+          val << "+"
+          val << "0"
+          result = Node.new(:AN_PLUS_B, val)
+        else
+          raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
+        end
+      }
+    ;
+  pseudo  # a colon followed by a function or a plain IDENT
+    : ':' function {
+        result = Node.new(:PSEUDO_CLASS, [val[1]])
+      }
+    | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
+    ;
+  hcap_0toN  # optional condition list
+    : hcap_1toN
+    |
+    ;
+  hcap_1toN  # one or more of id, class, attrib, pseudo; right nested in COMBINATOR nodes
+    : attribute_id hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | class hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | attrib hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | pseudo hcap_1toN {
+        result = Node.new(:COMBINATOR, val)
+      }
+    | attribute_id
+    | class
+    | attrib
+    | pseudo
+    ;
+  attribute_id  # a HASH token wrapped in an ID node
+    : HASH { result = Node.new(:ID, val) }
+    ;
+  attrib_val_0or1  # optional pair of operator symbol and value
+    : eql_incl_dash IDENT { result = [val.first, val[1]] }
+    | eql_incl_dash STRING { result = [val.first, val[1]] }
+    |
+    ;
+  eql_incl_dash  # attribute comparison operator tokens mapped to symbols
+    : EQUAL           { result = :equal }
+    | PREFIXMATCH     { result = :prefix_match }
+    | SUFFIXMATCH     { result = :suffix_match }
+    | SUBSTRINGMATCH  { result = :substring_match }
+    | NOT_EQUAL       { result = :not_equal }
+    | INCLUDES        { result = :includes }
+    | DASHMATCH       { result = :dash_match }
+    ;
+  negation  # the NOT token, a condition list, then RPAREN; yields a NOT node
+    : NOT negation_arg RPAREN {
+        result = Node.new(:NOT, [val[1]])
+      }
+    ;
+  negation_arg  # only condition lists may be negated
+    : hcap_1toN
+    ;
+end
+---- header