nokogiri 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (88)
  1. data/History.ja.txt +34 -0
  2. data/History.txt +36 -0
  3. data/Manifest.txt +21 -0
  4. data/README.ja.txt +1 -1
  5. data/README.txt +1 -1
  6. data/Rakefile +27 -89
  7. data/ext/nokogiri/extconf.rb +48 -63
  8. data/ext/nokogiri/html_document.c +90 -29
  9. data/ext/nokogiri/html_sax_parser.c +23 -2
  10. data/ext/nokogiri/native.c +18 -8
  11. data/ext/nokogiri/native.h +22 -0
  12. data/ext/nokogiri/xml_attr.c +83 -0
  13. data/ext/nokogiri/xml_attr.h +9 -0
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +84 -18
  16. data/ext/nokogiri/xml_document_fragment.c +38 -0
  17. data/ext/nokogiri/xml_document_fragment.h +10 -0
  18. data/ext/nokogiri/xml_dtd.c +2 -22
  19. data/ext/nokogiri/xml_entity_reference.c +41 -0
  20. data/ext/nokogiri/xml_entity_reference.h +9 -0
  21. data/ext/nokogiri/xml_io.c +10 -3
  22. data/ext/nokogiri/xml_io.h +1 -0
  23. data/ext/nokogiri/xml_node.c +116 -66
  24. data/ext/nokogiri/xml_node_set.c +5 -1
  25. data/ext/nokogiri/xml_processing_instruction.c +44 -0
  26. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  27. data/ext/nokogiri/xml_reader.c +20 -4
  28. data/ext/nokogiri/xml_sax_parser.c +51 -15
  29. data/ext/nokogiri/xml_sax_push_parser.c +85 -0
  30. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  31. data/ext/nokogiri/xml_syntax_error.c +12 -8
  32. data/ext/nokogiri/xml_syntax_error.h +2 -1
  33. data/ext/nokogiri/xml_xpath_context.c +11 -2
  34. data/ext/nokogiri/xslt_stylesheet.c +1 -6
  35. data/lib/nokogiri.rb +10 -13
  36. data/lib/nokogiri/css.rb +1 -1
  37. data/lib/nokogiri/css/generated_parser.rb +287 -295
  38. data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
  39. data/lib/nokogiri/css/node.rb +1 -3
  40. data/lib/nokogiri/css/parser.rb +21 -12
  41. data/lib/nokogiri/css/parser.y +55 -44
  42. data/lib/nokogiri/css/syntax_error.rb +2 -1
  43. data/lib/nokogiri/css/tokenizer.rex +23 -32
  44. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  45. data/lib/nokogiri/html.rb +10 -4
  46. data/lib/nokogiri/html/document.rb +6 -2
  47. data/lib/nokogiri/syntax_error.rb +4 -0
  48. data/lib/nokogiri/version.rb +2 -1
  49. data/lib/nokogiri/xml.rb +3 -1
  50. data/lib/nokogiri/xml/attr.rb +3 -4
  51. data/lib/nokogiri/xml/cdata.rb +1 -1
  52. data/lib/nokogiri/xml/document.rb +4 -7
  53. data/lib/nokogiri/xml/document_fragment.rb +9 -0
  54. data/lib/nokogiri/xml/dtd.rb +3 -0
  55. data/lib/nokogiri/xml/node.rb +144 -40
  56. data/lib/nokogiri/xml/node/save_options.rb +32 -0
  57. data/lib/nokogiri/xml/node_set.rb +11 -20
  58. data/lib/nokogiri/xml/processing_instruction.rb +6 -0
  59. data/lib/nokogiri/xml/reader.rb +5 -0
  60. data/lib/nokogiri/xml/sax.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +3 -1
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/tasks/test.rb +136 -0
  65. data/test/css/test_parser.rb +4 -0
  66. data/test/css/test_tokenizer.rb +30 -17
  67. data/test/css/test_xpath_visitor.rb +11 -0
  68. data/test/helper.rb +11 -0
  69. data/test/hpricot/test_builder.rb +2 -9
  70. data/test/hpricot/test_parser.rb +4 -4
  71. data/test/html/test_builder.rb +7 -7
  72. data/test/html/test_document.rb +90 -4
  73. data/test/html/test_node.rb +1 -0
  74. data/test/test_css_cache.rb +1 -3
  75. data/test/test_reader.rb +19 -1
  76. data/test/test_xslt_transforms.rb +1 -1
  77. data/test/xml/node/test_save_options.rb +20 -0
  78. data/test/xml/sax/test_parser.rb +17 -0
  79. data/test/xml/sax/test_push_parser.rb +67 -0
  80. data/test/xml/test_attr.rb +16 -0
  81. data/test/xml/test_cdata.rb +1 -1
  82. data/test/xml/test_document.rb +45 -0
  83. data/test/xml/test_document_fragment.rb +18 -0
  84. data/test/xml/test_dtd.rb +2 -4
  85. data/test/xml/test_entity_reference.rb +16 -0
  86. data/test/xml/test_node.rb +149 -80
  87. data/test/xml/test_processing_instruction.rb +24 -0
  88. metadata +28 -2
@@ -53,94 +53,79 @@ class GeneratedTokenizer < GeneratedParser
53
53
  case state
54
54
  when nil
55
55
  case
56
- when (text = ss.scan(/~=/i))
56
+ when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*\(\s*/))
57
+ @rex_tokens.push action { [:FUNCTION, text] }
58
+
59
+ when (text = ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*/))
60
+ @rex_tokens.push action { [:IDENT, text] }
61
+
62
+ when (text = ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])+/))
63
+ @rex_tokens.push action { [:HASH, text] }
64
+
65
+ when (text = ss.scan(/[\s\r\n\f]*~=[\s\r\n\f]*/))
57
66
  @rex_tokens.push action { [:INCLUDES, text] }
58
67
 
59
- when (text = ss.scan(/\|=/i))
68
+ when (text = ss.scan(/[\s\r\n\f]*\|=[\s\r\n\f]*/))
60
69
  @rex_tokens.push action { [:DASHMATCH, text] }
61
70
 
62
- when (text = ss.scan(/\^=/i))
71
+ when (text = ss.scan(/[\s\r\n\f]*\^=[\s\r\n\f]*/))
63
72
  @rex_tokens.push action { [:PREFIXMATCH, text] }
64
73
 
65
- when (text = ss.scan(/\$=/i))
74
+ when (text = ss.scan(/[\s\r\n\f]*\$=[\s\r\n\f]*/))
66
75
  @rex_tokens.push action { [:SUFFIXMATCH, text] }
67
76
 
68
- when (text = ss.scan(/\*=/i))
77
+ when (text = ss.scan(/[\s\r\n\f]*\*=[\s\r\n\f]*/))
69
78
  @rex_tokens.push action { [:SUBSTRINGMATCH, text] }
70
79
 
71
- when (text = ss.scan(/!=/i))
80
+ when (text = ss.scan(/[\s\r\n\f]*!=[\s\r\n\f]*/))
72
81
  @rex_tokens.push action { [:NOT_EQUAL, text] }
73
82
 
74
- when (text = ss.scan(/[-]?([_a-z]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*\(\s*/i))
75
- @rex_tokens.push action { [:FUNCTION, text] }
83
+ when (text = ss.scan(/[\s\r\n\f]*=[\s\r\n\f]*/))
84
+ @rex_tokens.push action { [:EQUAL, text] }
76
85
 
77
- when (text = ss.scan(/@[-]?([_a-z]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*/i))
78
- @rex_tokens.push action { [:IDENT, text] }
86
+ when (text = ss.scan(/[\s\r\n\f]*\)[\s\r\n\f]*/))
87
+ @rex_tokens.push action { [:RPAREN, text] }
79
88
 
80
- when (text = ss.scan(/[-]?([_a-z]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*/i))
81
- @rex_tokens.push action { [:IDENT, text] }
89
+ when (text = ss.scan(/[\s\r\n\f]*\[[\s\r\n\f]*/))
90
+ @rex_tokens.push action { [:LSQUARE, text] }
82
91
 
83
- when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/i))
84
- @rex_tokens.push action { [:NUMBER, text] }
92
+ when (text = ss.scan(/[\s\r\n\f]*\][\s\r\n\f]*/))
93
+ @rex_tokens.push action { [:RSQUARE, text] }
85
94
 
86
- when (text = ss.scan(/\#([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])+/i))
87
- @rex_tokens.push action { [:HASH, text] }
88
-
89
- when (text = ss.scan(/[\s\r\n\f]*\+/i))
95
+ when (text = ss.scan(/[\s\r\n\f]*\+[\s\r\n\f]*/))
90
96
  @rex_tokens.push action { [:PLUS, text] }
91
97
 
92
- when (text = ss.scan(/[\s\r\n\f]*>/i))
98
+ when (text = ss.scan(/[\s\r\n\f]*>[\s\r\n\f]*/))
93
99
  @rex_tokens.push action { [:GREATER, text] }
94
100
 
95
- when (text = ss.scan(/[\s\r\n\f]*,/i))
101
+ when (text = ss.scan(/[\s\r\n\f]*,[\s\r\n\f]*/))
96
102
  @rex_tokens.push action { [:COMMA, text] }
97
103
 
98
- when (text = ss.scan(/[\s\r\n\f]*~/i))
104
+ when (text = ss.scan(/[\s\r\n\f]*~[\s\r\n\f]*/))
99
105
  @rex_tokens.push action { [:TILDE, text] }
100
106
 
101
- when (text = ss.scan(/\:not\(/i))
107
+ when (text = ss.scan(/\:not\([\s\r\n\f]*/))
102
108
  @rex_tokens.push action { [:NOT, text] }
103
109
 
104
- when (text = ss.scan(/@[-]?([_a-z]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*/i))
105
- @rex_tokens.push action { [:ATKEYWORD, text] }
106
-
107
- when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)%/i))
108
- @rex_tokens.push action { [:PERCENTAGE, text] }
109
-
110
- when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)[-]?([_a-z]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])([_a-z0-9-]|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*/i))
111
- @rex_tokens.push action { [:DIMENSION, text] }
112
-
113
- when (text = ss.scan(/<!--/i))
114
- @rex_tokens.push action { [:CDO, text] }
115
-
116
- when (text = ss.scan(/-->/i))
117
- @rex_tokens.push action { [:CDC, text] }
110
+ when (text = ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
111
+ @rex_tokens.push action { [:NUMBER, text] }
118
112
 
119
- when (text = ss.scan(/[\s\r\n\f]*\/\//i))
113
+ when (text = ss.scan(/[\s\r\n\f]*\/\/[\s\r\n\f]*/))
120
114
  @rex_tokens.push action { [:DOUBLESLASH, text] }
121
115
 
122
- when (text = ss.scan(/[\s\r\n\f]*\//i))
116
+ when (text = ss.scan(/[\s\r\n\f]*\/[\s\r\n\f]*/))
123
117
  @rex_tokens.push action { [:SLASH, text] }
124
118
 
125
- when (text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/i))
119
+ when (text = ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
126
120
  @rex_tokens.push action {[:UNICODE_RANGE, text] }
127
121
 
128
- when (text = ss.scan(/\/\*(.|[\r\n])*?\*\//i))
129
- ;
130
-
131
- when (text = ss.scan(/[\s\t\r\n\f]+/i))
122
+ when (text = ss.scan(/[\s\t\r\n\f]+/))
132
123
  @rex_tokens.push action { [:S, text] }
133
124
 
134
- when (text = ss.scan(/[\.*:\[\]=\)]/i))
135
- @rex_tokens.push action { [text, text] }
136
-
137
- when (text = ss.scan(/"([^\n\r\f"]|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*"|'([^\n\r\f']|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*'/i))
125
+ when (text = ss.scan(/"([^\n\r\f"]|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9A-Fa-f])*'/))
138
126
  @rex_tokens.push action { [:STRING, text] }
139
127
 
140
- when (text = ss.scan(/\"([^\n\r\f\"]|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*|([^\n\r\f\']|\\n|\r\n|\r|\f|[^\0-\177]|\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?|\\[^\n\r\f0-9a-f])*/i))
141
- @rex_tokens.push action { [:INVALID, text] }
142
-
143
- when (text = ss.scan(/./i))
128
+ when (text = ss.scan(/./))
144
129
  @rex_tokens.push action { [text, text] }
145
130
 
146
131
  else
@@ -11,9 +11,7 @@ module Nokogiri
11
11
  visitor.send(:"visit_#{type.to_s.downcase}", self)
12
12
  end
13
13
 
14
- def to_xpath prefix = nil, visitor = nil
15
- prefix ||= '//'
16
- visitor ||= XPathVisitor.new
14
+ def to_xpath prefix = '//', visitor = XPathVisitor.new
17
15
  self.preprocess!
18
16
  prefix + visitor.accept(self)
19
17
  end
@@ -12,14 +12,6 @@ module Nokogiri
12
12
  alias :cache_on? :cache_on
13
13
  alias :set_cache :cache_on=
14
14
 
15
- def parse string
16
- new.parse(string)
17
- end
18
-
19
- def xpath_for string, options={}
20
- new.xpath_for(string, options)
21
- end
22
-
23
15
  def [] string
24
16
  return unless @cache_on
25
17
  @mutex.synchronize { @cache[string] }
@@ -40,6 +32,22 @@ module Nokogiri
40
32
  block.call
41
33
  @cache_on = tmp
42
34
  end
35
+
36
+ ###
37
+ # Parse this CSS selector in +selector+. Returns an AST.
38
+ def parse selector
39
+ @warned ||= false
40
+ unless @warned
41
+ $stderr.puts('Nokogiri::CSS::Parser.parse is deprecated, call Nokogiri::CSS.parse()')
42
+ @warned = true
43
+ end
44
+ new.parse selector
45
+ end
46
+ end
47
+
48
+ def initialize namespaces = {}
49
+ @namespaces = namespaces
50
+ super()
43
51
  end
44
52
  alias :parse :scan_str
45
53
 
@@ -47,11 +55,12 @@ module Nokogiri
47
55
  v = self.class[string]
48
56
  return v if v
49
57
 
50
- prefix = options[:prefix] || nil
51
- visitor = options[:visitor] || nil
52
- args = [prefix, visitor]
58
+ args = [
59
+ options[:prefix] || '//',
60
+ options[:visitor] || XPathVisitor.new
61
+ ]
53
62
  self.class[string] = parse(string).map { |ast|
54
- ast.to_xpath(prefix, visitor)
63
+ ast.to_xpath(*args)
55
64
  }
56
65
  end
57
66
 
@@ -1,25 +1,23 @@
1
1
  class Nokogiri::CSS::GeneratedParser
2
2
 
3
3
  token FUNCTION INCLUDES DASHMATCH LBRACE HASH PLUS GREATER S STRING IDENT
4
- token COMMA URI CDO CDC NUMBER PERCENTAGE LENGTH EMS EXS ANGLE TIME FREQ
5
- token IMPORTANT_SYM IMPORT_SYM MEDIA_SYM PAGE_SYM CHARSET_SYM DIMENSION
6
- token PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL SLASH DOUBLESLASH
7
- token NOT
4
+ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
5
+ token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE
8
6
 
9
7
  rule
10
8
  selector
11
- : selector COMMA s_0toN simple_selector_1toN {
9
+ : selector COMMA simple_selector_1toN {
12
10
  result = [val.first, val.last].flatten
13
11
  }
14
12
  | simple_selector_1toN { result = val.flatten }
15
13
  ;
16
14
  combinator
17
- : PLUS s_0toN { result = :DIRECT_ADJACENT_SELECTOR }
18
- | GREATER s_0toN { result = :CHILD_SELECTOR }
19
- | TILDE s_0toN { result = :PRECEDING_SELECTOR }
15
+ : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
16
+ | GREATER { result = :CHILD_SELECTOR }
17
+ | TILDE { result = :PRECEDING_SELECTOR }
20
18
  | S { result = :DESCENDANT_SELECTOR }
21
- | DOUBLESLASH s_0toN { result = :DESCENDANT_SELECTOR }
22
- | SLASH s_0toN { result = :CHILD_SELECTOR }
19
+ | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
20
+ | SLASH { result = :CHILD_SELECTOR }
23
21
  ;
24
22
  simple_selector
25
23
  : element_name hcap_0toN {
@@ -68,46 +66,75 @@ rule
68
66
  : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
69
67
  ;
70
68
  element_name
71
- : IDENT { result = Node.new(:ELEMENT_NAME, val) }
69
+ : namespace '|' IDENT {
70
+ result = Node.new(:ELEMENT_NAME,
71
+ [[val.first, val.last].compact.join(':')]
72
+ )
73
+ }
74
+ | IDENT {
75
+ name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
76
+ result = Node.new(:ELEMENT_NAME, [name])
77
+ }
72
78
  | '*' { result = Node.new(:ELEMENT_NAME, val) }
73
79
  ;
80
+ namespace
81
+ : IDENT { result = val[0] }
82
+ |
83
+ ;
74
84
  attrib
75
- : '[' s_0toN IDENT s_0toN attrib_val_0or1 ']' {
85
+ : LSQUARE IDENT attrib_val_0or1 RSQUARE {
76
86
  result = Node.new(:ATTRIBUTE_CONDITION,
77
- [Node.new(:ELEMENT_NAME, [val[2]])] + (val[4] || [])
87
+ [Node.new(:ELEMENT_NAME, [val[1]])] + (val[2] || [])
78
88
  )
79
89
  }
80
- | '[' s_0toN function s_0toN attrib_val_0or1 ']' {
90
+ | LSQUARE function attrib_val_0or1 RSQUARE {
81
91
  result = Node.new(:ATTRIBUTE_CONDITION,
82
- [val[2]] + (val[4] || [])
92
+ [val[1]] + (val[2] || [])
83
93
  )
84
94
  }
85
- | '[' s_0toN NUMBER s_0toN ']' {
95
+ | LSQUARE NUMBER RSQUARE {
86
96
  # Non standard, but hpricot supports it.
87
97
  result = Node.new(:PSEUDO_CLASS,
88
- [Node.new(:FUNCTION, ['nth-child(', val[2]])]
98
+ [Node.new(:FUNCTION, ['nth-child(', val[1]])]
89
99
  )
90
100
  }
91
101
  ;
92
102
  function
93
- : FUNCTION ')' {
103
+ : FUNCTION RPAREN {
94
104
  result = Node.new(:FUNCTION, [val.first.strip])
95
105
  }
96
- | FUNCTION expr ')' {
106
+ | FUNCTION expr RPAREN {
97
107
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
98
108
  }
99
- | FUNCTION an_plus_b ')' {
109
+ | FUNCTION an_plus_b RPAREN {
100
110
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
101
111
  }
102
- | NOT expr ')' {
112
+ | NOT expr RPAREN {
103
113
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
104
114
  }
105
115
  ;
106
116
  expr
107
- : NUMBER COMMA s_0toN expr { result = [val.first, val.last] }
108
- | STRING COMMA s_0toN expr { result = [val.first, val.last] }
117
+ : NUMBER COMMA expr { result = [val.first, val.last] }
118
+ | STRING COMMA expr { result = [val.first, val.last] }
119
+ | IDENT COMMA expr { result = [val.first, val.last] }
109
120
  | NUMBER
110
121
  | STRING
122
+ | IDENT # even, odd
123
+ {
124
+ if val[0] == 'even'
125
+ val = ["2","n","+","0"]
126
+ result = Node.new(:AN_PLUS_B, val)
127
+ elsif val[0] == 'odd'
128
+ val = ["2","n","+","1"]
129
+ result = Node.new(:AN_PLUS_B, val)
130
+ else
131
+ # This is not CSS standard. It allows us to support this:
132
+ # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
133
+ # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
134
+ # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
135
+ result = val
136
+ end
137
+ }
111
138
  ;
112
139
  an_plus_b
113
140
  : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
@@ -140,18 +167,6 @@ rule
140
167
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
141
168
  end
142
169
  }
143
- | IDENT # even, odd
144
- {
145
- if val[0] == 'even'
146
- val = ["2","n","+","0"]
147
- result = Node.new(:AN_PLUS_B, val)
148
- elsif val[0] == 'odd'
149
- val = ["2","n","+","1"]
150
- result = Node.new(:AN_PLUS_B, val)
151
- else
152
- raise Racc::ParseError, "parse error on IDENT '#{val[0]}'"
153
- end
154
- }
155
170
  ;
156
171
  pseudo
157
172
  : ':' function {
@@ -185,12 +200,12 @@ rule
185
200
  : HASH { result = Node.new(:ID, val) }
186
201
  ;
187
202
  attrib_val_0or1
188
- : eql_incl_dash s_0toN IDENT s_0toN { result = [val.first, val[2]] }
189
- | eql_incl_dash s_0toN STRING s_0toN { result = [val.first, val[2]] }
203
+ : eql_incl_dash IDENT { result = [val.first, val[1]] }
204
+ | eql_incl_dash STRING { result = [val.first, val[1]] }
190
205
  |
191
206
  ;
192
207
  eql_incl_dash
193
- : '='
208
+ : EQUAL
194
209
  | PREFIXMATCH
195
210
  | SUFFIXMATCH
196
211
  | SUBSTRINGMATCH
@@ -199,17 +214,13 @@ rule
199
214
  | DASHMATCH
200
215
  ;
201
216
  negation
202
- : NOT s_0toN negation_arg s_0toN ')' {
203
- result = Node.new(:NOT, [val[2]])
217
+ : NOT negation_arg RPAREN {
218
+ result = Node.new(:NOT, [val[1]])
204
219
  }
205
220
  ;
206
221
  negation_arg
207
222
  : hcap_1toN
208
223
  ;
209
- s_0toN
210
- : S s_0toN
211
- |
212
- ;
213
224
  end
214
225
 
215
226
  ---- header
@@ -1,6 +1,7 @@
1
+ require 'nokogiri/syntax_error'
1
2
  module Nokogiri
2
3
  module CSS
3
- class SyntaxError < ::SyntaxError
4
+ class SyntaxError < ::Nokogiri::SyntaxError
4
5
  end
5
6
  end
6
7
  end
@@ -7,56 +7,47 @@ macro
7
7
  w [\s\r\n\f]*
8
8
  nonascii [^\\\\0-\\\\177]
9
9
  num -?([0-9]+|[0-9]*\.[0-9]+)
10
- unicode \\\\\\\\\[0-9a-f]{1,6}(\r\n|[\s\n\r\t\f])?
10
+ unicode \\\\\\\\\[0-9A-Fa-f]{1,6}(\r\n|[\s\n\r\t\f])?
11
11
 
12
- escape {unicode}|\\\\\\\[^\n\r\f0-9a-f]
13
- nmchar [_a-z0-9-]|{nonascii}|{escape}
14
- nmstart [_a-z]|{nonascii}|{escape}
15
- ident [-]?({nmstart})({nmchar})*
12
+ escape {unicode}|\\\\\\\[^\n\r\f0-9A-Fa-f]
13
+ nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
+ nmstart [_A-Za-z]|{nonascii}|{escape}
15
+ ident [-@]?({nmstart})({nmchar})*
16
16
  name ({nmchar})+
17
17
  string1 "([^\n\r\f"]|\\{nl}|{nonascii}|{escape})*"
18
18
  string2 '([^\n\r\f']|\\{nl}|{nonascii}|{escape})*'
19
19
  string {string1}|{string2}
20
- invalid1 \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
21
- invalid2 \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
22
- invalid {invalid1}|{invalid2}
23
- Comment \/\*(.|[\r\n])*?\*\/
24
20
 
25
21
  rule
26
22
 
27
23
  # [:state] pattern [actions]
28
24
 
29
- ~= { [:INCLUDES, text] }
30
- \|= { [:DASHMATCH, text] }
31
- \^= { [:PREFIXMATCH, text] }
32
- \$= { [:SUFFIXMATCH, text] }
33
- \*= { [:SUBSTRINGMATCH, text] }
34
- != { [:NOT_EQUAL, text] }
35
25
  {ident}\(\s* { [:FUNCTION, text] }
36
- @{ident} { [:IDENT, text] }
37
26
  {ident} { [:IDENT, text] }
38
- {num} { [:NUMBER, text] }
39
27
  \#{name} { [:HASH, text] }
40
- {w}\+ { [:PLUS, text] }
41
- {w}> { [:GREATER, text] }
42
- {w}, { [:COMMA, text] }
43
- {w}~ { [:TILDE, text] }
44
- \:not\( { [:NOT, text] }
45
- @{ident} { [:ATKEYWORD, text] }
46
- {num}% { [:PERCENTAGE, text] }
47
- {num}{ident} { [:DIMENSION, text] }
48
- <!-- { [:CDO, text] }
49
- --> { [:CDC, text] }
50
- {w}\/\/ { [:DOUBLESLASH, text] }
51
- {w}\/ { [:SLASH, text] }
28
+ {w}~={w} { [:INCLUDES, text] }
29
+ {w}\|={w} { [:DASHMATCH, text] }
30
+ {w}\^={w} { [:PREFIXMATCH, text] }
31
+ {w}\$={w} { [:SUFFIXMATCH, text] }
32
+ {w}\*={w} { [:SUBSTRINGMATCH, text] }
33
+ {w}!={w} { [:NOT_EQUAL, text] }
34
+ {w}={w} { [:EQUAL, text] }
35
+ {w}\){w} { [:RPAREN, text] }
36
+ {w}\[{w} { [:LSQUARE, text] }
37
+ {w}\]{w} { [:RSQUARE, text] }
38
+ {w}\+{w} { [:PLUS, text] }
39
+ {w}>{w} { [:GREATER, text] }
40
+ {w},{w} { [:COMMA, text] }
41
+ {w}~{w} { [:TILDE, text] }
42
+ \:not\({w} { [:NOT, text] }
43
+ {num} { [:NUMBER, text] }
44
+ {w}\/\/{w} { [:DOUBLESLASH, text] }
45
+ {w}\/{w} { [:SLASH, text] }
52
46
 
53
47
  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
54
48
 
55
- {Comment} /* ignore comments */
56
49
  [\s\t\r\n\f]+ { [:S, text] }
57
- [\.*:\[\]=\)] { [text, text] }
58
50
  {string} { [:STRING, text] }
59
- {invalid} { [:INVALID, text] }
60
51
  . { [text, text] }
61
52
  end
62
53
  end