nokogiri 1.5.10 → 1.10.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (182) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +198 -0
  5. data/bin/nokogiri +50 -10
  6. data/dependencies.yml +72 -0
  7. data/ext/nokogiri/extconf.rb +634 -92
  8. data/ext/nokogiri/html_document.c +8 -8
  9. data/ext/nokogiri/html_element_description.c +15 -15
  10. data/ext/nokogiri/html_entity_lookup.c +1 -1
  11. data/ext/nokogiri/html_sax_parser_context.c +4 -4
  12. data/ext/nokogiri/html_sax_push_parser.c +2 -2
  13. data/ext/nokogiri/nokogiri.c +20 -12
  14. data/ext/nokogiri/nokogiri.h +1 -44
  15. data/ext/nokogiri/xml_attr.c +34 -25
  16. data/ext/nokogiri/xml_cdata.c +12 -6
  17. data/ext/nokogiri/xml_comment.c +18 -3
  18. data/ext/nokogiri/xml_document.c +64 -32
  19. data/ext/nokogiri/xml_dtd.c +2 -2
  20. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  21. data/ext/nokogiri/xml_entity_reference.c +1 -1
  22. data/ext/nokogiri/xml_io.c +11 -6
  23. data/ext/nokogiri/xml_namespace.c +50 -17
  24. data/ext/nokogiri/xml_namespace.h +3 -2
  25. data/ext/nokogiri/xml_node.c +459 -240
  26. data/ext/nokogiri/xml_node_set.c +166 -147
  27. data/ext/nokogiri/xml_node_set.h +2 -4
  28. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  29. data/ext/nokogiri/xml_reader.c +6 -19
  30. data/ext/nokogiri/xml_sax_parser.c +11 -13
  31. data/ext/nokogiri/xml_sax_parser_context.c +41 -1
  32. data/ext/nokogiri/xml_sax_push_parser.c +56 -12
  33. data/ext/nokogiri/xml_schema.c +1 -1
  34. data/ext/nokogiri/xml_syntax_error.c +11 -5
  35. data/ext/nokogiri/xml_syntax_error.h +1 -1
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +17 -38
  38. data/ext/nokogiri/xslt_stylesheet.c +10 -10
  39. data/lib/nokogiri/css/node.rb +0 -50
  40. data/lib/nokogiri/css/parser.rb +263 -233
  41. data/lib/nokogiri/css/parser.y +54 -40
  42. data/lib/nokogiri/css/tokenizer.rb +104 -103
  43. data/lib/nokogiri/css/tokenizer.rex +5 -5
  44. data/lib/nokogiri/css/xpath_visitor.rb +78 -19
  45. data/lib/nokogiri/decorators/slop.rb +12 -5
  46. data/lib/nokogiri/html/document.rb +102 -21
  47. data/lib/nokogiri/html/document_fragment.rb +11 -3
  48. data/lib/nokogiri/html/sax/parser.rb +12 -2
  49. data/lib/nokogiri/html/sax/push_parser.rb +22 -2
  50. data/lib/nokogiri/version.rb +40 -22
  51. data/lib/nokogiri/xml/builder.rb +34 -31
  52. data/lib/nokogiri/xml/document.rb +20 -14
  53. data/lib/nokogiri/xml/document_fragment.rb +50 -2
  54. data/lib/nokogiri/xml/dtd.rb +14 -4
  55. data/lib/nokogiri/xml/entity_reference.rb +18 -0
  56. data/lib/nokogiri/xml/node.rb +148 -203
  57. data/lib/nokogiri/xml/node_set.rb +139 -123
  58. data/lib/nokogiri/xml/parse_options.rb +22 -0
  59. data/lib/nokogiri/xml/sax/document.rb +1 -1
  60. data/lib/nokogiri/xml/sax/parser.rb +7 -8
  61. data/lib/nokogiri/xml/searchable.rb +230 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +24 -1
  63. data/lib/nokogiri/xml.rb +3 -1
  64. data/lib/nokogiri.rb +40 -24
  65. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
  66. data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
  67. data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  68. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
  69. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  70. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
  71. metadata +252 -388
  72. data/.autotest +0 -26
  73. data/.gemtest +0 -0
  74. data/CHANGELOG.ja.rdoc +0 -785
  75. data/CHANGELOG.rdoc +0 -783
  76. data/C_CODING_STYLE.rdoc +0 -33
  77. data/Manifest.txt +0 -303
  78. data/README.ja.rdoc +0 -106
  79. data/README.rdoc +0 -175
  80. data/ROADMAP.md +0 -90
  81. data/Rakefile +0 -228
  82. data/STANDARD_RESPONSES.md +0 -47
  83. data/Y_U_NO_GEMSPEC.md +0 -155
  84. data/build_all +0 -105
  85. data/tasks/cross_compile.rb +0 -150
  86. data/tasks/nokogiri.org.rb +0 -24
  87. data/tasks/test.rb +0 -95
  88. data/test/css/test_nthiness.rb +0 -159
  89. data/test/css/test_parser.rb +0 -341
  90. data/test/css/test_tokenizer.rb +0 -198
  91. data/test/css/test_xpath_visitor.rb +0 -91
  92. data/test/decorators/test_slop.rb +0 -16
  93. data/test/files/2ch.html +0 -108
  94. data/test/files/address_book.rlx +0 -12
  95. data/test/files/address_book.xml +0 -10
  96. data/test/files/bar/bar.xsd +0 -4
  97. data/test/files/dont_hurt_em_why.xml +0 -422
  98. data/test/files/encoding.html +0 -82
  99. data/test/files/encoding.xhtml +0 -84
  100. data/test/files/exslt.xml +0 -8
  101. data/test/files/exslt.xslt +0 -35
  102. data/test/files/foo/foo.xsd +0 -4
  103. data/test/files/metacharset.html +0 -10
  104. data/test/files/noencoding.html +0 -47
  105. data/test/files/po.xml +0 -32
  106. data/test/files/po.xsd +0 -66
  107. data/test/files/shift_jis.html +0 -10
  108. data/test/files/shift_jis.xml +0 -5
  109. data/test/files/snuggles.xml +0 -3
  110. data/test/files/staff.dtd +0 -10
  111. data/test/files/staff.xml +0 -59
  112. data/test/files/staff.xslt +0 -32
  113. data/test/files/test_document_url/bar.xml +0 -2
  114. data/test/files/test_document_url/document.dtd +0 -4
  115. data/test/files/test_document_url/document.xml +0 -6
  116. data/test/files/tlm.html +0 -850
  117. data/test/files/to_be_xincluded.xml +0 -2
  118. data/test/files/valid_bar.xml +0 -2
  119. data/test/files/xinclude.xml +0 -4
  120. data/test/helper.rb +0 -154
  121. data/test/html/sax/test_parser.rb +0 -141
  122. data/test/html/sax/test_parser_context.rb +0 -46
  123. data/test/html/test_builder.rb +0 -164
  124. data/test/html/test_document.rb +0 -552
  125. data/test/html/test_document_encoding.rb +0 -138
  126. data/test/html/test_document_fragment.rb +0 -261
  127. data/test/html/test_element_description.rb +0 -105
  128. data/test/html/test_named_characters.rb +0 -14
  129. data/test/html/test_node.rb +0 -196
  130. data/test/html/test_node_encoding.rb +0 -27
  131. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  132. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  133. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  134. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  135. data/test/test_convert_xpath.rb +0 -135
  136. data/test/test_css_cache.rb +0 -45
  137. data/test/test_encoding_handler.rb +0 -46
  138. data/test/test_memory_leak.rb +0 -156
  139. data/test/test_nokogiri.rb +0 -132
  140. data/test/test_reader.rb +0 -555
  141. data/test/test_soap4r_sax.rb +0 -52
  142. data/test/test_xslt_transforms.rb +0 -254
  143. data/test/xml/node/test_save_options.rb +0 -28
  144. data/test/xml/node/test_subclass.rb +0 -44
  145. data/test/xml/sax/test_parser.rb +0 -366
  146. data/test/xml/sax/test_parser_context.rb +0 -106
  147. data/test/xml/sax/test_push_parser.rb +0 -157
  148. data/test/xml/test_attr.rb +0 -64
  149. data/test/xml/test_attribute_decl.rb +0 -86
  150. data/test/xml/test_builder.rb +0 -306
  151. data/test/xml/test_c14n.rb +0 -151
  152. data/test/xml/test_cdata.rb +0 -48
  153. data/test/xml/test_comment.rb +0 -29
  154. data/test/xml/test_document.rb +0 -828
  155. data/test/xml/test_document_encoding.rb +0 -28
  156. data/test/xml/test_document_fragment.rb +0 -223
  157. data/test/xml/test_dtd.rb +0 -103
  158. data/test/xml/test_dtd_encoding.rb +0 -33
  159. data/test/xml/test_element_content.rb +0 -56
  160. data/test/xml/test_element_decl.rb +0 -73
  161. data/test/xml/test_entity_decl.rb +0 -122
  162. data/test/xml/test_entity_reference.rb +0 -245
  163. data/test/xml/test_namespace.rb +0 -95
  164. data/test/xml/test_node.rb +0 -1137
  165. data/test/xml/test_node_attributes.rb +0 -96
  166. data/test/xml/test_node_encoding.rb +0 -107
  167. data/test/xml/test_node_inheritance.rb +0 -32
  168. data/test/xml/test_node_reparenting.rb +0 -374
  169. data/test/xml/test_node_set.rb +0 -755
  170. data/test/xml/test_parse_options.rb +0 -64
  171. data/test/xml/test_processing_instruction.rb +0 -30
  172. data/test/xml/test_reader_encoding.rb +0 -142
  173. data/test/xml/test_relax_ng.rb +0 -60
  174. data/test/xml/test_schema.rb +0 -103
  175. data/test/xml/test_syntax_error.rb +0 -12
  176. data/test/xml/test_text.rb +0 -45
  177. data/test/xml/test_unparented_node.rb +0 -422
  178. data/test/xml/test_xinclude.rb +0 -83
  179. data/test/xml/test_xpath.rb +0 -295
  180. data/test/xslt/test_custom_functions.rb +0 -133
  181. data/test/xslt/test_exception_handling.rb +0 -37
  182. data/test_all +0 -81
@@ -10,13 +10,12 @@ rule
10
10
  result = [val.first, val.last].flatten
11
11
  }
12
12
  | prefixless_combinator_selector { result = val.flatten }
13
- | simple_selector_1toN { result = val.flatten }
13
+ | optional_S simple_selector_1toN { result = [val.last].flatten }
14
14
  ;
15
15
  combinator
16
16
  : PLUS { result = :DIRECT_ADJACENT_SELECTOR }
17
17
  | GREATER { result = :CHILD_SELECTOR }
18
18
  | TILDE { result = :FOLLOWING_SELECTOR }
19
- | S { result = :DESCENDANT_SELECTOR }
20
19
  | DOUBLESLASH { result = :DESCENDANT_SELECTOR }
21
20
  | SLASH { result = :CHILD_SELECTOR }
22
21
  ;
@@ -28,17 +27,6 @@ rule
28
27
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
29
28
  end
30
29
  }
31
- | element_name hcap_1toN negation {
32
- result = Node.new(:CONDITIONAL_SELECTOR,
33
- [
34
- val.first,
35
- Node.new(:COMBINATOR, [val[1], val.last])
36
- ]
37
- )
38
- }
39
- | element_name negation {
40
- result = Node.new(:CONDITIONAL_SELECTOR, val)
41
- }
42
30
  | function
43
31
  | function pseudo {
44
32
  result = Node.new(:CONDITIONAL_SELECTOR, val)
@@ -46,14 +34,6 @@ rule
46
34
  | function attrib {
47
35
  result = Node.new(:CONDITIONAL_SELECTOR, val)
48
36
  }
49
- | hcap_1toN negation {
50
- result = Node.new(:CONDITIONAL_SELECTOR,
51
- [
52
- Node.new(:ELEMENT_NAME, ['*']),
53
- Node.new(:COMBINATOR, val)
54
- ]
55
- )
56
- }
57
37
  | hcap_1toN {
58
38
  result = Node.new(:CONDITIONAL_SELECTOR,
59
39
  [Node.new(:ELEMENT_NAME, ['*']), val.first]
@@ -69,10 +49,13 @@ rule
69
49
  : simple_selector combinator simple_selector_1toN {
70
50
  result = Node.new(val[1], [val.first, val.last])
71
51
  }
52
+ | simple_selector S simple_selector_1toN {
53
+ result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
54
+ }
72
55
  | simple_selector
73
56
  ;
74
57
  class
75
- : '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
58
+ : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
76
59
  ;
77
60
  element_name
78
61
  : namespaced_ident
@@ -130,7 +113,7 @@ rule
130
113
  | FUNCTION expr RPAREN {
131
114
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
132
115
  }
133
- | FUNCTION an_plus_b RPAREN {
116
+ | FUNCTION nth RPAREN {
134
117
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
135
118
  }
136
119
  | NOT expr RPAREN {
@@ -148,12 +131,13 @@ rule
148
131
  | STRING
149
132
  | IDENT # even, odd
150
133
  {
151
- if val[0] == 'even'
152
- val = ["2","n","+","0"]
153
- result = Node.new(:AN_PLUS_B, val)
154
- elsif val[0] == 'odd'
155
- val = ["2","n","+","1"]
156
- result = Node.new(:AN_PLUS_B, val)
134
+ case val[0]
135
+ when 'even'
136
+ result = Node.new(:NTH, ['2','n','+','0'])
137
+ when 'odd'
138
+ result = Node.new(:NTH, ['2','n','+','1'])
139
+ when 'n'
140
+ result = Node.new(:NTH, ['1','n','+','0'])
157
141
  else
158
142
  # This is not CSS standard. It allows us to support this:
159
143
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
@@ -163,11 +147,11 @@ rule
163
147
  end
164
148
  }
165
149
  ;
166
- an_plus_b
150
+ nth
167
151
  : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
168
152
  {
169
153
  if val[1] == 'n'
170
- result = Node.new(:AN_PLUS_B, val)
154
+ result = Node.new(:NTH, val)
171
155
  else
172
156
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
173
157
  end
@@ -175,21 +159,27 @@ rule
175
159
  | IDENT PLUS NUMBER { # n+3, -n+3
176
160
  if val[0] == 'n'
177
161
  val.unshift("1")
178
- result = Node.new(:AN_PLUS_B, val)
162
+ result = Node.new(:NTH, val)
179
163
  elsif val[0] == '-n'
180
164
  val[0] = 'n'
181
165
  val.unshift("-1")
182
- result = Node.new(:AN_PLUS_B, val)
166
+ result = Node.new(:NTH, val)
183
167
  else
184
168
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
185
169
  end
186
170
  }
187
- | NUMBER IDENT # 5n, -5n
188
- {
189
- if val[1] == 'n'
171
+ | NUMBER IDENT { # 5n, -5n, 10n-1
172
+ n = val[1]
173
+ if n[0, 2] == 'n-'
174
+ val[1] = 'n'
175
+ val << "-"
176
+ # b is contained in n as n is the string "n-b"
177
+ val << n[2, n.size]
178
+ result = Node.new(:NTH, val)
179
+ elsif n == 'n'
190
180
  val << "+"
191
181
  val << "0"
192
- result = Node.new(:AN_PLUS_B, val)
182
+ result = Node.new(:NTH, val)
193
183
  else
194
184
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
195
185
  end
@@ -218,17 +208,22 @@ rule
218
208
  | pseudo hcap_1toN {
219
209
  result = Node.new(:COMBINATOR, val)
220
210
  }
211
+ | negation hcap_1toN {
212
+ result = Node.new(:COMBINATOR, val)
213
+ }
221
214
  | attribute_id
222
215
  | class
223
216
  | attrib
224
217
  | pseudo
218
+ | negation
225
219
  ;
226
220
  attribute_id
227
- : HASH { result = Node.new(:ID, val) }
221
+ : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
228
222
  ;
229
223
  attrib_val_0or1
230
- : eql_incl_dash IDENT { result = [val.first, val[1]] }
231
- | eql_incl_dash STRING { result = [val.first, val[1]] }
224
+ : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
225
+ | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
226
+ | eql_incl_dash NUMBER { result = [val.first, val[1]] }
232
227
  |
233
228
  ;
234
229
  eql_incl_dash
@@ -250,9 +245,28 @@ rule
250
245
  | element_name hcap_1toN
251
246
  | hcap_1toN
252
247
  ;
248
+ optional_S
249
+ : S
250
+ |
251
+ ;
253
252
  end
254
253
 
255
254
  ---- header
256
255
 
257
256
  require 'nokogiri/css/parser_extras'
258
257
 
258
+ ---- inner
259
+
260
+ def unescape_css_identifier(identifier)
261
+ identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
262
+ end
263
+
264
+ def unescape_css_string(str)
265
+ str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
266
+ if $1=="\n"
267
+ ''
268
+ else
269
+ $1 || [$2.hex].pack('U')
270
+ end
271
+ end
272
+ end
@@ -1,151 +1,152 @@
1
1
  #--
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5
3
+ # This file is automatically generated by rex 1.0.7
4
4
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
5
  #++
6
6
 
7
7
  module Nokogiri
8
8
  module CSS
9
9
  class Tokenizer # :nodoc:
10
- require 'strscan'
10
+ require 'strscan'
11
11
 
12
- class ScanError < StandardError ; end
12
+ class ScanError < StandardError ; end
13
13
 
14
- attr_reader :lineno
15
- attr_reader :filename
16
- attr_accessor :state
14
+ attr_reader :lineno
15
+ attr_reader :filename
16
+ attr_accessor :state
17
17
 
18
- def scan_setup(str)
19
- @ss = StringScanner.new(str)
20
- @lineno = 1
21
- @state = nil
22
- end
18
+ def scan_setup(str)
19
+ @ss = StringScanner.new(str)
20
+ @lineno = 1
21
+ @state = nil
22
+ end
23
23
 
24
- def action
25
- yield
26
- end
24
+ def action
25
+ yield
26
+ end
27
27
 
28
- def scan_str(str)
29
- scan_setup(str)
30
- do_parse
31
- end
32
- alias :scan :scan_str
28
+ def scan_str(str)
29
+ scan_setup(str)
30
+ do_parse
31
+ end
32
+ alias :scan :scan_str
33
33
 
34
- def load_file( filename )
35
- @filename = filename
36
- open(filename, "r") do |f|
37
- scan_setup(f.read)
38
- end
39
- end
34
+ def load_file( filename )
35
+ @filename = filename
36
+ File.open(filename, "r") do |f|
37
+ scan_setup(f.read)
38
+ end
39
+ end
40
40
 
41
- def scan_file( filename )
42
- load_file(filename)
43
- do_parse
44
- end
41
+ def scan_file( filename )
42
+ load_file(filename)
43
+ do_parse
44
+ end
45
45
 
46
46
 
47
- def next_token
48
- return if @ss.eos?
49
-
50
- # skips empty actions
51
- until token = _next_token or @ss.eos?; end
52
- token
53
- end
47
+ def next_token
48
+ return if @ss.eos?
54
49
 
55
- def _next_token
56
- text = @ss.peek(1)
57
- @lineno += 1 if text == "\n"
58
- token = case @state
59
- when nil
60
- case
61
- when (text = @ss.scan(/has\([\s]*/))
62
- action { [:HAS, text] }
50
+ # skips empty actions
51
+ until token = _next_token or @ss.eos?; end
52
+ token
53
+ end
63
54
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
- action { [:FUNCTION, text] }
55
+ def _next_token
56
+ text = @ss.peek(1)
57
+ @lineno += 1 if text == "\n"
58
+ token = case @state
59
+ when nil
60
+ case
61
+ when (text = @ss.scan(/has\([\s]*/))
62
+ action { [:HAS, text] }
66
63
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
- action { [:IDENT, text] }
64
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
+ action { [:FUNCTION, text] }
69
66
 
70
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
- action { [:HASH, text] }
67
+ when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
+ action { [:IDENT, text] }
72
69
 
73
- when (text = @ss.scan(/[\s]*~=[\s]*/))
74
- action { [:INCLUDES, text] }
70
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
71
+ action { [:HASH, text] }
75
72
 
76
- when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
- action { [:DASHMATCH, text] }
73
+ when (text = @ss.scan(/[\s]*~=[\s]*/))
74
+ action { [:INCLUDES, text] }
78
75
 
79
- when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
- action { [:PREFIXMATCH, text] }
76
+ when (text = @ss.scan(/[\s]*\|=[\s]*/))
77
+ action { [:DASHMATCH, text] }
81
78
 
82
- when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
- action { [:SUFFIXMATCH, text] }
79
+ when (text = @ss.scan(/[\s]*\^=[\s]*/))
80
+ action { [:PREFIXMATCH, text] }
84
81
 
85
- when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
- action { [:SUBSTRINGMATCH, text] }
82
+ when (text = @ss.scan(/[\s]*\$=[\s]*/))
83
+ action { [:SUFFIXMATCH, text] }
87
84
 
88
- when (text = @ss.scan(/[\s]*!=[\s]*/))
89
- action { [:NOT_EQUAL, text] }
85
+ when (text = @ss.scan(/[\s]*\*=[\s]*/))
86
+ action { [:SUBSTRINGMATCH, text] }
90
87
 
91
- when (text = @ss.scan(/[\s]*=[\s]*/))
92
- action { [:EQUAL, text] }
88
+ when (text = @ss.scan(/[\s]*!=[\s]*/))
89
+ action { [:NOT_EQUAL, text] }
93
90
 
94
- when (text = @ss.scan(/[\s]*\)/))
95
- action { [:RPAREN, text] }
91
+ when (text = @ss.scan(/[\s]*=[\s]*/))
92
+ action { [:EQUAL, text] }
96
93
 
97
- when (text = @ss.scan(/[\s]*\[[\s]*/))
98
- action { [:LSQUARE, text] }
94
+ when (text = @ss.scan(/[\s]*\)/))
95
+ action { [:RPAREN, text] }
99
96
 
100
- when (text = @ss.scan(/[\s]*\]/))
101
- action { [:RSQUARE, text] }
97
+ when (text = @ss.scan(/\[[\s]*/))
98
+ action { [:LSQUARE, text] }
102
99
 
103
- when (text = @ss.scan(/[\s]*\+[\s]*/))
104
- action { [:PLUS, text] }
100
+ when (text = @ss.scan(/[\s]*\]/))
101
+ action { [:RSQUARE, text] }
105
102
 
106
- when (text = @ss.scan(/[\s]*>[\s]*/))
107
- action { [:GREATER, text] }
103
+ when (text = @ss.scan(/[\s]*\+[\s]*/))
104
+ action { [:PLUS, text] }
108
105
 
109
- when (text = @ss.scan(/[\s]*,[\s]*/))
110
- action { [:COMMA, text] }
106
+ when (text = @ss.scan(/[\s]*>[\s]*/))
107
+ action { [:GREATER, text] }
111
108
 
112
- when (text = @ss.scan(/[\s]*~[\s]*/))
113
- action { [:TILDE, text] }
109
+ when (text = @ss.scan(/[\s]*,[\s]*/))
110
+ action { [:COMMA, text] }
114
111
 
115
- when (text = @ss.scan(/\:not\([\s]*/))
116
- action { [:NOT, text] }
112
+ when (text = @ss.scan(/[\s]*~[\s]*/))
113
+ action { [:TILDE, text] }
117
114
 
118
- when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
- action { [:NUMBER, text] }
115
+ when (text = @ss.scan(/\:not\([\s]*/))
116
+ action { [:NOT, text] }
120
117
 
121
- when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
- action { [:DOUBLESLASH, text] }
118
+ when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
119
+ action { [:NUMBER, text] }
123
120
 
124
- when (text = @ss.scan(/[\s]*\/[\s]*/))
125
- action { [:SLASH, text] }
121
+ when (text = @ss.scan(/[\s]*\/\/[\s]*/))
122
+ action { [:DOUBLESLASH, text] }
126
123
 
127
- when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
- action {[:UNICODE_RANGE, text] }
124
+ when (text = @ss.scan(/[\s]*\/[\s]*/))
125
+ action { [:SLASH, text] }
129
126
 
130
- when (text = @ss.scan(/[\s]+/))
131
- action { [:S, text] }
127
+ when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
128
+ action {[:UNICODE_RANGE, text] }
132
129
 
133
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
134
- action { [:STRING, text] }
130
+ when (text = @ss.scan(/[\s]+/))
131
+ action { [:S, text] }
135
132
 
136
- when (text = @ss.scan(/./))
137
- action { [text, text] }
133
+ when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
134
+ action { [:STRING, text] }
138
135
 
139
- else
140
- text = @ss.string[@ss.pos .. -1]
141
- raise ScanError, "can not match: '" + text + "'"
142
- end # if
136
+ when (text = @ss.scan(/./))
137
+ action { [text, text] }
143
138
 
144
- else
145
- raise ScanError, "undefined state: '" + state.to_s + "'"
146
- end # case state
147
- token
148
- end # def _next_token
139
+
140
+ else
141
+ text = @ss.string[@ss.pos .. -1]
142
+ raise ScanError, "can not match: '" + text + "'"
143
+ end # if
144
+
145
+ else
146
+ raise ScanError, "undefined state: '" + state.to_s + "'"
147
+ end # case state
148
+ token
149
+ end # def _next_token
149
150
 
150
151
  end # class
151
152
  end
@@ -14,8 +14,8 @@ macro
14
14
  nmstart [_A-Za-z]|{nonascii}|{escape}
15
15
  ident [-@]?({nmstart})({nmchar})*
16
16
  name ({nmchar})+
17
- string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
18
- string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
17
+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
18
+ string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
19
19
  string {string1}|{string2}
20
20
 
21
21
  rule
@@ -34,7 +34,7 @@ rule
34
34
  {w}!={w} { [:NOT_EQUAL, text] }
35
35
  {w}={w} { [:EQUAL, text] }
36
36
  {w}\) { [:RPAREN, text] }
37
- {w}\[{w} { [:LSQUARE, text] }
37
+ \[{w} { [:LSQUARE, text] }
38
38
  {w}\] { [:RSQUARE, text] }
39
39
  {w}\+{w} { [:PLUS, text] }
40
40
  {w}>{w} { [:GREATER, text] }
@@ -44,9 +44,9 @@ rule
44
44
  {num} { [:NUMBER, text] }
45
45
  {w}\/\/{w} { [:DOUBLESLASH, text] }
46
46
  {w}\/{w} { [:SLASH, text] }
47
-
47
+
48
48
  U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
49
-
49
+
50
50
  [\s]+ { [:S, text] }
51
51
  {string} { [:STRING, text] }
52
52
  . { [text, text] }
@@ -2,7 +2,7 @@ module Nokogiri
2
2
  module CSS
3
3
  class XPathVisitor # :nodoc:
4
4
  def visit_function node
5
- # note that nth-child and nth-last-child are preprocessed in css/node.rb.
5
+
6
6
  msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
7
  return self.send(msg, node) if self.respond_to?(msg)
8
8
 
@@ -13,19 +13,31 @@ module Nokogiri
13
13
  "self::#{node.value[1]}"
14
14
  when /^eq\(/
15
15
  "position() = #{node.value[1]}"
16
- when /^(nth|nth-of-type|nth-child)\(/
17
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
18
- an_plus_b(node.value[1])
16
+ when /^(nth|nth-of-type)\(/
17
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
18
+ nth(node.value[1])
19
19
  else
20
20
  "position() = #{node.value[1]}"
21
21
  end
22
- when /^(nth-last-child|nth-last-of-type)\(/
23
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
24
- an_plus_b(node.value[1], :last => true)
22
+ when /^nth-child\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
+ nth(node.value[1], :child => true)
25
+ else
26
+ "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
27
+ end
28
+ when /^nth-last-of-type\(/
29
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
+ nth(node.value[1], :last => true)
25
31
  else
26
32
  index = node.value[1].to_i - 1
27
33
  index == 0 ? "position() = last()" : "position() = last() - #{index}"
28
34
  end
35
+ when /^nth-last-child\(/
36
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
+ nth(node.value[1], :last => true, :child => true)
38
+ else
39
+ "count(following-sibling::*) = #{node.value[1].to_i-1}"
40
+ end
29
41
  when /^(first|first-of-type)\(/
30
42
  "position() = 1"
31
43
  when /^(last|last-of-type)\(/
@@ -39,7 +51,7 @@ module Nokogiri
39
51
  when /^comment\(/
40
52
  "comment()"
41
53
  when /^has\(/
42
- node.value[1].accept(self)
54
+ ".//#{node.value[1].accept(self)}"
43
55
  else
44
56
  args = ['.'] + node.value[1..-1]
45
57
  "#{node.value.first}#{args.join(', ')})"
@@ -76,6 +88,13 @@ module Nokogiri
76
88
  value = node.value.last
77
89
  value = "'#{value}'" if value !~ /^['"]/
78
90
 
91
+ if (value[0]==value[-1]) && %q{"'}.include?(value[0])
92
+ str_value = value[1..-2]
93
+ if str_value.include?(value[0])
94
+ value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
95
+ end
96
+ end
97
+
79
98
  case node.value[1]
80
99
  when :equal
81
100
  attribute + " = " + "#{value}"
@@ -105,10 +124,13 @@ module Nokogiri
105
124
  return self.send(msg, node) if self.respond_to?(msg)
106
125
 
107
126
  case node.value.first
108
- when "first", "first-child" then "position() = 1"
109
- when "last", "last-child" then "position() = last()"
127
+ when "first" then "position() = 1"
128
+ when "first-child" then "count(preceding-sibling::*) = 0"
129
+ when "last" then "position() = last()"
130
+ when "last-child" then "count(following-sibling::*) = 0"
110
131
  when "first-of-type" then "position() = 1"
111
132
  when "last-of-type" then "position() = last()"
133
+ when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
112
134
  when "only-of-type" then "last() = 1"
113
135
  when "empty" then "not(node())"
114
136
  when "parent" then "node()"
@@ -123,8 +145,15 @@ module Nokogiri
123
145
  "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
124
146
  end
125
147
 
148
+ def visit_combinator node
149
+ if is_of_type_pseudo_class?(node.value.last)
150
+ "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
151
+ else
152
+ "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
153
+ end
154
+ end
155
+
126
156
  {
127
- 'combinator' => ' and ',
128
157
  'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
129
158
  'following_selector' => "/following-sibling::",
130
159
  'descendant_selector' => '//',
@@ -151,21 +180,51 @@ module Nokogiri
151
180
  end
152
181
 
153
182
  private
154
- def an_plus_b node, options={}
183
+ def nth node, options={}
155
184
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
156
185
 
157
- a = node.value[0].to_i
158
- b = node.value[3].to_i
159
- position = options[:last] ? "(last()-position()+1)" : "position()"
186
+ a, b = read_a_and_positive_b node.value
187
+ position = if options[:child]
188
+ options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
189
+ else
190
+ options[:last] ? "(last()-position()+1)" : "position()"
191
+ end
160
192
 
161
- if (b == 0)
162
- return "(#{position} mod #{a}) = 0"
193
+ if b.zero?
194
+ "(#{position} mod #{a}) = 0"
163
195
  else
164
- compare = (a < 0) ? "<=" : ">="
165
- return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
196
+ compare = a < 0 ? "<=" : ">="
197
+ if a.abs == 1
198
+ "#{position} #{compare} #{b}"
199
+ else
200
+ "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
201
+ end
166
202
  end
167
203
  end
168
204
 
205
+ def read_a_and_positive_b values
206
+ op = values[2]
207
+ if op == "+"
208
+ a = values[0].to_i
209
+ b = values[3].to_i
210
+ elsif op == "-"
211
+ a = values[0].to_i
212
+ b = a - (values[3].to_i % a)
213
+ else
214
+ raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
215
+ end
216
+ [a, b]
217
+ end
218
+
219
+ def is_of_type_pseudo_class? node
220
+ if node.type==:PSEUDO_CLASS
221
+ if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
222
+ node.value[0].value[0]
223
+ else
224
+ node.value[0]
225
+ end =~ /(nth|first|last|only)-of-type(\()?/
226
+ end
227
+ end
169
228
  end
170
229
  end
171
230
  end