nokogiri 1.5.10 → 1.10.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/LICENSE-DEPENDENCIES.md +1614 -0
- data/LICENSE.md +9 -0
- data/README.md +198 -0
- data/bin/nokogiri +50 -10
- data/dependencies.yml +72 -0
- data/ext/nokogiri/extconf.rb +634 -92
- data/ext/nokogiri/html_document.c +8 -8
- data/ext/nokogiri/html_element_description.c +15 -15
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +20 -12
- data/ext/nokogiri/nokogiri.h +1 -44
- data/ext/nokogiri/xml_attr.c +34 -25
- data/ext/nokogiri/xml_cdata.c +12 -6
- data/ext/nokogiri/xml_comment.c +18 -3
- data/ext/nokogiri/xml_document.c +64 -32
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_io.c +11 -6
- data/ext/nokogiri/xml_namespace.c +50 -17
- data/ext/nokogiri/xml_namespace.h +3 -2
- data/ext/nokogiri/xml_node.c +459 -240
- data/ext/nokogiri/xml_node_set.c +166 -147
- data/ext/nokogiri/xml_node_set.h +2 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +6 -19
- data/ext/nokogiri/xml_sax_parser.c +11 -13
- data/ext/nokogiri/xml_sax_parser_context.c +41 -1
- data/ext/nokogiri/xml_sax_push_parser.c +56 -12
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +11 -5
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +17 -38
- data/ext/nokogiri/xslt_stylesheet.c +10 -10
- data/lib/nokogiri/css/node.rb +0 -50
- data/lib/nokogiri/css/parser.rb +263 -233
- data/lib/nokogiri/css/parser.y +54 -40
- data/lib/nokogiri/css/tokenizer.rb +104 -103
- data/lib/nokogiri/css/tokenizer.rex +5 -5
- data/lib/nokogiri/css/xpath_visitor.rb +78 -19
- data/lib/nokogiri/decorators/slop.rb +12 -5
- data/lib/nokogiri/html/document.rb +102 -21
- data/lib/nokogiri/html/document_fragment.rb +11 -3
- data/lib/nokogiri/html/sax/parser.rb +12 -2
- data/lib/nokogiri/html/sax/push_parser.rb +22 -2
- data/lib/nokogiri/version.rb +40 -22
- data/lib/nokogiri/xml/builder.rb +34 -31
- data/lib/nokogiri/xml/document.rb +20 -14
- data/lib/nokogiri/xml/document_fragment.rb +50 -2
- data/lib/nokogiri/xml/dtd.rb +14 -4
- data/lib/nokogiri/xml/entity_reference.rb +18 -0
- data/lib/nokogiri/xml/node.rb +148 -203
- data/lib/nokogiri/xml/node_set.rb +139 -123
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +7 -8
- data/lib/nokogiri/xml/searchable.rb +230 -0
- data/lib/nokogiri/xml/syntax_error.rb +24 -1
- data/lib/nokogiri/xml.rb +3 -1
- data/lib/nokogiri.rb +40 -24
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
- data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
- metadata +252 -388
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
data/lib/nokogiri/css/parser.y
CHANGED
@@ -10,13 +10,12 @@ rule
|
|
10
10
|
result = [val.first, val.last].flatten
|
11
11
|
}
|
12
12
|
| prefixless_combinator_selector { result = val.flatten }
|
13
|
-
| simple_selector_1toN { result = val.flatten }
|
13
|
+
| optional_S simple_selector_1toN { result = [val.last].flatten }
|
14
14
|
;
|
15
15
|
combinator
|
16
16
|
: PLUS { result = :DIRECT_ADJACENT_SELECTOR }
|
17
17
|
| GREATER { result = :CHILD_SELECTOR }
|
18
18
|
| TILDE { result = :FOLLOWING_SELECTOR }
|
19
|
-
| S { result = :DESCENDANT_SELECTOR }
|
20
19
|
| DOUBLESLASH { result = :DESCENDANT_SELECTOR }
|
21
20
|
| SLASH { result = :CHILD_SELECTOR }
|
22
21
|
;
|
@@ -28,17 +27,6 @@ rule
|
|
28
27
|
Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
|
29
28
|
end
|
30
29
|
}
|
31
|
-
| element_name hcap_1toN negation {
|
32
|
-
result = Node.new(:CONDITIONAL_SELECTOR,
|
33
|
-
[
|
34
|
-
val.first,
|
35
|
-
Node.new(:COMBINATOR, [val[1], val.last])
|
36
|
-
]
|
37
|
-
)
|
38
|
-
}
|
39
|
-
| element_name negation {
|
40
|
-
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
41
|
-
}
|
42
30
|
| function
|
43
31
|
| function pseudo {
|
44
32
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
@@ -46,14 +34,6 @@ rule
|
|
46
34
|
| function attrib {
|
47
35
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
48
36
|
}
|
49
|
-
| hcap_1toN negation {
|
50
|
-
result = Node.new(:CONDITIONAL_SELECTOR,
|
51
|
-
[
|
52
|
-
Node.new(:ELEMENT_NAME, ['*']),
|
53
|
-
Node.new(:COMBINATOR, val)
|
54
|
-
]
|
55
|
-
)
|
56
|
-
}
|
57
37
|
| hcap_1toN {
|
58
38
|
result = Node.new(:CONDITIONAL_SELECTOR,
|
59
39
|
[Node.new(:ELEMENT_NAME, ['*']), val.first]
|
@@ -69,10 +49,13 @@ rule
|
|
69
49
|
: simple_selector combinator simple_selector_1toN {
|
70
50
|
result = Node.new(val[1], [val.first, val.last])
|
71
51
|
}
|
52
|
+
| simple_selector S simple_selector_1toN {
|
53
|
+
result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
|
54
|
+
}
|
72
55
|
| simple_selector
|
73
56
|
;
|
74
57
|
class
|
75
|
-
: '.' IDENT { result = Node.new(:CLASS_CONDITION, [val[1]]) }
|
58
|
+
: '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
|
76
59
|
;
|
77
60
|
element_name
|
78
61
|
: namespaced_ident
|
@@ -130,7 +113,7 @@ rule
|
|
130
113
|
| FUNCTION expr RPAREN {
|
131
114
|
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
132
115
|
}
|
133
|
-
| FUNCTION
|
116
|
+
| FUNCTION nth RPAREN {
|
134
117
|
result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
|
135
118
|
}
|
136
119
|
| NOT expr RPAREN {
|
@@ -148,12 +131,13 @@ rule
|
|
148
131
|
| STRING
|
149
132
|
| IDENT # even, odd
|
150
133
|
{
|
151
|
-
|
152
|
-
|
153
|
-
result = Node.new(:
|
154
|
-
|
155
|
-
|
156
|
-
|
134
|
+
case val[0]
|
135
|
+
when 'even'
|
136
|
+
result = Node.new(:NTH, ['2','n','+','0'])
|
137
|
+
when 'odd'
|
138
|
+
result = Node.new(:NTH, ['2','n','+','1'])
|
139
|
+
when 'n'
|
140
|
+
result = Node.new(:NTH, ['1','n','+','0'])
|
157
141
|
else
|
158
142
|
# This is not CSS standard. It allows us to support this:
|
159
143
|
# assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
|
@@ -163,11 +147,11 @@ rule
|
|
163
147
|
end
|
164
148
|
}
|
165
149
|
;
|
166
|
-
|
150
|
+
nth
|
167
151
|
: NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
|
168
152
|
{
|
169
153
|
if val[1] == 'n'
|
170
|
-
result = Node.new(:
|
154
|
+
result = Node.new(:NTH, val)
|
171
155
|
else
|
172
156
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
173
157
|
end
|
@@ -175,21 +159,27 @@ rule
|
|
175
159
|
| IDENT PLUS NUMBER { # n+3, -n+3
|
176
160
|
if val[0] == 'n'
|
177
161
|
val.unshift("1")
|
178
|
-
result = Node.new(:
|
162
|
+
result = Node.new(:NTH, val)
|
179
163
|
elsif val[0] == '-n'
|
180
164
|
val[0] = 'n'
|
181
165
|
val.unshift("-1")
|
182
|
-
result = Node.new(:
|
166
|
+
result = Node.new(:NTH, val)
|
183
167
|
else
|
184
168
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
185
169
|
end
|
186
170
|
}
|
187
|
-
| NUMBER IDENT
|
188
|
-
|
189
|
-
if
|
171
|
+
| NUMBER IDENT { # 5n, -5n, 10n-1
|
172
|
+
n = val[1]
|
173
|
+
if n[0, 2] == 'n-'
|
174
|
+
val[1] = 'n'
|
175
|
+
val << "-"
|
176
|
+
# b is contained in n as n is the string "n-b"
|
177
|
+
val << n[2, n.size]
|
178
|
+
result = Node.new(:NTH, val)
|
179
|
+
elsif n == 'n'
|
190
180
|
val << "+"
|
191
181
|
val << "0"
|
192
|
-
result = Node.new(:
|
182
|
+
result = Node.new(:NTH, val)
|
193
183
|
else
|
194
184
|
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
195
185
|
end
|
@@ -218,17 +208,22 @@ rule
|
|
218
208
|
| pseudo hcap_1toN {
|
219
209
|
result = Node.new(:COMBINATOR, val)
|
220
210
|
}
|
211
|
+
| negation hcap_1toN {
|
212
|
+
result = Node.new(:COMBINATOR, val)
|
213
|
+
}
|
221
214
|
| attribute_id
|
222
215
|
| class
|
223
216
|
| attrib
|
224
217
|
| pseudo
|
218
|
+
| negation
|
225
219
|
;
|
226
220
|
attribute_id
|
227
|
-
: HASH { result = Node.new(:ID, val) }
|
221
|
+
: HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) }
|
228
222
|
;
|
229
223
|
attrib_val_0or1
|
230
|
-
: eql_incl_dash IDENT { result = [val.first, val[1]] }
|
231
|
-
| eql_incl_dash STRING { result = [val.first, val[1]] }
|
224
|
+
: eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] }
|
225
|
+
| eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] }
|
226
|
+
| eql_incl_dash NUMBER { result = [val.first, val[1]] }
|
232
227
|
|
|
233
228
|
;
|
234
229
|
eql_incl_dash
|
@@ -250,9 +245,28 @@ rule
|
|
250
245
|
| element_name hcap_1toN
|
251
246
|
| hcap_1toN
|
252
247
|
;
|
248
|
+
optional_S
|
249
|
+
: S
|
250
|
+
|
|
251
|
+
;
|
253
252
|
end
|
254
253
|
|
255
254
|
---- header
|
256
255
|
|
257
256
|
require 'nokogiri/css/parser_extras'
|
258
257
|
|
258
|
+
---- inner
|
259
|
+
|
260
|
+
def unescape_css_identifier(identifier)
|
261
|
+
identifier.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/){ |m| $1 || [$2.hex].pack('U') }
|
262
|
+
end
|
263
|
+
|
264
|
+
def unescape_css_string(str)
|
265
|
+
str.gsub(/\\(?:([^0-9a-fA-F])|([0-9a-fA-F]{1,6})\s?)/) do |m|
|
266
|
+
if $1=="\n"
|
267
|
+
''
|
268
|
+
else
|
269
|
+
$1 || [$2.hex].pack('U')
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
data/lib/nokogiri/css/tokenizer.rb
CHANGED
@@ -1,151 +1,152 @@
|
|
1
1
|
#--
|
2
2
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by rex 1.0.
|
3
|
+
# This file is automatically generated by rex 1.0.7
|
4
4
|
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
5
|
#++
|
6
6
|
|
7
7
|
module Nokogiri
|
8
8
|
module CSS
|
9
9
|
class Tokenizer # :nodoc:
|
10
|
-
|
10
|
+
require 'strscan'
|
11
11
|
|
12
|
-
|
12
|
+
class ScanError < StandardError ; end
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
attr_reader :lineno
|
15
|
+
attr_reader :filename
|
16
|
+
attr_accessor :state
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
def scan_setup(str)
|
19
|
+
@ss = StringScanner.new(str)
|
20
|
+
@lineno = 1
|
21
|
+
@state = nil
|
22
|
+
end
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
def action
|
25
|
+
yield
|
26
|
+
end
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
28
|
+
def scan_str(str)
|
29
|
+
scan_setup(str)
|
30
|
+
do_parse
|
31
|
+
end
|
32
|
+
alias :scan :scan_str
|
33
33
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
def load_file( filename )
|
35
|
+
@filename = filename
|
36
|
+
File.open(filename, "r") do |f|
|
37
|
+
scan_setup(f.read)
|
38
|
+
end
|
39
|
+
end
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
41
|
+
def scan_file( filename )
|
42
|
+
load_file(filename)
|
43
|
+
do_parse
|
44
|
+
end
|
45
45
|
|
46
46
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
# skips empty actions
|
51
|
-
until token = _next_token or @ss.eos?; end
|
52
|
-
token
|
53
|
-
end
|
47
|
+
def next_token
|
48
|
+
return if @ss.eos?
|
54
49
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
when nil
|
60
|
-
case
|
61
|
-
when (text = @ss.scan(/has\([\s]*/))
|
62
|
-
action { [:HAS, text] }
|
50
|
+
# skips empty actions
|
51
|
+
until token = _next_token or @ss.eos?; end
|
52
|
+
token
|
53
|
+
end
|
63
54
|
|
64
|
-
|
65
|
-
|
55
|
+
def _next_token
|
56
|
+
text = @ss.peek(1)
|
57
|
+
@lineno += 1 if text == "\n"
|
58
|
+
token = case @state
|
59
|
+
when nil
|
60
|
+
case
|
61
|
+
when (text = @ss.scan(/has\([\s]*/))
|
62
|
+
action { [:HAS, text] }
|
66
63
|
|
67
|
-
|
68
|
-
|
64
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
65
|
+
action { [:FUNCTION, text] }
|
69
66
|
|
70
|
-
|
71
|
-
|
67
|
+
when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
68
|
+
action { [:IDENT, text] }
|
72
69
|
|
73
|
-
|
74
|
-
|
70
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
71
|
+
action { [:HASH, text] }
|
75
72
|
|
76
|
-
|
77
|
-
|
73
|
+
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
74
|
+
action { [:INCLUDES, text] }
|
78
75
|
|
79
|
-
|
80
|
-
|
76
|
+
when (text = @ss.scan(/[\s]*\|=[\s]*/))
|
77
|
+
action { [:DASHMATCH, text] }
|
81
78
|
|
82
|
-
|
83
|
-
|
79
|
+
when (text = @ss.scan(/[\s]*\^=[\s]*/))
|
80
|
+
action { [:PREFIXMATCH, text] }
|
84
81
|
|
85
|
-
|
86
|
-
|
82
|
+
when (text = @ss.scan(/[\s]*\$=[\s]*/))
|
83
|
+
action { [:SUFFIXMATCH, text] }
|
87
84
|
|
88
|
-
|
89
|
-
|
85
|
+
when (text = @ss.scan(/[\s]*\*=[\s]*/))
|
86
|
+
action { [:SUBSTRINGMATCH, text] }
|
90
87
|
|
91
|
-
|
92
|
-
|
88
|
+
when (text = @ss.scan(/[\s]*!=[\s]*/))
|
89
|
+
action { [:NOT_EQUAL, text] }
|
93
90
|
|
94
|
-
|
95
|
-
|
91
|
+
when (text = @ss.scan(/[\s]*=[\s]*/))
|
92
|
+
action { [:EQUAL, text] }
|
96
93
|
|
97
|
-
|
98
|
-
|
94
|
+
when (text = @ss.scan(/[\s]*\)/))
|
95
|
+
action { [:RPAREN, text] }
|
99
96
|
|
100
|
-
|
101
|
-
|
97
|
+
when (text = @ss.scan(/\[[\s]*/))
|
98
|
+
action { [:LSQUARE, text] }
|
102
99
|
|
103
|
-
|
104
|
-
|
100
|
+
when (text = @ss.scan(/[\s]*\]/))
|
101
|
+
action { [:RSQUARE, text] }
|
105
102
|
|
106
|
-
|
107
|
-
|
103
|
+
when (text = @ss.scan(/[\s]*\+[\s]*/))
|
104
|
+
action { [:PLUS, text] }
|
108
105
|
|
109
|
-
|
110
|
-
|
106
|
+
when (text = @ss.scan(/[\s]*>[\s]*/))
|
107
|
+
action { [:GREATER, text] }
|
111
108
|
|
112
|
-
|
113
|
-
|
109
|
+
when (text = @ss.scan(/[\s]*,[\s]*/))
|
110
|
+
action { [:COMMA, text] }
|
114
111
|
|
115
|
-
|
116
|
-
|
112
|
+
when (text = @ss.scan(/[\s]*~[\s]*/))
|
113
|
+
action { [:TILDE, text] }
|
117
114
|
|
118
|
-
|
119
|
-
|
115
|
+
when (text = @ss.scan(/\:not\([\s]*/))
|
116
|
+
action { [:NOT, text] }
|
120
117
|
|
121
|
-
|
122
|
-
|
118
|
+
when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
|
119
|
+
action { [:NUMBER, text] }
|
123
120
|
|
124
|
-
|
125
|
-
|
121
|
+
when (text = @ss.scan(/[\s]*\/\/[\s]*/))
|
122
|
+
action { [:DOUBLESLASH, text] }
|
126
123
|
|
127
|
-
|
128
|
-
|
124
|
+
when (text = @ss.scan(/[\s]*\/[\s]*/))
|
125
|
+
action { [:SLASH, text] }
|
129
126
|
|
130
|
-
|
131
|
-
|
127
|
+
when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
|
128
|
+
action {[:UNICODE_RANGE, text] }
|
132
129
|
|
133
|
-
|
134
|
-
|
130
|
+
when (text = @ss.scan(/[\s]+/))
|
131
|
+
action { [:S, text] }
|
135
132
|
|
136
|
-
|
137
|
-
|
133
|
+
when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
|
134
|
+
action { [:STRING, text] }
|
138
135
|
|
139
|
-
|
140
|
-
|
141
|
-
raise ScanError, "can not match: '" + text + "'"
|
142
|
-
end # if
|
136
|
+
when (text = @ss.scan(/./))
|
137
|
+
action { [text, text] }
|
143
138
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
139
|
+
|
140
|
+
else
|
141
|
+
text = @ss.string[@ss.pos .. -1]
|
142
|
+
raise ScanError, "can not match: '" + text + "'"
|
143
|
+
end # if
|
144
|
+
|
145
|
+
else
|
146
|
+
raise ScanError, "undefined state: '" + state.to_s + "'"
|
147
|
+
end # case state
|
148
|
+
token
|
149
|
+
end # def _next_token
|
149
150
|
|
150
151
|
end # class
|
151
152
|
end
|
data/lib/nokogiri/css/tokenizer.rex
CHANGED
@@ -14,8 +14,8 @@ macro
|
|
14
14
|
nmstart [_A-Za-z]|{nonascii}|{escape}
|
15
15
|
ident [-@]?({nmstart})({nmchar})*
|
16
16
|
name ({nmchar})+
|
17
|
-
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*"
|
18
|
-
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*'
|
17
|
+
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
18
|
+
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
19
19
|
string {string1}|{string2}
|
20
20
|
|
21
21
|
rule
|
@@ -34,7 +34,7 @@ rule
|
|
34
34
|
{w}!={w} { [:NOT_EQUAL, text] }
|
35
35
|
{w}={w} { [:EQUAL, text] }
|
36
36
|
{w}\) { [:RPAREN, text] }
|
37
|
-
|
37
|
+
\[{w} { [:LSQUARE, text] }
|
38
38
|
{w}\] { [:RSQUARE, text] }
|
39
39
|
{w}\+{w} { [:PLUS, text] }
|
40
40
|
{w}>{w} { [:GREATER, text] }
|
@@ -44,9 +44,9 @@ rule
|
|
44
44
|
{num} { [:NUMBER, text] }
|
45
45
|
{w}\/\/{w} { [:DOUBLESLASH, text] }
|
46
46
|
{w}\/{w} { [:SLASH, text] }
|
47
|
-
|
47
|
+
|
48
48
|
U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})? {[:UNICODE_RANGE, text] }
|
49
|
-
|
49
|
+
|
50
50
|
[\s]+ { [:S, text] }
|
51
51
|
{string} { [:STRING, text] }
|
52
52
|
. { [text, text] }
|
data/lib/nokogiri/css/xpath_visitor.rb
CHANGED
@@ -2,7 +2,7 @@ module Nokogiri
|
|
2
2
|
module CSS
|
3
3
|
class XPathVisitor # :nodoc:
|
4
4
|
def visit_function node
|
5
|
-
|
5
|
+
|
6
6
|
msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
|
7
7
|
return self.send(msg, node) if self.respond_to?(msg)
|
8
8
|
|
@@ -13,19 +13,31 @@ module Nokogiri
|
|
13
13
|
"self::#{node.value[1]}"
|
14
14
|
when /^eq\(/
|
15
15
|
"position() = #{node.value[1]}"
|
16
|
-
when /^(nth|nth-of-type
|
17
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :
|
18
|
-
|
16
|
+
when /^(nth|nth-of-type)\(/
|
17
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
18
|
+
nth(node.value[1])
|
19
19
|
else
|
20
20
|
"position() = #{node.value[1]}"
|
21
21
|
end
|
22
|
-
when /^
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :
|
24
|
-
|
22
|
+
when /^nth-child\(/
|
23
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
24
|
+
nth(node.value[1], :child => true)
|
25
|
+
else
|
26
|
+
"count(preceding-sibling::*) = #{node.value[1].to_i-1}"
|
27
|
+
end
|
28
|
+
when /^nth-last-of-type\(/
|
29
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
30
|
+
nth(node.value[1], :last => true)
|
25
31
|
else
|
26
32
|
index = node.value[1].to_i - 1
|
27
33
|
index == 0 ? "position() = last()" : "position() = last() - #{index}"
|
28
34
|
end
|
35
|
+
when /^nth-last-child\(/
|
36
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
|
37
|
+
nth(node.value[1], :last => true, :child => true)
|
38
|
+
else
|
39
|
+
"count(following-sibling::*) = #{node.value[1].to_i-1}"
|
40
|
+
end
|
29
41
|
when /^(first|first-of-type)\(/
|
30
42
|
"position() = 1"
|
31
43
|
when /^(last|last-of-type)\(/
|
@@ -39,7 +51,7 @@ module Nokogiri
|
|
39
51
|
when /^comment\(/
|
40
52
|
"comment()"
|
41
53
|
when /^has\(/
|
42
|
-
node.value[1].accept(self)
|
54
|
+
".//#{node.value[1].accept(self)}"
|
43
55
|
else
|
44
56
|
args = ['.'] + node.value[1..-1]
|
45
57
|
"#{node.value.first}#{args.join(', ')})"
|
@@ -76,6 +88,13 @@ module Nokogiri
|
|
76
88
|
value = node.value.last
|
77
89
|
value = "'#{value}'" if value !~ /^['"]/
|
78
90
|
|
91
|
+
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
92
|
+
str_value = value[1..-2]
|
93
|
+
if str_value.include?(value[0])
|
94
|
+
value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
79
98
|
case node.value[1]
|
80
99
|
when :equal
|
81
100
|
attribute + " = " + "#{value}"
|
@@ -105,10 +124,13 @@ module Nokogiri
|
|
105
124
|
return self.send(msg, node) if self.respond_to?(msg)
|
106
125
|
|
107
126
|
case node.value.first
|
108
|
-
when "first"
|
109
|
-
when "
|
127
|
+
when "first" then "position() = 1"
|
128
|
+
when "first-child" then "count(preceding-sibling::*) = 0"
|
129
|
+
when "last" then "position() = last()"
|
130
|
+
when "last-child" then "count(following-sibling::*) = 0"
|
110
131
|
when "first-of-type" then "position() = 1"
|
111
132
|
when "last-of-type" then "position() = last()"
|
133
|
+
when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
|
112
134
|
when "only-of-type" then "last() = 1"
|
113
135
|
when "empty" then "not(node())"
|
114
136
|
when "parent" then "node()"
|
@@ -123,8 +145,15 @@ module Nokogiri
|
|
123
145
|
"contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
|
124
146
|
end
|
125
147
|
|
148
|
+
def visit_combinator node
|
149
|
+
if is_of_type_pseudo_class?(node.value.last)
|
150
|
+
"#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
|
151
|
+
else
|
152
|
+
"#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
126
156
|
{
|
127
|
-
'combinator' => ' and ',
|
128
157
|
'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
|
129
158
|
'following_selector' => "/following-sibling::",
|
130
159
|
'descendant_selector' => '//',
|
@@ -151,21 +180,51 @@ module Nokogiri
|
|
151
180
|
end
|
152
181
|
|
153
182
|
private
|
154
|
-
def
|
183
|
+
def nth node, options={}
|
155
184
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
156
185
|
|
157
|
-
a = node.value
|
158
|
-
|
159
|
-
|
186
|
+
a, b = read_a_and_positive_b node.value
|
187
|
+
position = if options[:child]
|
188
|
+
options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
|
189
|
+
else
|
190
|
+
options[:last] ? "(last()-position()+1)" : "position()"
|
191
|
+
end
|
160
192
|
|
161
|
-
if
|
162
|
-
|
193
|
+
if b.zero?
|
194
|
+
"(#{position} mod #{a}) = 0"
|
163
195
|
else
|
164
|
-
compare =
|
165
|
-
|
196
|
+
compare = a < 0 ? "<=" : ">="
|
197
|
+
if a.abs == 1
|
198
|
+
"#{position} #{compare} #{b}"
|
199
|
+
else
|
200
|
+
"(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
|
201
|
+
end
|
166
202
|
end
|
167
203
|
end
|
168
204
|
|
205
|
+
def read_a_and_positive_b values
|
206
|
+
op = values[2]
|
207
|
+
if op == "+"
|
208
|
+
a = values[0].to_i
|
209
|
+
b = values[3].to_i
|
210
|
+
elsif op == "-"
|
211
|
+
a = values[0].to_i
|
212
|
+
b = a - (values[3].to_i % a)
|
213
|
+
else
|
214
|
+
raise ArgumentError, "expected an+b node to have either + or - as the operator, but is #{op.inspect}"
|
215
|
+
end
|
216
|
+
[a, b]
|
217
|
+
end
|
218
|
+
|
219
|
+
def is_of_type_pseudo_class? node
|
220
|
+
if node.type==:PSEUDO_CLASS
|
221
|
+
if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
|
222
|
+
node.value[0].value[0]
|
223
|
+
else
|
224
|
+
node.value[0]
|
225
|
+
end =~ /(nth|first|last|only)-of-type(\()?/
|
226
|
+
end
|
227
|
+
end
|
169
228
|
end
|
170
229
|
end
|
171
230
|
end
|