nokogiri 1.13.0 → 1.13.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/lib/nokogiri/css/parser.rb +351 -340
- data/lib/nokogiri/css/parser.y +241 -244
- data/lib/nokogiri/css/tokenizer.rb +2 -2
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +16 -18
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +51 -40
- data/lib/nokogiri/xslt.rb +19 -12
- metadata +2 -2
data/lib/nokogiri/css/parser.y
CHANGED
@@ -5,250 +5,247 @@ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL
|
|
5
5
|
token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS
|
6
6
|
|
7
7
|
rule
|
8
|
-
selector
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
class
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
}
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
attrib
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
hcap_1toN
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
: S
|
250
|
-
|
|
251
|
-
;
|
8
|
+
selector:
|
9
|
+
selector COMMA simple_selector_1toN {
|
10
|
+
result = [val[0], val[2]].flatten
|
11
|
+
}
|
12
|
+
| prefixless_combinator_selector { result = val.flatten }
|
13
|
+
| optional_S simple_selector_1toN { result = [val[1]].flatten }
|
14
|
+
;
|
15
|
+
|
16
|
+
combinator:
|
17
|
+
PLUS { result = :DIRECT_ADJACENT_SELECTOR }
|
18
|
+
| GREATER { result = :CHILD_SELECTOR }
|
19
|
+
| TILDE { result = :FOLLOWING_SELECTOR }
|
20
|
+
| DOUBLESLASH { result = :DESCENDANT_SELECTOR }
|
21
|
+
| SLASH { result = :CHILD_SELECTOR }
|
22
|
+
;
|
23
|
+
|
24
|
+
xpath_attribute_name:
|
25
|
+
'@' IDENT { result = val[1] }
|
26
|
+
;
|
27
|
+
|
28
|
+
xpath_attribute:
|
29
|
+
xpath_attribute_name { result = Node.new(:ATTRIB_NAME, [val[0]]) }
|
30
|
+
;
|
31
|
+
|
32
|
+
simple_selector:
|
33
|
+
element_name hcap_0toN {
|
34
|
+
result = if val[1].nil?
|
35
|
+
val[0]
|
36
|
+
else
|
37
|
+
Node.new(:CONDITIONAL_SELECTOR, [val[0], val[1]])
|
38
|
+
end
|
39
|
+
}
|
40
|
+
| function
|
41
|
+
| function pseudo { result = Node.new(:CONDITIONAL_SELECTOR, val) }
|
42
|
+
| function attrib { result = Node.new(:CONDITIONAL_SELECTOR, val) }
|
43
|
+
| hcap_1toN { result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]]) }
|
44
|
+
| xpath_attribute
|
45
|
+
;
|
46
|
+
|
47
|
+
prefixless_combinator_selector:
|
48
|
+
combinator simple_selector_1toN { result = Node.new(val[0], [nil, val[1]]) }
|
49
|
+
;
|
50
|
+
|
51
|
+
simple_selector_1toN:
|
52
|
+
simple_selector combinator simple_selector_1toN { result = Node.new(val[1], [val[0], val[2]]) }
|
53
|
+
| simple_selector S simple_selector_1toN { result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) }
|
54
|
+
| simple_selector
|
55
|
+
;
|
56
|
+
|
57
|
+
class:
|
58
|
+
'.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) }
|
59
|
+
;
|
60
|
+
|
61
|
+
element_name:
|
62
|
+
namespaced_ident
|
63
|
+
| '*' { result = Node.new(:ELEMENT_NAME, val) }
|
64
|
+
;
|
65
|
+
|
66
|
+
namespaced_ident:
|
67
|
+
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
|
68
|
+
| IDENT {
|
69
|
+
name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
|
70
|
+
result = Node.new(:ELEMENT_NAME, [name])
|
71
|
+
}
|
72
|
+
;
|
73
|
+
|
74
|
+
namespace:
|
75
|
+
IDENT { result = val[0] }
|
76
|
+
|
|
77
|
+
;
|
78
|
+
|
79
|
+
attrib:
|
80
|
+
LSQUARE attrib_name attrib_val_0or1 RSQUARE {
|
81
|
+
result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
|
82
|
+
}
|
83
|
+
| LSQUARE function attrib_val_0or1 RSQUARE {
|
84
|
+
result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []))
|
85
|
+
}
|
86
|
+
| LSQUARE NUMBER RSQUARE {
|
87
|
+
result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])])
|
88
|
+
}
|
89
|
+
;
|
90
|
+
|
91
|
+
attrib_name:
|
92
|
+
namespace '|' IDENT { result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')]) }
|
93
|
+
| IDENT { result = Node.new(:ATTRIB_NAME, [val[0]]) }
|
94
|
+
| xpath_attribute
|
95
|
+
;
|
96
|
+
|
97
|
+
function:
|
98
|
+
FUNCTION RPAREN {
|
99
|
+
result = Node.new(:FUNCTION, [val[0].strip])
|
100
|
+
}
|
101
|
+
| FUNCTION expr RPAREN {
|
102
|
+
result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
|
103
|
+
}
|
104
|
+
| FUNCTION nth RPAREN {
|
105
|
+
result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
|
106
|
+
}
|
107
|
+
| NOT expr RPAREN {
|
108
|
+
result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
|
109
|
+
}
|
110
|
+
| HAS selector RPAREN {
|
111
|
+
result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten)
|
112
|
+
}
|
113
|
+
;
|
114
|
+
|
115
|
+
expr:
|
116
|
+
NUMBER COMMA expr { result = [val[0], val[2]] }
|
117
|
+
| STRING COMMA expr { result = [val[0], val[2]] }
|
118
|
+
| IDENT COMMA expr { result = [val[0], val[2]] }
|
119
|
+
| xpath_attribute COMMA expr { result = [val[0], val[2]] }
|
120
|
+
| NUMBER
|
121
|
+
| STRING
|
122
|
+
| IDENT {
|
123
|
+
case val[0]
|
124
|
+
when 'even'
|
125
|
+
result = Node.new(:NTH, ['2','n','+','0'])
|
126
|
+
when 'odd'
|
127
|
+
result = Node.new(:NTH, ['2','n','+','1'])
|
128
|
+
when 'n'
|
129
|
+
result = Node.new(:NTH, ['1','n','+','0'])
|
130
|
+
else
|
131
|
+
result = val
|
132
|
+
end
|
133
|
+
}
|
134
|
+
| xpath_attribute
|
135
|
+
;
|
136
|
+
|
137
|
+
nth:
|
138
|
+
NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3
|
139
|
+
{
|
140
|
+
if val[1] == 'n'
|
141
|
+
result = Node.new(:NTH, val)
|
142
|
+
else
|
143
|
+
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
144
|
+
end
|
145
|
+
}
|
146
|
+
| IDENT PLUS NUMBER { # n+3, -n+3
|
147
|
+
if val[0] == 'n'
|
148
|
+
val.unshift("1")
|
149
|
+
result = Node.new(:NTH, val)
|
150
|
+
elsif val[0] == '-n'
|
151
|
+
val[0] = 'n'
|
152
|
+
val.unshift("-1")
|
153
|
+
result = Node.new(:NTH, val)
|
154
|
+
else
|
155
|
+
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
156
|
+
end
|
157
|
+
}
|
158
|
+
| NUMBER IDENT { # 5n, -5n, 10n-1
|
159
|
+
n = val[1]
|
160
|
+
if n[0, 2] == 'n-'
|
161
|
+
val[1] = 'n'
|
162
|
+
val << "-"
|
163
|
+
# b is contained in n as n is the string "n-b"
|
164
|
+
val << n[2, n.size]
|
165
|
+
result = Node.new(:NTH, val)
|
166
|
+
elsif n == 'n'
|
167
|
+
val << "+"
|
168
|
+
val << "0"
|
169
|
+
result = Node.new(:NTH, val)
|
170
|
+
else
|
171
|
+
raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
|
172
|
+
end
|
173
|
+
}
|
174
|
+
;
|
175
|
+
|
176
|
+
pseudo:
|
177
|
+
':' function {
|
178
|
+
result = Node.new(:PSEUDO_CLASS, [val[1]])
|
179
|
+
}
|
180
|
+
| ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) }
|
181
|
+
;
|
182
|
+
|
183
|
+
hcap_0toN:
|
184
|
+
hcap_1toN
|
185
|
+
|
|
186
|
+
;
|
187
|
+
|
188
|
+
hcap_1toN:
|
189
|
+
attribute_id hcap_1toN {
|
190
|
+
result = Node.new(:COMBINATOR, val)
|
191
|
+
}
|
192
|
+
| class hcap_1toN {
|
193
|
+
result = Node.new(:COMBINATOR, val)
|
194
|
+
}
|
195
|
+
| attrib hcap_1toN {
|
196
|
+
result = Node.new(:COMBINATOR, val)
|
197
|
+
}
|
198
|
+
| pseudo hcap_1toN {
|
199
|
+
result = Node.new(:COMBINATOR, val)
|
200
|
+
}
|
201
|
+
| negation hcap_1toN {
|
202
|
+
result = Node.new(:COMBINATOR, val)
|
203
|
+
}
|
204
|
+
| attribute_id
|
205
|
+
| class
|
206
|
+
| attrib
|
207
|
+
| pseudo
|
208
|
+
| negation
|
209
|
+
;
|
210
|
+
|
211
|
+
attribute_id:
|
212
|
+
HASH { result = Node.new(:ID, [unescape_css_identifier(val[0])]) }
|
213
|
+
;
|
214
|
+
|
215
|
+
attrib_val_0or1:
|
216
|
+
eql_incl_dash IDENT { result = [val[0], unescape_css_identifier(val[1])] }
|
217
|
+
| eql_incl_dash STRING { result = [val[0], unescape_css_string(val[1])] }
|
218
|
+
| eql_incl_dash NUMBER { result = [val[0], val[1]] }
|
219
|
+
|
|
220
|
+
;
|
221
|
+
|
222
|
+
eql_incl_dash:
|
223
|
+
EQUAL { result = :equal }
|
224
|
+
| PREFIXMATCH { result = :prefix_match }
|
225
|
+
| SUFFIXMATCH { result = :suffix_match }
|
226
|
+
| SUBSTRINGMATCH { result = :substring_match }
|
227
|
+
| NOT_EQUAL { result = :not_equal }
|
228
|
+
| INCLUDES { result = :includes }
|
229
|
+
| DASHMATCH { result = :dash_match }
|
230
|
+
;
|
231
|
+
|
232
|
+
negation:
|
233
|
+
NOT negation_arg RPAREN {
|
234
|
+
result = Node.new(:NOT, [val[1]])
|
235
|
+
}
|
236
|
+
;
|
237
|
+
|
238
|
+
negation_arg:
|
239
|
+
element_name
|
240
|
+
| element_name hcap_1toN
|
241
|
+
| hcap_1toN
|
242
|
+
;
|
243
|
+
|
244
|
+
optional_S:
|
245
|
+
S
|
246
|
+
|
|
247
|
+
;
|
248
|
+
|
252
249
|
end
|
253
250
|
|
254
251
|
---- header
|
@@ -63,10 +63,10 @@ class Tokenizer
|
|
63
63
|
when (text = @ss.scan(/has\([\s]*/))
|
64
64
|
action { [:HAS, text] }
|
65
65
|
|
66
|
-
when (text = @ss.scan(
|
66
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
|
67
67
|
action { [:FUNCTION, text] }
|
68
68
|
|
69
|
-
when (text = @ss.scan(
|
69
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
|
70
70
|
action { [:IDENT, text] }
|
71
71
|
|
72
72
|
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
|
@@ -13,7 +13,7 @@ macro
|
|
13
13
|
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
14
14
|
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
15
15
|
nmstart [_A-Za-z]|{nonascii}|{escape}
|
16
|
-
ident
|
16
|
+
ident -?({nmstart})({nmchar})*
|
17
17
|
name ({nmchar})+
|
18
18
|
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
19
19
|
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
@@ -128,8 +128,11 @@ module Nokogiri
|
|
128
128
|
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
129
129
|
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
130
130
|
else
|
131
|
-
#
|
132
|
-
args = ["."]
|
131
|
+
# xpath function call, let's marshal those arguments
|
132
|
+
args = ["."]
|
133
|
+
args += node.value[1..-1].map do |n|
|
134
|
+
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
135
|
+
end
|
133
136
|
"#{node.value.first}#{args.join(",")})"
|
134
137
|
end
|
135
138
|
end
|
@@ -149,17 +152,8 @@ module Nokogiri
|
|
149
152
|
end
|
150
153
|
|
151
154
|
def visit_attribute_condition(node)
|
152
|
-
attribute =
|
153
|
-
|
154
|
-
else
|
155
|
-
"@"
|
156
|
-
end
|
157
|
-
attribute += node.value.first.accept(self)
|
158
|
-
|
159
|
-
# non-standard. attributes starting with '@'
|
160
|
-
attribute.gsub!(/^@@/, "@")
|
161
|
-
|
162
|
-
return attribute unless node.value.length == 3
|
155
|
+
attribute = node.value.first.accept(self)
|
156
|
+
return attribute if node.value.length == 1
|
163
157
|
|
164
158
|
value = node.value.last
|
165
159
|
value = "'#{value}'" unless /^['"]/.match?(value)
|
@@ -249,10 +243,7 @@ module Nokogiri
|
|
249
243
|
end
|
250
244
|
|
251
245
|
def visit_element_name(node)
|
252
|
-
if @doctype == DoctypeConfig::HTML5 && node
|
253
|
-
# if there is already a namespace, use it as normal
|
254
|
-
return node.value.first if node.value.first.include?(":")
|
255
|
-
|
246
|
+
if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
|
256
247
|
# HTML5 has namespaces that should be ignored in CSS queries
|
257
248
|
# https://github.com/sparklemotion/nokogiri/issues/2376
|
258
249
|
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
@@ -270,7 +261,7 @@ module Nokogiri
|
|
270
261
|
end
|
271
262
|
|
272
263
|
def visit_attrib_name(node)
|
273
|
-
node.value.first
|
264
|
+
"@#{node.value.first}"
|
274
265
|
end
|
275
266
|
|
276
267
|
def accept(node)
|
@@ -279,6 +270,13 @@ module Nokogiri
|
|
279
270
|
|
280
271
|
private
|
281
272
|
|
273
|
+
def html5_element_name_needs_namespace_handling(node)
|
274
|
+
# if this is the wildcard selector "*", use it as normal
|
275
|
+
node.value.first != "*" &&
|
276
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
277
|
+
!node.value.first.include?(":")
|
278
|
+
end
|
279
|
+
|
282
280
|
def nth(node, options = {})
|
283
281
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
284
282
|
|