nokogiri 1.12.3 → 1.13.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +5 -6
- data/ext/nokogiri/extconf.rb +47 -35
- data/ext/nokogiri/xml_document.c +35 -35
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_node.c +645 -333
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- metadata +101 -27
@@ -1,42 +1,116 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
module CSS
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
# When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
|
7
|
+
# class allows for changing some of the behaviors related to builtin xpath functions and quirks
|
8
|
+
# of HTML5.
|
9
|
+
class XPathVisitor
|
10
|
+
WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
|
11
|
+
|
12
|
+
# Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
|
13
|
+
module BuiltinsConfig
|
14
|
+
# Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
|
15
|
+
# the default when calling Nokogiri::CSS.xpath_for directly.
|
16
|
+
NEVER = :never
|
17
|
+
|
18
|
+
# Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
|
19
|
+
ALWAYS = :always
|
20
|
+
|
21
|
+
# Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
|
22
|
+
# the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
|
23
|
+
# node.
|
24
|
+
OPTIMAL = :optimal
|
25
|
+
|
26
|
+
# :nodoc: array of values for validation
|
27
|
+
VALUES = [NEVER, ALWAYS, OPTIMAL]
|
28
|
+
end
|
29
|
+
|
30
|
+
# Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
|
31
|
+
# being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
|
32
|
+
# node will choose the correct option automatically.
|
33
|
+
module DoctypeConfig
|
34
|
+
# The document being searched is an XML document. This is the default.
|
35
|
+
XML = :xml
|
36
|
+
|
37
|
+
# The document being searched is an HTML4 document.
|
38
|
+
HTML4 = :html4
|
39
|
+
|
40
|
+
# The document being searched is an HTML5 document.
|
41
|
+
HTML5 = :html5
|
42
|
+
|
43
|
+
# :nodoc: array of values for validation
|
44
|
+
VALUES = [XML, HTML4, HTML5]
|
45
|
+
end
|
46
|
+
|
47
|
+
# :call-seq:
|
48
|
+
# new() → XPathVisitor
|
49
|
+
# new(builtins:, doctype:) → XPathVisitor
|
50
|
+
#
|
51
|
+
# [Parameters]
|
52
|
+
# - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
|
53
|
+
# - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
|
54
|
+
#
|
55
|
+
# [Returns] XPathVisitor
|
56
|
+
#
|
57
|
+
def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
|
58
|
+
unless BuiltinsConfig::VALUES.include?(builtins)
|
59
|
+
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
|
60
|
+
end
|
61
|
+
unless DoctypeConfig::VALUES.include?(doctype)
|
62
|
+
raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
|
63
|
+
end
|
64
|
+
|
65
|
+
@builtins = builtins
|
66
|
+
@doctype = doctype
|
67
|
+
end
|
68
|
+
|
69
|
+
# :call-seq: config() → Hash
|
70
|
+
#
|
71
|
+
# [Returns]
|
72
|
+
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
73
|
+
# part of the CSS cache key.
|
74
|
+
def config
|
75
|
+
{ builtins: @builtins, doctype: @doctype }
|
76
|
+
end
|
77
|
+
|
78
|
+
# :stopdoc:
|
79
|
+
def visit_function(node)
|
80
|
+
msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
|
81
|
+
return send(msg, node) if respond_to?(msg)
|
8
82
|
|
9
83
|
case node.value.first
|
10
84
|
when /^text\(/
|
11
|
-
|
85
|
+
"child::text()"
|
12
86
|
when /^self\(/
|
13
87
|
"self::#{node.value[1]}"
|
14
88
|
when /^eq\(/
|
15
89
|
"position()=#{node.value[1]}"
|
16
90
|
when /^(nth|nth-of-type)\(/
|
17
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
91
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
18
92
|
nth(node.value[1])
|
19
93
|
else
|
20
94
|
"position()=#{node.value[1]}"
|
21
95
|
end
|
22
96
|
when /^nth-child\(/
|
23
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
24
|
-
nth(node.value[1], :
|
97
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
98
|
+
nth(node.value[1], child: true)
|
25
99
|
else
|
26
|
-
"count(preceding-sibling::*)=#{node.value[1].to_i-1}"
|
100
|
+
"count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
|
27
101
|
end
|
28
102
|
when /^nth-last-of-type\(/
|
29
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
30
|
-
nth(node.value[1], :
|
103
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
104
|
+
nth(node.value[1], last: true)
|
31
105
|
else
|
32
106
|
index = node.value[1].to_i - 1
|
33
107
|
index == 0 ? "position()=last()" : "position()=last()-#{index}"
|
34
108
|
end
|
35
109
|
when /^nth-last-child\(/
|
36
|
-
if node.value[1].is_a?(Nokogiri::CSS::Node)
|
37
|
-
nth(node.value[1], :
|
110
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
|
111
|
+
nth(node.value[1], last: true, child: true)
|
38
112
|
else
|
39
|
-
"count(following-sibling::*)=#{node.value[1].to_i-1}"
|
113
|
+
"count(following-sibling::*)=#{node.value[1].to_i - 1}"
|
40
114
|
end
|
41
115
|
when /^(first|first-of-type)\(/
|
42
116
|
"position()=1"
|
@@ -52,15 +126,18 @@ module Nokogiri
|
|
52
126
|
"comment()"
|
53
127
|
when /^has\(/
|
54
128
|
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
55
|
-
".#{"//"
|
129
|
+
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
56
130
|
else
|
57
|
-
#
|
58
|
-
args = [
|
59
|
-
|
131
|
+
# xpath function call, let's marshal those arguments
|
132
|
+
args = ["."]
|
133
|
+
args += node.value[1..-1].map do |n|
|
134
|
+
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
135
|
+
end
|
136
|
+
"#{node.value.first}#{args.join(",")})"
|
60
137
|
end
|
61
138
|
end
|
62
139
|
|
63
|
-
def visit_not
|
140
|
+
def visit_not(node)
|
64
141
|
child = node.value.first
|
65
142
|
if :ELEMENT_NAME == child.type
|
66
143
|
"not(self::#{child.accept(self)})"
|
@@ -69,29 +146,20 @@ module Nokogiri
|
|
69
146
|
end
|
70
147
|
end
|
71
148
|
|
72
|
-
def visit_id
|
149
|
+
def visit_id(node)
|
73
150
|
node.value.first =~ /^#(.*)$/
|
74
|
-
"@id='#{
|
151
|
+
"@id='#{Regexp.last_match(1)}'"
|
75
152
|
end
|
76
153
|
|
77
|
-
def visit_attribute_condition
|
78
|
-
attribute =
|
79
|
-
|
80
|
-
else
|
81
|
-
'@'
|
82
|
-
end
|
83
|
-
attribute += node.value.first.accept(self)
|
84
|
-
|
85
|
-
# non-standard. attributes starting with '@'
|
86
|
-
attribute.gsub!(/^@@/, '@')
|
87
|
-
|
88
|
-
return attribute unless node.value.length == 3
|
154
|
+
def visit_attribute_condition(node)
|
155
|
+
attribute = node.value.first.accept(self)
|
156
|
+
return attribute if node.value.length == 1
|
89
157
|
|
90
158
|
value = node.value.last
|
91
|
-
value = "'#{value}'"
|
159
|
+
value = "'#{value}'" unless /^['"]/.match?(value)
|
92
160
|
|
93
161
|
# quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
|
94
|
-
if (value[0]==value[-1]) && %q{"'}.include?(value[0])
|
162
|
+
if (value[0] == value[-1]) && %q{"'}.include?(value[0])
|
95
163
|
str_value = value[1..-2]
|
96
164
|
if str_value.include?(value[0])
|
97
165
|
value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
|
@@ -100,9 +168,9 @@ module Nokogiri
|
|
100
168
|
|
101
169
|
case node.value[1]
|
102
170
|
when :equal
|
103
|
-
attribute + "=" +
|
171
|
+
attribute + "=" + value.to_s
|
104
172
|
when :not_equal
|
105
|
-
attribute + "!=" +
|
173
|
+
attribute + "!=" + value.to_s
|
106
174
|
when :substring_match
|
107
175
|
"contains(#{attribute},#{value})"
|
108
176
|
when :prefix_match
|
@@ -115,16 +183,16 @@ module Nokogiri
|
|
115
183
|
when :suffix_match
|
116
184
|
"substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
|
117
185
|
else
|
118
|
-
attribute + " #{node.value[1]} " +
|
186
|
+
attribute + " #{node.value[1]} " + value.to_s
|
119
187
|
end
|
120
188
|
end
|
121
189
|
|
122
|
-
def visit_pseudo_class
|
123
|
-
if node.value.first.is_a?(Nokogiri::CSS::Node)
|
190
|
+
def visit_pseudo_class(node)
|
191
|
+
if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
|
124
192
|
node.value.first.accept(self)
|
125
193
|
else
|
126
|
-
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/,
|
127
|
-
return
|
194
|
+
msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
|
195
|
+
return send(msg, node) if respond_to?(msg)
|
128
196
|
|
129
197
|
case node.value.first
|
130
198
|
when "first" then "position()=1"
|
@@ -144,24 +212,24 @@ module Nokogiri
|
|
144
212
|
end
|
145
213
|
end
|
146
214
|
|
147
|
-
def visit_class_condition
|
215
|
+
def visit_class_condition(node)
|
148
216
|
css_class("@class", node.value.first)
|
149
217
|
end
|
150
218
|
|
151
|
-
def visit_combinator
|
219
|
+
def visit_combinator(node)
|
152
220
|
if is_of_type_pseudo_class?(node.value.last)
|
153
|
-
"#{node.value.first
|
221
|
+
"#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
|
154
222
|
else
|
155
|
-
"#{node.value.first
|
223
|
+
"#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
|
156
224
|
end
|
157
225
|
end
|
158
226
|
|
159
227
|
{
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
}.each do |k,v|
|
228
|
+
"direct_adjacent_selector" => "/following-sibling::*[1]/self::",
|
229
|
+
"following_selector" => "/following-sibling::",
|
230
|
+
"descendant_selector" => "//",
|
231
|
+
"child_selector" => "/",
|
232
|
+
}.each do |k, v|
|
165
233
|
class_eval %{
|
166
234
|
def visit_#{k} node
|
167
235
|
"\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
|
@@ -169,25 +237,50 @@ module Nokogiri
|
|
169
237
|
}
|
170
238
|
end
|
171
239
|
|
172
|
-
def visit_conditional_selector
|
173
|
-
node.value.first.accept(self) +
|
174
|
-
|
240
|
+
def visit_conditional_selector(node)
|
241
|
+
node.value.first.accept(self) + "[" +
|
242
|
+
node.value.last.accept(self) + "]"
|
243
|
+
end
|
244
|
+
|
245
|
+
def visit_element_name(node)
|
246
|
+
if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
|
247
|
+
# HTML5 has namespaces that should be ignored in CSS queries
|
248
|
+
# https://github.com/sparklemotion/nokogiri/issues/2376
|
249
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
250
|
+
if WILDCARD_NAMESPACES
|
251
|
+
"*:#{node.value.first}"
|
252
|
+
else
|
253
|
+
"*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
|
254
|
+
end
|
255
|
+
else
|
256
|
+
"*[local-name()='#{node.value.first}']"
|
257
|
+
end
|
258
|
+
else
|
259
|
+
node.value.first
|
260
|
+
end
|
175
261
|
end
|
176
262
|
|
177
|
-
def
|
178
|
-
node.value.first
|
263
|
+
def visit_attrib_name(node)
|
264
|
+
"@#{node.value.first}"
|
179
265
|
end
|
180
266
|
|
181
|
-
def accept
|
267
|
+
def accept(node)
|
182
268
|
node.accept(self)
|
183
269
|
end
|
184
270
|
|
185
271
|
private
|
186
272
|
|
187
|
-
def
|
273
|
+
def html5_element_name_needs_namespace_handling(node)
|
274
|
+
# if this is the wildcard selector "*", use it as normal
|
275
|
+
node.value.first != "*" &&
|
276
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
277
|
+
!node.value.first.include?(":")
|
278
|
+
end
|
279
|
+
|
280
|
+
def nth(node, options = {})
|
188
281
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
189
282
|
|
190
|
-
a, b = read_a_and_positive_b
|
283
|
+
a, b = read_a_and_positive_b(node.value)
|
191
284
|
position = if options[:child]
|
192
285
|
options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
|
193
286
|
else
|
@@ -206,7 +299,7 @@ module Nokogiri
|
|
206
299
|
end
|
207
300
|
end
|
208
301
|
|
209
|
-
def read_a_and_positive_b
|
302
|
+
def read_a_and_positive_b(values)
|
210
303
|
op = values[2]
|
211
304
|
if op == "+"
|
212
305
|
a = values[0].to_i
|
@@ -220,9 +313,9 @@ module Nokogiri
|
|
220
313
|
[a, b]
|
221
314
|
end
|
222
315
|
|
223
|
-
def is_of_type_pseudo_class?
|
224
|
-
if node.type
|
225
|
-
if node.value[0].is_a?(Nokogiri::CSS::Node)
|
316
|
+
def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
|
317
|
+
if node.type == :PSEUDO_CLASS
|
318
|
+
if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
|
226
319
|
node.value[0].value[0]
|
227
320
|
else
|
228
321
|
node.value[0]
|
@@ -230,30 +323,34 @@ module Nokogiri
|
|
230
323
|
end
|
231
324
|
end
|
232
325
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
326
|
+
def css_class(hay, needle)
|
327
|
+
if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
|
328
|
+
# use the builtin implementation
|
329
|
+
"nokogiri-builtin:css-class(#{hay},'#{needle}')"
|
330
|
+
else
|
331
|
+
# use only ordinary xpath functions
|
332
|
+
"contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
|
333
|
+
end
|
241
334
|
end
|
242
|
-
|
243
|
-
alias_method :css_class, :css_class_standard
|
244
335
|
end
|
245
336
|
|
246
|
-
|
247
|
-
|
248
|
-
|
337
|
+
module XPathVisitorAlwaysUseBuiltins # :nodoc:
|
338
|
+
def self.new
|
339
|
+
warn(
|
340
|
+
"Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
341
|
+
{ uplevel: 1 },
|
342
|
+
)
|
343
|
+
XPathVisitor.new(builtins: :always)
|
344
|
+
end
|
249
345
|
end
|
250
346
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
347
|
+
module XPathVisitorOptimallyUseBuiltins # :nodoc:
|
348
|
+
def self.new
|
349
|
+
warn(
|
350
|
+
"Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
351
|
+
{ uplevel: 1 },
|
352
|
+
)
|
353
|
+
XPathVisitor.new(builtins: :optimal)
|
257
354
|
end
|
258
355
|
end
|
259
356
|
end
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,17 +1,49 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
5
|
+
# Translate a CSS selector into an XPath 1.0 query
|
3
6
|
module CSS
|
4
7
|
class << self
|
5
|
-
|
6
|
-
#
|
7
|
-
def parse(selector)
|
8
|
+
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
|
+
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
|
+
def parse(selector) # :nodoc:
|
8
11
|
Parser.new.parse(selector)
|
9
12
|
end
|
10
13
|
|
11
|
-
|
12
|
-
#
|
14
|
+
# :call-seq:
|
15
|
+
# xpath_for(selector) → String
|
16
|
+
# xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
|
17
|
+
#
|
18
|
+
# Translate a CSS selector to the equivalent XPath query.
|
19
|
+
#
|
20
|
+
# [Parameters]
|
21
|
+
# - +selector+ (String) The CSS selector to be translated into XPath
|
22
|
+
#
|
23
|
+
# - +prefix:+ (String)
|
24
|
+
#
|
25
|
+
# The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
|
26
|
+
# +XML::XPath::GLOBAL_SEARCH_PREFIX+.
|
27
|
+
#
|
28
|
+
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
29
|
+
#
|
30
|
+
# The visitor class to use to transform the AST into XPath. Default is
|
31
|
+
# +Nokogiri::CSS::XPathVisitor.new+.
|
32
|
+
#
|
33
|
+
# - +ns:+ (Hash<String ⇒ String>)
|
34
|
+
#
|
35
|
+
# The namespaces that are referenced in the query, if any. This is a hash where the keys are
|
36
|
+
# the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
|
37
|
+
#
|
38
|
+
# [Returns] (String) The equivalent XPath query for +selector+
|
39
|
+
#
|
40
|
+
# 💡 Note that translated queries are cached for performance concerns.
|
41
|
+
#
|
13
42
|
def xpath_for(selector, options = {})
|
14
|
-
|
43
|
+
prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
|
44
|
+
visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
|
45
|
+
ns = options.fetch(:ns, {})
|
46
|
+
Parser.new(ns).xpath_for(selector, prefix, visitor)
|
15
47
|
end
|
16
48
|
end
|
17
49
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module Decorators
|
4
5
|
###
|
@@ -10,21 +11,21 @@ module Nokogiri
|
|
10
11
|
|
11
12
|
###
|
12
13
|
# look for node with +name+. See Nokogiri.Slop
|
13
|
-
def method_missing
|
14
|
+
def method_missing(name, *args, &block)
|
14
15
|
if args.empty?
|
15
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
16
|
-
elsif args.first.is_a?
|
16
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
17
|
+
elsif args.first.is_a?(Hash)
|
17
18
|
hash = args.first
|
18
19
|
if hash[:css]
|
19
20
|
list = css("#{name}#{hash[:css]}")
|
20
21
|
elsif hash[:xpath]
|
21
|
-
conds = Array(hash[:xpath]).join(
|
22
|
+
conds = Array(hash[:xpath]).join(" and ")
|
22
23
|
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
23
24
|
end
|
24
25
|
else
|
25
26
|
CSS::Parser.without_cache do
|
26
27
|
list = xpath(
|
27
|
-
*CSS.xpath_for("#{name}#{args.first}", :
|
28
|
+
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX)
|
28
29
|
)
|
29
30
|
end
|
30
31
|
end
|
@@ -33,8 +34,8 @@ module Nokogiri
|
|
33
34
|
list.length == 1 ? list.first : list
|
34
35
|
end
|
35
36
|
|
36
|
-
def respond_to_missing?
|
37
|
-
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/,
|
37
|
+
def respond_to_missing?(name, include_private = false)
|
38
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
|
38
39
|
|
39
40
|
!list.empty?
|
40
41
|
end
|
data/lib/nokogiri/extension.rb
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
::RUBY_VERSION =~ /(\d+\.\d+)/
|
7
7
|
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
8
|
rescue LoadError => e
|
9
|
-
if e.message
|
9
|
+
if /GLIBC/.match?(e.message)
|
10
10
|
warn(<<~EOM)
|
11
11
|
|
12
12
|
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
|
data/lib/nokogiri/gumbo.rb
CHANGED
data/lib/nokogiri/html.rb
CHANGED
@@ -1,40 +1,46 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
require_relative "html4"
|
3
5
|
|
4
6
|
module Nokogiri
|
7
|
+
# Alias for Nokogiri::HTML4
|
5
8
|
HTML = Nokogiri::HTML4
|
6
9
|
|
7
|
-
#
|
10
|
+
# :singleton-method: HTML
|
11
|
+
# :call-seq: HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
|
12
|
+
#
|
8
13
|
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
-
|
14
|
+
|
15
|
+
# :nodoc:
|
10
16
|
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
11
17
|
|
12
|
-
#
|
13
|
-
#
|
18
|
+
# 💡 This module/namespace is an alias for Nokogiri::HTML4 as of v1.12.0. Before v1.12.0,
|
19
|
+
# Nokogiri::HTML4 did not exist, and this was the module/namespace for all HTML-related
|
14
20
|
# classes.
|
15
21
|
module HTML
|
16
|
-
#
|
22
|
+
# 💡 This class is an alias for Nokogiri::HTML4::Document as of v1.12.0.
|
17
23
|
class Document < Nokogiri::XML::Document
|
18
24
|
end
|
19
25
|
|
20
|
-
#
|
26
|
+
# 💡 This class is an alias for Nokogiri::HTML4::DocumentFragment as of v1.12.0.
|
21
27
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
28
|
end
|
23
29
|
|
24
|
-
#
|
30
|
+
# 💡 This class is an alias for Nokogiri::HTML4::Builder as of v1.12.0.
|
25
31
|
class Builder < Nokogiri::XML::Builder
|
26
32
|
end
|
27
33
|
|
28
34
|
module SAX
|
29
|
-
#
|
35
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::Parser as of v1.12.0.
|
30
36
|
class Parser < Nokogiri::XML::SAX::Parser
|
31
37
|
end
|
32
38
|
|
33
|
-
#
|
39
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::ParserContext as of v1.12.0.
|
34
40
|
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
35
41
|
end
|
36
42
|
|
37
|
-
#
|
43
|
+
# 💡 This class is an alias for Nokogiri::HTML4::SAX::PushParser as of v1.12.0.
|
38
44
|
class PushParser
|
39
45
|
end
|
40
46
|
end
|