nokogiri 1.16.8-x86_64-linux → 1.17.1-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
- data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
- data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +8 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
data/lib/nokogiri/css/node.rb
CHANGED
@@ -23,8 +23,12 @@ module Nokogiri
|
|
23
23
|
|
24
24
|
###
|
25
25
|
# Convert this CSS node to xpath with +prefix+ using +visitor+
|
26
|
-
def to_xpath(
|
27
|
-
prefix =
|
26
|
+
def to_xpath(visitor)
|
27
|
+
prefix = if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
|
28
|
+
"."
|
29
|
+
else
|
30
|
+
visitor.prefix
|
31
|
+
end
|
28
32
|
prefix + visitor.accept(self)
|
29
33
|
end
|
30
34
|
|
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
#
|
3
3
|
# DO NOT MODIFY!!!!
|
4
|
-
# This file is automatically generated by Racc 1.
|
5
|
-
# from Racc grammar file "".
|
4
|
+
# This file is automatically generated by Racc 1.8.0
|
5
|
+
# from Racc grammar file "parser.y".
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'racc/parser.rb'
|
@@ -291,6 +291,7 @@ Racc_arg = [
|
|
291
291
|
racc_shift_n,
|
292
292
|
racc_reduce_n,
|
293
293
|
racc_use_result_var ]
|
294
|
+
Ractor.make_shareable(Racc_arg) if defined?(Ractor)
|
294
295
|
|
295
296
|
Racc_token_to_s_table = [
|
296
297
|
"$end",
|
@@ -351,6 +352,7 @@ Racc_token_to_s_table = [
|
|
351
352
|
"negation",
|
352
353
|
"eql_incl_dash",
|
353
354
|
"negation_arg" ]
|
355
|
+
Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor)
|
354
356
|
|
355
357
|
Racc_debug_parser = false
|
356
358
|
|
@@ -468,12 +470,12 @@ def _reduce_23(val, _values, result)
|
|
468
470
|
end
|
469
471
|
|
470
472
|
def _reduce_24(val, _values, result)
|
471
|
-
result = Node.new(:ELEMENT_NAME, [
|
473
|
+
result = Node.new(:ELEMENT_NAME, [val[0], val[2]])
|
472
474
|
result
|
473
475
|
end
|
474
476
|
|
475
477
|
def _reduce_25(val, _values, result)
|
476
|
-
name =
|
478
|
+
name = val[0]
|
477
479
|
result = Node.new(:ELEMENT_NAME, [name])
|
478
480
|
|
479
481
|
result
|
data/lib/nokogiri/css/parser.y
CHANGED
@@ -64,9 +64,9 @@ rule
|
|
64
64
|
;
|
65
65
|
|
66
66
|
namespaced_ident:
|
67
|
-
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [
|
67
|
+
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [val[0], val[2]]) }
|
68
68
|
| IDENT {
|
69
|
-
name =
|
69
|
+
name = val[0]
|
70
70
|
result = Node.new(:ELEMENT_NAME, [name])
|
71
71
|
}
|
72
72
|
;
|
data/lib/nokogiri/css/parser_extras.rb
CHANGED
@@ -5,62 +5,9 @@ require "thread"
|
|
5
5
|
module Nokogiri
|
6
6
|
module CSS
|
7
7
|
class Parser < Racc::Parser # :nodoc:
|
8
|
-
|
9
|
-
|
10
|
-
@cache = {}
|
11
|
-
@mutex = Mutex.new
|
12
|
-
|
13
|
-
class << self
|
14
|
-
# Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
|
15
|
-
def cache_on?
|
16
|
-
!Thread.current[CACHE_SWITCH_NAME]
|
17
|
-
end
|
18
|
-
|
19
|
-
# Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
|
20
|
-
def set_cache(value) # rubocop:disable Naming/AccessorMethodName
|
21
|
-
Thread.current[CACHE_SWITCH_NAME] = !value
|
22
|
-
end
|
23
|
-
|
24
|
-
# Get the css selector in +string+ from the cache
|
25
|
-
def [](string)
|
26
|
-
return unless cache_on?
|
27
|
-
|
28
|
-
@mutex.synchronize { @cache[string] }
|
29
|
-
end
|
30
|
-
|
31
|
-
# Set the css selector in +string+ in the cache to +value+
|
32
|
-
def []=(string, value)
|
33
|
-
return value unless cache_on?
|
34
|
-
|
35
|
-
@mutex.synchronize { @cache[string] = value }
|
36
|
-
end
|
37
|
-
|
38
|
-
# Clear the cache
|
39
|
-
def clear_cache(create_new_object = false)
|
40
|
-
@mutex.synchronize do
|
41
|
-
if create_new_object
|
42
|
-
@cache = {}
|
43
|
-
else
|
44
|
-
@cache.clear
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Execute +block+ without cache
|
50
|
-
def without_cache(&block)
|
51
|
-
original_cache_setting = cache_on?
|
52
|
-
set_cache(false)
|
53
|
-
yield
|
54
|
-
ensure
|
55
|
-
set_cache(original_cache_setting)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# Create a new CSS parser with respect to +namespaces+
|
60
|
-
def initialize(namespaces = {})
|
8
|
+
def initialize
|
61
9
|
@tokenizer = Tokenizer.new
|
62
|
-
|
63
|
-
super()
|
10
|
+
super
|
64
11
|
end
|
65
12
|
|
66
13
|
def parse(string)
|
@@ -72,11 +19,10 @@ module Nokogiri
|
|
72
19
|
@tokenizer.next_token
|
73
20
|
end
|
74
21
|
|
75
|
-
# Get the xpath for +
|
76
|
-
def xpath_for(
|
77
|
-
|
78
|
-
|
79
|
-
ast.to_xpath(prefix, visitor)
|
22
|
+
# Get the xpath for +selector+ using +visitor+
|
23
|
+
def xpath_for(selector, visitor)
|
24
|
+
parse(selector).map do |ast|
|
25
|
+
ast.to_xpath(visitor)
|
80
26
|
end
|
81
27
|
end
|
82
28
|
|
@@ -85,12 +31,6 @@ module Nokogiri
|
|
85
31
|
after = value_stack.compact.last
|
86
32
|
raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
|
87
33
|
end
|
88
|
-
|
89
|
-
def cache_key(query, prefix, visitor)
|
90
|
-
if self.class.cache_on?
|
91
|
-
[query, prefix, @namespaces, visitor.config]
|
92
|
-
end
|
93
|
-
end
|
94
34
|
end
|
95
35
|
end
|
96
36
|
end
|
data/lib/nokogiri/css/selector_cache.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module CSS
|
5
|
+
module SelectorCache # :nodoc:
|
6
|
+
@cache = {}
|
7
|
+
@mutex = Mutex.new
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# Retrieve the cached XPath expressions for the key
|
11
|
+
def [](key)
|
12
|
+
@mutex.synchronize { @cache[key] }
|
13
|
+
end
|
14
|
+
|
15
|
+
# Insert the XPath expressions `value` at the cache key
|
16
|
+
def []=(key, value)
|
17
|
+
@mutex.synchronize { @cache[key] = value }
|
18
|
+
end
|
19
|
+
|
20
|
+
# Clear the cache
|
21
|
+
def clear_cache(create_new_object = false)
|
22
|
+
@mutex.synchronize do
|
23
|
+
if create_new_object # used in tests to avoid 'method redefined' warnings when injecting spies
|
24
|
+
@cache = {}
|
25
|
+
else
|
26
|
+
@cache.clear
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Construct a unique key cache key
|
32
|
+
def key(selector:, visitor:)
|
33
|
+
[selector, visitor.config]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/nokogiri/css/tokenizer.rb
CHANGED
@@ -63,13 +63,13 @@ class Tokenizer
|
|
63
63
|
when (text = @ss.scan(/has\([\s]*/))
|
64
64
|
action { [:HAS, text] }
|
65
65
|
|
66
|
-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]
|
66
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
|
67
67
|
action { [:FUNCTION, text] }
|
68
68
|
|
69
|
-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]
|
69
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
|
70
70
|
action { [:IDENT, text] }
|
71
71
|
|
72
|
-
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]
|
72
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
|
73
73
|
action { [:HASH, text] }
|
74
74
|
|
75
75
|
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
@@ -132,7 +132,7 @@ class Tokenizer
|
|
132
132
|
when (text = @ss.scan(/[\s]+/))
|
133
133
|
action { [:S, text] }
|
134
134
|
|
135
|
-
when (text = @ss.scan(/"([^\n\r\f"]
|
135
|
+
when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
|
136
136
|
action { [:STRING, text] }
|
137
137
|
|
138
138
|
when (text = @ss.scan(/./))
|
data/lib/nokogiri/css/tokenizer.rex
CHANGED
@@ -4,20 +4,21 @@ module CSS
|
|
4
4
|
class Tokenizer
|
5
5
|
|
6
6
|
macro
|
7
|
-
nl \n|\r\n|\r|\f
|
7
|
+
nl (\n|\r\n|\r|\f)
|
8
8
|
w [\s]*
|
9
9
|
nonascii [^\0-\177]
|
10
10
|
num -?([0-9]+|[0-9]*\.[0-9]+)
|
11
11
|
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
|
12
12
|
|
13
|
-
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
14
|
-
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
15
|
-
nmstart [_A-Za-z]|{nonascii}|{escape}
|
16
|
-
|
17
|
-
name
|
13
|
+
escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
|
14
|
+
nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
|
15
|
+
nmstart ([_A-Za-z]|{nonascii}|{escape})
|
16
|
+
name {nmstart}{nmchar}*
|
17
|
+
ident -?{name}
|
18
|
+
charref {nmchar}+
|
18
19
|
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
19
20
|
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
20
|
-
string {string1}|{string2}
|
21
|
+
string ({string1}|{string2})
|
21
22
|
|
22
23
|
rule
|
23
24
|
|
@@ -26,7 +27,7 @@ rule
|
|
26
27
|
has\({w} { [:HAS, text] }
|
27
28
|
{ident}\({w} { [:FUNCTION, text] }
|
28
29
|
{ident} { [:IDENT, text] }
|
29
|
-
\#{
|
30
|
+
\#{charref} { [:HASH, text] }
|
30
31
|
{w}~={w} { [:INCLUDES, text] }
|
31
32
|
{w}\|={w} { [:DASHMATCH, text] }
|
32
33
|
{w}\^={w} { [:PREFIXMATCH, text] }
|
data/lib/nokogiri/css/xpath_visitor.rb
CHANGED
@@ -44,6 +44,18 @@ module Nokogiri
|
|
44
44
|
VALUES = [XML, HTML4, HTML5]
|
45
45
|
end
|
46
46
|
|
47
|
+
# The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
|
48
|
+
attr_reader :builtins
|
49
|
+
|
50
|
+
# The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
|
51
|
+
attr_reader :doctype
|
52
|
+
|
53
|
+
# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
|
54
|
+
attr_reader :prefix
|
55
|
+
|
56
|
+
# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
|
57
|
+
attr_reader :namespaces
|
58
|
+
|
47
59
|
# :call-seq:
|
48
60
|
# new() → XPathVisitor
|
49
61
|
# new(builtins:, doctype:) → XPathVisitor
|
@@ -54,7 +66,12 @@ module Nokogiri
|
|
54
66
|
#
|
55
67
|
# [Returns] XPathVisitor
|
56
68
|
#
|
57
|
-
def initialize(
|
69
|
+
def initialize(
|
70
|
+
builtins: BuiltinsConfig::NEVER,
|
71
|
+
doctype: DoctypeConfig::XML,
|
72
|
+
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
|
73
|
+
namespaces: nil
|
74
|
+
)
|
58
75
|
unless BuiltinsConfig::VALUES.include?(builtins)
|
59
76
|
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
|
60
77
|
end
|
@@ -64,6 +81,8 @@ module Nokogiri
|
|
64
81
|
|
65
82
|
@builtins = builtins
|
66
83
|
@doctype = doctype
|
84
|
+
@prefix = prefix
|
85
|
+
@namespaces = namespaces
|
67
86
|
end
|
68
87
|
|
69
88
|
# :call-seq: config() → Hash
|
@@ -72,7 +91,7 @@ module Nokogiri
|
|
72
91
|
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
73
92
|
# part of the CSS cache key.
|
74
93
|
def config
|
75
|
-
{ builtins: @builtins, doctype: @doctype }
|
94
|
+
{ builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
|
76
95
|
end
|
77
96
|
|
78
97
|
# :stopdoc:
|
@@ -128,6 +147,8 @@ module Nokogiri
|
|
128
147
|
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
129
148
|
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
130
149
|
else
|
150
|
+
validate_xpath_function_name(node.value.first)
|
151
|
+
|
131
152
|
# xpath function call, let's marshal those arguments
|
132
153
|
args = ["."]
|
133
154
|
args += node.value[1..-1].map do |n|
|
@@ -207,6 +228,7 @@ module Nokogiri
|
|
207
228
|
when "parent" then "node()"
|
208
229
|
when "root" then "not(parent::*)"
|
209
230
|
else
|
231
|
+
validate_xpath_function_name(node.value.first)
|
210
232
|
"nokogiri:#{node.value.first}(.)"
|
211
233
|
end
|
212
234
|
end
|
@@ -255,6 +277,14 @@ module Nokogiri
|
|
255
277
|
else
|
256
278
|
"*[local-name()='#{node.value.first}']"
|
257
279
|
end
|
280
|
+
elsif node.value.length == 2 # has a namespace prefix
|
281
|
+
if node.value.first.nil? # namespace prefix is empty
|
282
|
+
node.value.last
|
283
|
+
else
|
284
|
+
node.value.join(":")
|
285
|
+
end
|
286
|
+
elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
|
287
|
+
"xmlns:#{node.value.first}"
|
258
288
|
else
|
259
289
|
node.value.first
|
260
290
|
end
|
@@ -270,11 +300,17 @@ module Nokogiri
|
|
270
300
|
|
271
301
|
private
|
272
302
|
|
303
|
+
def validate_xpath_function_name(name)
|
304
|
+
if name.start_with?("-")
|
305
|
+
raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
273
309
|
def html5_element_name_needs_namespace_handling(node)
|
274
|
-
# if
|
275
|
-
node.value.
|
276
|
-
# if
|
277
|
-
|
310
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
311
|
+
node.value.length == 1 &&
|
312
|
+
# if this is the wildcard selector "*", use it as normal
|
313
|
+
node.value.first != "*"
|
278
314
|
end
|
279
315
|
|
280
316
|
def nth(node, options = {})
|
data/lib/nokogiri/css.rb
CHANGED
@@ -8,53 +8,119 @@ module Nokogiri
|
|
8
8
|
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
9
|
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
10
|
def parse(selector) # :nodoc:
|
11
|
+
warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
|
11
12
|
Parser.new.parse(selector)
|
12
13
|
end
|
13
14
|
|
14
15
|
# :call-seq:
|
15
|
-
# xpath_for(
|
16
|
-
# xpath_for(
|
16
|
+
# xpath_for(selector_list) → Array<String>
|
17
|
+
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
|
17
18
|
#
|
18
|
-
# Translate a CSS selector to the equivalent XPath
|
19
|
+
# Translate a CSS selector list to the equivalent XPath expressions.
|
20
|
+
#
|
21
|
+
# 💡 Note that translated queries are cached by default for performance concerns.
|
22
|
+
#
|
23
|
+
# ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
|
24
|
+
# node classes, for querying documents with CSS selectors. This method is the underlying
|
25
|
+
# mechanism used by XML::Searchable and is provided solely for advanced users to translate
|
26
|
+
# \CSS selectors to XPath directly.
|
27
|
+
#
|
28
|
+
# Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
|
29
|
+
# some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
|
30
|
+
# functions.
|
19
31
|
#
|
20
32
|
# [Parameters]
|
21
|
-
# - +
|
33
|
+
# - +selector_list+ (String)
|
22
34
|
#
|
35
|
+
# The CSS selector to be translated into XPath. This is always a String, but that string
|
36
|
+
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
|
37
|
+
# examples).
|
38
|
+
#
|
39
|
+
# [Keyword arguments]
|
23
40
|
# - +prefix:+ (String)
|
24
41
|
#
|
25
|
-
# The XPath prefix
|
26
|
-
# +
|
42
|
+
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
|
43
|
+
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
|
44
|
+
#
|
45
|
+
# - +ns:+ (Hash<String ⇒ String>, nil)
|
46
|
+
#
|
47
|
+
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
|
48
|
+
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
|
49
|
+
# empty set of namespaces.
|
27
50
|
#
|
28
51
|
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
29
52
|
#
|
30
|
-
#
|
31
|
-
#
|
53
|
+
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
|
54
|
+
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
|
55
|
+
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
|
56
|
+
#
|
57
|
+
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
|
58
|
+
# provided, +prefix+ and +ns+ must not be present.
|
59
|
+
#
|
60
|
+
# - +cache:+ (Boolean)
|
61
|
+
#
|
62
|
+
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
|
63
|
+
# don't incur the overhead of re-parsing the selector. Default is +true+.
|
32
64
|
#
|
33
|
-
#
|
65
|
+
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
|
34
66
|
#
|
35
|
-
#
|
36
|
-
# the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
|
67
|
+
# *Example* with a simple selector:
|
37
68
|
#
|
38
|
-
#
|
69
|
+
# Nokogiri::CSS.xpath_for("div") # => ["//div"]
|
39
70
|
#
|
40
|
-
#
|
71
|
+
# *Example* with a compound selector:
|
41
72
|
#
|
42
|
-
|
43
|
-
|
73
|
+
# Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
|
74
|
+
#
|
75
|
+
# *Example* with a complex selector:
|
76
|
+
#
|
77
|
+
# Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
|
78
|
+
#
|
79
|
+
# *Example* with a selector list:
|
80
|
+
#
|
81
|
+
# Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
|
82
|
+
#
|
83
|
+
def xpath_for(
|
84
|
+
selector, options = nil,
|
85
|
+
prefix: options&.delete(:prefix),
|
86
|
+
visitor: options&.delete(:visitor),
|
87
|
+
ns: options&.delete(:ns),
|
88
|
+
cache: true
|
89
|
+
)
|
90
|
+
unless options.nil?
|
91
|
+
warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
|
92
|
+
end
|
93
|
+
|
94
|
+
raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
|
44
95
|
|
45
96
|
selector = selector.to_str
|
46
|
-
raise
|
97
|
+
raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
|
98
|
+
|
99
|
+
if visitor
|
100
|
+
raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
|
101
|
+
raise ArgumentError, "cannot provide both :ns and :visitor" if ns
|
102
|
+
end
|
103
|
+
|
104
|
+
visitor ||= begin
|
105
|
+
visitor_kw = {}
|
106
|
+
visitor_kw[:prefix] = prefix if prefix
|
107
|
+
visitor_kw[:namespaces] = ns if ns
|
47
108
|
|
48
|
-
|
49
|
-
|
50
|
-
ns = options.fetch(:ns, {})
|
109
|
+
Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
|
110
|
+
end
|
51
111
|
|
52
|
-
|
112
|
+
if cache
|
113
|
+
key = SelectorCache.key(selector: selector, visitor: visitor)
|
114
|
+
SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
|
115
|
+
else
|
116
|
+
Parser.new.xpath_for(selector, visitor)
|
117
|
+
end
|
53
118
|
end
|
54
119
|
end
|
55
120
|
end
|
56
121
|
end
|
57
122
|
|
123
|
+
require_relative "css/selector_cache"
|
58
124
|
require_relative "css/node"
|
59
125
|
require_relative "css/xpath_visitor"
|
60
126
|
x = $-w
|
data/lib/nokogiri/decorators/slop.rb
CHANGED
@@ -23,11 +23,9 @@ module Nokogiri
|
|
23
23
|
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
24
24
|
end
|
25
25
|
else
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
)
|
30
|
-
end
|
26
|
+
list = xpath(
|
27
|
+
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
|
28
|
+
)
|
31
29
|
end
|
32
30
|
|
33
31
|
super if list.empty?
|
data/lib/nokogiri/encoding_handler.rb
CHANGED
@@ -6,9 +6,9 @@ module Nokogiri
|
|
6
6
|
# Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
|
7
7
|
USEFUL_ALIASES = {
|
8
8
|
# alias_name => true_name
|
9
|
-
"
|
9
|
+
"ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
|
10
|
+
"NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
|
10
11
|
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
11
|
-
"UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
|
12
12
|
}
|
13
13
|
|
14
14
|
class << self
|
data/lib/nokogiri/html4/document.rb
CHANGED
@@ -161,52 +161,73 @@ module Nokogiri
|
|
161
161
|
end
|
162
162
|
|
163
163
|
class << self
|
164
|
-
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
168
|
-
#
|
169
|
-
#
|
170
|
-
#
|
171
|
-
#
|
172
|
-
|
164
|
+
# :call-seq:
|
165
|
+
# parse(input) { |options| ... } => Nokogiri::HTML4::Document
|
166
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
|
167
|
+
#
|
168
|
+
# Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
|
169
|
+
#
|
170
|
+
# [Required Parameters]
|
171
|
+
# - +input+ (String | IO) The content to be parsed.
|
172
|
+
#
|
173
|
+
# [Optional Keyword Arguments]
|
174
|
+
# - +url:+ (String) The base URI for this document.
|
175
|
+
#
|
176
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
177
|
+
# document. When not provided, the encoding will be determined based on the document
|
178
|
+
# content.
|
179
|
+
#
|
180
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
181
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
182
|
+
# +ParseOptions::DEFAULT_HTML+.
|
183
|
+
#
|
184
|
+
# [Yields]
|
185
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
186
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
187
|
+
#
|
188
|
+
# [Returns] Nokogiri::HTML4::Document
|
189
|
+
def parse(
|
190
|
+
input,
|
191
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
|
192
|
+
url: url_, encoding: encoding_, options: options_
|
193
|
+
)
|
173
194
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
174
195
|
yield options if block_given?
|
175
196
|
|
176
|
-
url ||=
|
197
|
+
url ||= input.respond_to?(:path) ? input.path : nil
|
177
198
|
|
178
|
-
if
|
179
|
-
unless
|
180
|
-
encoding ||=
|
199
|
+
if input.respond_to?(:encoding)
|
200
|
+
unless input.encoding == Encoding::ASCII_8BIT
|
201
|
+
encoding ||= input.encoding.name
|
181
202
|
end
|
182
203
|
end
|
183
204
|
|
184
|
-
if
|
185
|
-
if
|
205
|
+
if input.respond_to?(:read)
|
206
|
+
if input.is_a?(Pathname)
|
186
207
|
# resolve the Pathname to the file and open it as an IO object, see #2110
|
187
|
-
|
188
|
-
url ||=
|
208
|
+
input = input.expand_path.open
|
209
|
+
url ||= input.path
|
189
210
|
end
|
190
211
|
|
191
212
|
unless encoding
|
192
|
-
|
213
|
+
input = EncodingReader.new(input)
|
193
214
|
begin
|
194
|
-
return read_io(
|
215
|
+
return read_io(input, url, encoding, options.to_i)
|
195
216
|
rescue EncodingReader::EncodingFound => e
|
196
217
|
encoding = e.found_encoding
|
197
218
|
end
|
198
219
|
end
|
199
|
-
return read_io(
|
220
|
+
return read_io(input, url, encoding, options.to_i)
|
200
221
|
end
|
201
222
|
|
202
223
|
# read_memory pukes on empty docs
|
203
|
-
if
|
224
|
+
if input.nil? || input.empty?
|
204
225
|
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
205
226
|
end
|
206
227
|
|
207
|
-
encoding ||= EncodingReader.detect_encoding(
|
228
|
+
encoding ||= EncodingReader.detect_encoding(input)
|
208
229
|
|
209
|
-
read_memory(
|
230
|
+
read_memory(input, url, encoding, options.to_i)
|
210
231
|
end
|
211
232
|
end
|
212
233
|
end
|