nokogiri 1.16.8-x86-mingw32 → 1.17.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
- data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
- data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +8 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
data/lib/nokogiri/css/node.rb
CHANGED
@@ -23,8 +23,12 @@ module Nokogiri
|
|
23
23
|
|
24
24
|
###
|
25
25
|
# Convert this CSS node to xpath with +prefix+ using +visitor+
|
26
|
-
def to_xpath(
|
27
|
-
prefix =
|
26
|
+
def to_xpath(visitor)
|
27
|
+
prefix = if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
|
28
|
+
"."
|
29
|
+
else
|
30
|
+
visitor.prefix
|
31
|
+
end
|
28
32
|
prefix + visitor.accept(self)
|
29
33
|
end
|
30
34
|
|
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
#
|
3
3
|
# DO NOT MODIFY!!!!
|
4
|
-
# This file is automatically generated by Racc 1.
|
5
|
-
# from Racc grammar file "".
|
4
|
+
# This file is automatically generated by Racc 1.8.0
|
5
|
+
# from Racc grammar file "parser.y".
|
6
6
|
#
|
7
7
|
|
8
8
|
require 'racc/parser.rb'
|
@@ -291,6 +291,7 @@ Racc_arg = [
|
|
291
291
|
racc_shift_n,
|
292
292
|
racc_reduce_n,
|
293
293
|
racc_use_result_var ]
|
294
|
+
Ractor.make_shareable(Racc_arg) if defined?(Ractor)
|
294
295
|
|
295
296
|
Racc_token_to_s_table = [
|
296
297
|
"$end",
|
@@ -351,6 +352,7 @@ Racc_token_to_s_table = [
|
|
351
352
|
"negation",
|
352
353
|
"eql_incl_dash",
|
353
354
|
"negation_arg" ]
|
355
|
+
Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor)
|
354
356
|
|
355
357
|
Racc_debug_parser = false
|
356
358
|
|
@@ -468,12 +470,12 @@ def _reduce_23(val, _values, result)
|
|
468
470
|
end
|
469
471
|
|
470
472
|
def _reduce_24(val, _values, result)
|
471
|
-
result = Node.new(:ELEMENT_NAME, [
|
473
|
+
result = Node.new(:ELEMENT_NAME, [val[0], val[2]])
|
472
474
|
result
|
473
475
|
end
|
474
476
|
|
475
477
|
def _reduce_25(val, _values, result)
|
476
|
-
name =
|
478
|
+
name = val[0]
|
477
479
|
result = Node.new(:ELEMENT_NAME, [name])
|
478
480
|
|
479
481
|
result
|
data/lib/nokogiri/css/parser.y
CHANGED
@@ -64,9 +64,9 @@ rule
|
|
64
64
|
;
|
65
65
|
|
66
66
|
namespaced_ident:
|
67
|
-
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [
|
67
|
+
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [val[0], val[2]]) }
|
68
68
|
| IDENT {
|
69
|
-
name =
|
69
|
+
name = val[0]
|
70
70
|
result = Node.new(:ELEMENT_NAME, [name])
|
71
71
|
}
|
72
72
|
;
|
@@ -5,62 +5,9 @@ require "thread"
|
|
5
5
|
module Nokogiri
|
6
6
|
module CSS
|
7
7
|
class Parser < Racc::Parser # :nodoc:
|
8
|
-
|
9
|
-
|
10
|
-
@cache = {}
|
11
|
-
@mutex = Mutex.new
|
12
|
-
|
13
|
-
class << self
|
14
|
-
# Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
|
15
|
-
def cache_on?
|
16
|
-
!Thread.current[CACHE_SWITCH_NAME]
|
17
|
-
end
|
18
|
-
|
19
|
-
# Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
|
20
|
-
def set_cache(value) # rubocop:disable Naming/AccessorMethodName
|
21
|
-
Thread.current[CACHE_SWITCH_NAME] = !value
|
22
|
-
end
|
23
|
-
|
24
|
-
# Get the css selector in +string+ from the cache
|
25
|
-
def [](string)
|
26
|
-
return unless cache_on?
|
27
|
-
|
28
|
-
@mutex.synchronize { @cache[string] }
|
29
|
-
end
|
30
|
-
|
31
|
-
# Set the css selector in +string+ in the cache to +value+
|
32
|
-
def []=(string, value)
|
33
|
-
return value unless cache_on?
|
34
|
-
|
35
|
-
@mutex.synchronize { @cache[string] = value }
|
36
|
-
end
|
37
|
-
|
38
|
-
# Clear the cache
|
39
|
-
def clear_cache(create_new_object = false)
|
40
|
-
@mutex.synchronize do
|
41
|
-
if create_new_object
|
42
|
-
@cache = {}
|
43
|
-
else
|
44
|
-
@cache.clear
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Execute +block+ without cache
|
50
|
-
def without_cache(&block)
|
51
|
-
original_cache_setting = cache_on?
|
52
|
-
set_cache(false)
|
53
|
-
yield
|
54
|
-
ensure
|
55
|
-
set_cache(original_cache_setting)
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
# Create a new CSS parser with respect to +namespaces+
|
60
|
-
def initialize(namespaces = {})
|
8
|
+
def initialize
|
61
9
|
@tokenizer = Tokenizer.new
|
62
|
-
|
63
|
-
super()
|
10
|
+
super
|
64
11
|
end
|
65
12
|
|
66
13
|
def parse(string)
|
@@ -72,11 +19,10 @@ module Nokogiri
|
|
72
19
|
@tokenizer.next_token
|
73
20
|
end
|
74
21
|
|
75
|
-
# Get the xpath for +
|
76
|
-
def xpath_for(
|
77
|
-
|
78
|
-
|
79
|
-
ast.to_xpath(prefix, visitor)
|
22
|
+
# Get the xpath for +selector+ using +visitor+
|
23
|
+
def xpath_for(selector, visitor)
|
24
|
+
parse(selector).map do |ast|
|
25
|
+
ast.to_xpath(visitor)
|
80
26
|
end
|
81
27
|
end
|
82
28
|
|
@@ -85,12 +31,6 @@ module Nokogiri
|
|
85
31
|
after = value_stack.compact.last
|
86
32
|
raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
|
87
33
|
end
|
88
|
-
|
89
|
-
def cache_key(query, prefix, visitor)
|
90
|
-
if self.class.cache_on?
|
91
|
-
[query, prefix, @namespaces, visitor.config]
|
92
|
-
end
|
93
|
-
end
|
94
34
|
end
|
95
35
|
end
|
96
36
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module CSS
|
5
|
+
module SelectorCache # :nodoc:
|
6
|
+
@cache = {}
|
7
|
+
@mutex = Mutex.new
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# Retrieve the cached XPath expressions for the key
|
11
|
+
def [](key)
|
12
|
+
@mutex.synchronize { @cache[key] }
|
13
|
+
end
|
14
|
+
|
15
|
+
# Insert the XPath expressions `value` at the cache key
|
16
|
+
def []=(key, value)
|
17
|
+
@mutex.synchronize { @cache[key] = value }
|
18
|
+
end
|
19
|
+
|
20
|
+
# Clear the cache
|
21
|
+
def clear_cache(create_new_object = false)
|
22
|
+
@mutex.synchronize do
|
23
|
+
if create_new_object # used in tests to avoid 'method redefined' warnings when injecting spies
|
24
|
+
@cache = {}
|
25
|
+
else
|
26
|
+
@cache.clear
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Construct a unique key cache key
|
32
|
+
def key(selector:, visitor:)
|
33
|
+
[selector, visitor.config]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -63,13 +63,13 @@ class Tokenizer
|
|
63
63
|
when (text = @ss.scan(/has\([\s]*/))
|
64
64
|
action { [:HAS, text] }
|
65
65
|
|
66
|
-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]
|
66
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
|
67
67
|
action { [:FUNCTION, text] }
|
68
68
|
|
69
|
-
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]
|
69
|
+
when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
|
70
70
|
action { [:IDENT, text] }
|
71
71
|
|
72
|
-
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]
|
72
|
+
when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
|
73
73
|
action { [:HASH, text] }
|
74
74
|
|
75
75
|
when (text = @ss.scan(/[\s]*~=[\s]*/))
|
@@ -132,7 +132,7 @@ class Tokenizer
|
|
132
132
|
when (text = @ss.scan(/[\s]+/))
|
133
133
|
action { [:S, text] }
|
134
134
|
|
135
|
-
when (text = @ss.scan(/"([^\n\r\f"]
|
135
|
+
when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
|
136
136
|
action { [:STRING, text] }
|
137
137
|
|
138
138
|
when (text = @ss.scan(/./))
|
@@ -4,20 +4,21 @@ module CSS
|
|
4
4
|
class Tokenizer
|
5
5
|
|
6
6
|
macro
|
7
|
-
nl \n|\r\n|\r|\f
|
7
|
+
nl (\n|\r\n|\r|\f)
|
8
8
|
w [\s]*
|
9
9
|
nonascii [^\0-\177]
|
10
10
|
num -?([0-9]+|[0-9]*\.[0-9]+)
|
11
11
|
unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
|
12
12
|
|
13
|
-
escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
|
14
|
-
nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
|
15
|
-
nmstart [_A-Za-z]|{nonascii}|{escape}
|
16
|
-
|
17
|
-
name
|
13
|
+
escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
|
14
|
+
nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
|
15
|
+
nmstart ([_A-Za-z]|{nonascii}|{escape})
|
16
|
+
name {nmstart}{nmchar}*
|
17
|
+
ident -?{name}
|
18
|
+
charref {nmchar}+
|
18
19
|
string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
|
19
20
|
string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
|
20
|
-
string {string1}|{string2}
|
21
|
+
string ({string1}|{string2})
|
21
22
|
|
22
23
|
rule
|
23
24
|
|
@@ -26,7 +27,7 @@ rule
|
|
26
27
|
has\({w} { [:HAS, text] }
|
27
28
|
{ident}\({w} { [:FUNCTION, text] }
|
28
29
|
{ident} { [:IDENT, text] }
|
29
|
-
\#{
|
30
|
+
\#{charref} { [:HASH, text] }
|
30
31
|
{w}~={w} { [:INCLUDES, text] }
|
31
32
|
{w}\|={w} { [:DASHMATCH, text] }
|
32
33
|
{w}\^={w} { [:PREFIXMATCH, text] }
|
@@ -44,6 +44,18 @@ module Nokogiri
|
|
44
44
|
VALUES = [XML, HTML4, HTML5]
|
45
45
|
end
|
46
46
|
|
47
|
+
# The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
|
48
|
+
attr_reader :builtins
|
49
|
+
|
50
|
+
# The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
|
51
|
+
attr_reader :doctype
|
52
|
+
|
53
|
+
# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
|
54
|
+
attr_reader :prefix
|
55
|
+
|
56
|
+
# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
|
57
|
+
attr_reader :namespaces
|
58
|
+
|
47
59
|
# :call-seq:
|
48
60
|
# new() → XPathVisitor
|
49
61
|
# new(builtins:, doctype:) → XPathVisitor
|
@@ -54,7 +66,12 @@ module Nokogiri
|
|
54
66
|
#
|
55
67
|
# [Returns] XPathVisitor
|
56
68
|
#
|
57
|
-
def initialize(
|
69
|
+
def initialize(
|
70
|
+
builtins: BuiltinsConfig::NEVER,
|
71
|
+
doctype: DoctypeConfig::XML,
|
72
|
+
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
|
73
|
+
namespaces: nil
|
74
|
+
)
|
58
75
|
unless BuiltinsConfig::VALUES.include?(builtins)
|
59
76
|
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
|
60
77
|
end
|
@@ -64,6 +81,8 @@ module Nokogiri
|
|
64
81
|
|
65
82
|
@builtins = builtins
|
66
83
|
@doctype = doctype
|
84
|
+
@prefix = prefix
|
85
|
+
@namespaces = namespaces
|
67
86
|
end
|
68
87
|
|
69
88
|
# :call-seq: config() → Hash
|
@@ -72,7 +91,7 @@ module Nokogiri
|
|
72
91
|
# a Hash representing the configuration of the XPathVisitor, suitable for use as
|
73
92
|
# part of the CSS cache key.
|
74
93
|
def config
|
75
|
-
{ builtins: @builtins, doctype: @doctype }
|
94
|
+
{ builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
|
76
95
|
end
|
77
96
|
|
78
97
|
# :stopdoc:
|
@@ -128,6 +147,8 @@ module Nokogiri
|
|
128
147
|
is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
|
129
148
|
".#{"//" unless is_direct}#{node.value[1].accept(self)}"
|
130
149
|
else
|
150
|
+
validate_xpath_function_name(node.value.first)
|
151
|
+
|
131
152
|
# xpath function call, let's marshal those arguments
|
132
153
|
args = ["."]
|
133
154
|
args += node.value[1..-1].map do |n|
|
@@ -207,6 +228,7 @@ module Nokogiri
|
|
207
228
|
when "parent" then "node()"
|
208
229
|
when "root" then "not(parent::*)"
|
209
230
|
else
|
231
|
+
validate_xpath_function_name(node.value.first)
|
210
232
|
"nokogiri:#{node.value.first}(.)"
|
211
233
|
end
|
212
234
|
end
|
@@ -255,6 +277,14 @@ module Nokogiri
|
|
255
277
|
else
|
256
278
|
"*[local-name()='#{node.value.first}']"
|
257
279
|
end
|
280
|
+
elsif node.value.length == 2 # has a namespace prefix
|
281
|
+
if node.value.first.nil? # namespace prefix is empty
|
282
|
+
node.value.last
|
283
|
+
else
|
284
|
+
node.value.join(":")
|
285
|
+
end
|
286
|
+
elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
|
287
|
+
"xmlns:#{node.value.first}"
|
258
288
|
else
|
259
289
|
node.value.first
|
260
290
|
end
|
@@ -270,11 +300,17 @@ module Nokogiri
|
|
270
300
|
|
271
301
|
private
|
272
302
|
|
303
|
+
def validate_xpath_function_name(name)
|
304
|
+
if name.start_with?("-")
|
305
|
+
raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
|
306
|
+
end
|
307
|
+
end
|
308
|
+
|
273
309
|
def html5_element_name_needs_namespace_handling(node)
|
274
|
-
# if
|
275
|
-
node.value.
|
276
|
-
# if
|
277
|
-
|
310
|
+
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
|
311
|
+
node.value.length == 1 &&
|
312
|
+
# if this is the wildcard selector "*", use it as normal
|
313
|
+
node.value.first != "*"
|
278
314
|
end
|
279
315
|
|
280
316
|
def nth(node, options = {})
|
data/lib/nokogiri/css.rb
CHANGED
@@ -8,53 +8,119 @@ module Nokogiri
|
|
8
8
|
# TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
|
9
9
|
# It is not used by Nokogiri and shouldn't be part of the public API.
|
10
10
|
def parse(selector) # :nodoc:
|
11
|
+
warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
|
11
12
|
Parser.new.parse(selector)
|
12
13
|
end
|
13
14
|
|
14
15
|
# :call-seq:
|
15
|
-
# xpath_for(
|
16
|
-
# xpath_for(
|
16
|
+
# xpath_for(selector_list) → Array<String>
|
17
|
+
# xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
|
17
18
|
#
|
18
|
-
# Translate a CSS selector to the equivalent XPath
|
19
|
+
# Translate a CSS selector list to the equivalent XPath expressions.
|
20
|
+
#
|
21
|
+
# 💡 Note that translated queries are cached by default for performance concerns.
|
22
|
+
#
|
23
|
+
# ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
|
24
|
+
# node classes, for querying documents with CSS selectors. This method is the underlying
|
25
|
+
# mechanism used by XML::Searchable and is provided solely for advanced users to translate
|
26
|
+
# \CSS selectors to XPath directly.
|
27
|
+
#
|
28
|
+
# Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
|
29
|
+
# some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
|
30
|
+
# functions.
|
19
31
|
#
|
20
32
|
# [Parameters]
|
21
|
-
# - +
|
33
|
+
# - +selector_list+ (String)
|
22
34
|
#
|
35
|
+
# The CSS selector to be translated into XPath. This is always a String, but that string
|
36
|
+
# value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
|
37
|
+
# examples).
|
38
|
+
#
|
39
|
+
# [Keyword arguments]
|
23
40
|
# - +prefix:+ (String)
|
24
41
|
#
|
25
|
-
# The XPath prefix
|
26
|
-
# +
|
42
|
+
# The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
|
43
|
+
# for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
|
44
|
+
#
|
45
|
+
# - +ns:+ (Hash<String ⇒ String>, nil)
|
46
|
+
#
|
47
|
+
# Namespaces that are referenced in the query, if any. This is a hash where the keys are the
|
48
|
+
# namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
|
49
|
+
# empty set of namespaces.
|
27
50
|
#
|
28
51
|
# - +visitor:+ (Nokogiri::CSS::XPathVisitor)
|
29
52
|
#
|
30
|
-
#
|
31
|
-
#
|
53
|
+
# Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
|
54
|
+
# Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
|
55
|
+
# be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
|
56
|
+
#
|
57
|
+
# ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
|
58
|
+
# provided, +prefix+ and +ns+ must not be present.
|
59
|
+
#
|
60
|
+
# - +cache:+ (Boolean)
|
61
|
+
#
|
62
|
+
# Whether to use the SelectorCache for the translated query to ensure that repeated queries
|
63
|
+
# don't incur the overhead of re-parsing the selector. Default is +true+.
|
32
64
|
#
|
33
|
-
#
|
65
|
+
# [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
|
34
66
|
#
|
35
|
-
#
|
36
|
-
# the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
|
67
|
+
# *Example* with a simple selector:
|
37
68
|
#
|
38
|
-
#
|
69
|
+
# Nokogiri::CSS.xpath_for("div") # => ["//div"]
|
39
70
|
#
|
40
|
-
#
|
71
|
+
# *Example* with a compound selector:
|
41
72
|
#
|
42
|
-
|
43
|
-
|
73
|
+
# Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
|
74
|
+
#
|
75
|
+
# *Example* with a complex selector:
|
76
|
+
#
|
77
|
+
# Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
|
78
|
+
#
|
79
|
+
# *Example* with a selector list:
|
80
|
+
#
|
81
|
+
# Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
|
82
|
+
#
|
83
|
+
def xpath_for(
|
84
|
+
selector, options = nil,
|
85
|
+
prefix: options&.delete(:prefix),
|
86
|
+
visitor: options&.delete(:visitor),
|
87
|
+
ns: options&.delete(:ns),
|
88
|
+
cache: true
|
89
|
+
)
|
90
|
+
unless options.nil?
|
91
|
+
warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
|
92
|
+
end
|
93
|
+
|
94
|
+
raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
|
44
95
|
|
45
96
|
selector = selector.to_str
|
46
|
-
raise
|
97
|
+
raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
|
98
|
+
|
99
|
+
if visitor
|
100
|
+
raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
|
101
|
+
raise ArgumentError, "cannot provide both :ns and :visitor" if ns
|
102
|
+
end
|
103
|
+
|
104
|
+
visitor ||= begin
|
105
|
+
visitor_kw = {}
|
106
|
+
visitor_kw[:prefix] = prefix if prefix
|
107
|
+
visitor_kw[:namespaces] = ns if ns
|
47
108
|
|
48
|
-
|
49
|
-
|
50
|
-
ns = options.fetch(:ns, {})
|
109
|
+
Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
|
110
|
+
end
|
51
111
|
|
52
|
-
|
112
|
+
if cache
|
113
|
+
key = SelectorCache.key(selector: selector, visitor: visitor)
|
114
|
+
SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
|
115
|
+
else
|
116
|
+
Parser.new.xpath_for(selector, visitor)
|
117
|
+
end
|
53
118
|
end
|
54
119
|
end
|
55
120
|
end
|
56
121
|
end
|
57
122
|
|
123
|
+
require_relative "css/selector_cache"
|
58
124
|
require_relative "css/node"
|
59
125
|
require_relative "css/xpath_visitor"
|
60
126
|
x = $-w
|
@@ -23,11 +23,9 @@ module Nokogiri
|
|
23
23
|
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
24
24
|
end
|
25
25
|
else
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
)
|
30
|
-
end
|
26
|
+
list = xpath(
|
27
|
+
*CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
|
28
|
+
)
|
31
29
|
end
|
32
30
|
|
33
31
|
super if list.empty?
|
@@ -6,9 +6,9 @@ module Nokogiri
|
|
6
6
|
# Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
|
7
7
|
USEFUL_ALIASES = {
|
8
8
|
# alias_name => true_name
|
9
|
-
"
|
9
|
+
"ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
|
10
|
+
"NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
|
10
11
|
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
11
|
-
"UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
|
12
12
|
}
|
13
13
|
|
14
14
|
class << self
|
@@ -161,52 +161,73 @@ module Nokogiri
|
|
161
161
|
end
|
162
162
|
|
163
163
|
class << self
|
164
|
-
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
168
|
-
#
|
169
|
-
#
|
170
|
-
#
|
171
|
-
#
|
172
|
-
|
164
|
+
# :call-seq:
|
165
|
+
# parse(input) { |options| ... } => Nokogiri::HTML4::Document
|
166
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
|
167
|
+
#
|
168
|
+
# Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
|
169
|
+
#
|
170
|
+
# [Required Parameters]
|
171
|
+
# - +input+ (String | IO) The content to be parsed.
|
172
|
+
#
|
173
|
+
# [Optional Keyword Arguments]
|
174
|
+
# - +url:+ (String) The base URI for this document.
|
175
|
+
#
|
176
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
177
|
+
# document. When not provided, the encoding will be determined based on the document
|
178
|
+
# content.
|
179
|
+
#
|
180
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
181
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
182
|
+
# +ParseOptions::DEFAULT_HTML+.
|
183
|
+
#
|
184
|
+
# [Yields]
|
185
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
186
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
187
|
+
#
|
188
|
+
# [Returns] Nokogiri::HTML4::Document
|
189
|
+
def parse(
|
190
|
+
input,
|
191
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
|
192
|
+
url: url_, encoding: encoding_, options: options_
|
193
|
+
)
|
173
194
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
174
195
|
yield options if block_given?
|
175
196
|
|
176
|
-
url ||=
|
197
|
+
url ||= input.respond_to?(:path) ? input.path : nil
|
177
198
|
|
178
|
-
if
|
179
|
-
unless
|
180
|
-
encoding ||=
|
199
|
+
if input.respond_to?(:encoding)
|
200
|
+
unless input.encoding == Encoding::ASCII_8BIT
|
201
|
+
encoding ||= input.encoding.name
|
181
202
|
end
|
182
203
|
end
|
183
204
|
|
184
|
-
if
|
185
|
-
if
|
205
|
+
if input.respond_to?(:read)
|
206
|
+
if input.is_a?(Pathname)
|
186
207
|
# resolve the Pathname to the file and open it as an IO object, see #2110
|
187
|
-
|
188
|
-
url ||=
|
208
|
+
input = input.expand_path.open
|
209
|
+
url ||= input.path
|
189
210
|
end
|
190
211
|
|
191
212
|
unless encoding
|
192
|
-
|
213
|
+
input = EncodingReader.new(input)
|
193
214
|
begin
|
194
|
-
return read_io(
|
215
|
+
return read_io(input, url, encoding, options.to_i)
|
195
216
|
rescue EncodingReader::EncodingFound => e
|
196
217
|
encoding = e.found_encoding
|
197
218
|
end
|
198
219
|
end
|
199
|
-
return read_io(
|
220
|
+
return read_io(input, url, encoding, options.to_i)
|
200
221
|
end
|
201
222
|
|
202
223
|
# read_memory pukes on empty docs
|
203
|
-
if
|
224
|
+
if input.nil? || input.empty?
|
204
225
|
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
205
226
|
end
|
206
227
|
|
207
|
-
encoding ||= EncodingReader.detect_encoding(
|
228
|
+
encoding ||= EncodingReader.detect_encoding(input)
|
208
229
|
|
209
|
-
read_memory(
|
230
|
+
read_memory(input, url, encoding, options.to_i)
|
210
231
|
end
|
211
232
|
end
|
212
233
|
end
|