nokogiri 1.12.3 → 1.13.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +5 -6
- data/ext/nokogiri/extconf.rb +47 -35
- data/ext/nokogiri/xml_document.c +35 -35
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_node.c +645 -333
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- metadata +101 -27
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -36,29 +37,29 @@ module Nokogiri
|
|
36
37
|
|
37
38
|
# Encodings this parser supports
|
38
39
|
ENCODINGS = {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
40
|
+
"NONE" => 0, # No char encoding detected
|
41
|
+
"UTF-8" => 1, # UTF-8
|
42
|
+
"UTF16LE" => 2, # UTF-16 little endian
|
43
|
+
"UTF16BE" => 3, # UTF-16 big endian
|
44
|
+
"UCS4LE" => 4, # UCS-4 little endian
|
45
|
+
"UCS4BE" => 5, # UCS-4 big endian
|
46
|
+
"EBCDIC" => 6, # EBCDIC uh!
|
47
|
+
"UCS4-2143" => 7, # UCS-4 unusual ordering
|
48
|
+
"UCS4-3412" => 8, # UCS-4 unusual ordering
|
49
|
+
"UCS2" => 9, # UCS-2
|
50
|
+
"ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
|
51
|
+
"ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
|
52
|
+
"ISO-8859-3" => 12, # ISO-8859-3
|
53
|
+
"ISO-8859-4" => 13, # ISO-8859-4
|
54
|
+
"ISO-8859-5" => 14, # ISO-8859-5
|
55
|
+
"ISO-8859-6" => 15, # ISO-8859-6
|
56
|
+
"ISO-8859-7" => 16, # ISO-8859-7
|
57
|
+
"ISO-8859-8" => 17, # ISO-8859-8
|
58
|
+
"ISO-8859-9" => 18, # ISO-8859-9
|
59
|
+
"ISO-2022-JP" => 19, # ISO-2022-JP
|
60
|
+
"SHIFT-JIS" => 20, # Shift_JIS
|
61
|
+
"EUC-JP" => 21, # EUC-JP
|
62
|
+
"ASCII" => 22, # pure ASCII
|
62
63
|
}
|
63
64
|
|
64
65
|
# The Nokogiri::XML::SAX::Document where events will be sent.
|
@@ -68,7 +69,7 @@ module Nokogiri
|
|
68
69
|
attr_accessor :encoding
|
69
70
|
|
70
71
|
# Create a new Parser with +doc+ and +encoding+
|
71
|
-
def initialize
|
72
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
|
72
73
|
@encoding = check_encoding(encoding)
|
73
74
|
@document = doc
|
74
75
|
@warned = false
|
@@ -77,7 +78,7 @@ module Nokogiri
|
|
77
78
|
###
|
78
79
|
# Parse given +thing+ which may be a string containing xml, or an
|
79
80
|
# IO object.
|
80
|
-
def parse
|
81
|
+
def parse(thing, &block)
|
81
82
|
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
82
83
|
parse_io(thing, &block)
|
83
84
|
else
|
@@ -87,34 +88,35 @@ module Nokogiri
|
|
87
88
|
|
88
89
|
###
|
89
90
|
# Parse given +io+
|
90
|
-
def parse_io
|
91
|
+
def parse_io(io, encoding = "ASCII")
|
91
92
|
@encoding = check_encoding(encoding)
|
92
93
|
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
93
94
|
yield ctx if block_given?
|
94
|
-
ctx.parse_with
|
95
|
+
ctx.parse_with(self)
|
95
96
|
end
|
96
97
|
|
97
98
|
###
|
98
99
|
# Parse a file with +filename+
|
99
|
-
def parse_file
|
100
|
+
def parse_file(filename)
|
100
101
|
raise ArgumentError unless filename
|
101
102
|
raise Errno::ENOENT unless File.exist?(filename)
|
102
103
|
raise Errno::EISDIR if File.directory?(filename)
|
103
|
-
ctx = ParserContext.file
|
104
|
+
ctx = ParserContext.file(filename)
|
104
105
|
yield ctx if block_given?
|
105
|
-
ctx.parse_with
|
106
|
+
ctx.parse_with(self)
|
106
107
|
end
|
107
108
|
|
108
|
-
def parse_memory
|
109
|
-
ctx = ParserContext.memory
|
109
|
+
def parse_memory(data)
|
110
|
+
ctx = ParserContext.memory(data)
|
110
111
|
yield ctx if block_given?
|
111
|
-
ctx.parse_with
|
112
|
+
ctx.parse_with(self)
|
112
113
|
end
|
113
114
|
|
114
115
|
private
|
116
|
+
|
115
117
|
def check_encoding(encoding)
|
116
118
|
encoding.upcase.tap do |enc|
|
117
|
-
raise ArgumentError
|
119
|
+
raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
|
118
120
|
end
|
119
121
|
end
|
120
122
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -7,9 +8,12 @@ module Nokogiri
|
|
7
8
|
# by the user. Instead, you should be looking at
|
8
9
|
# Nokogiri::XML::SAX::Parser
|
9
10
|
class ParserContext
|
10
|
-
def self.new
|
11
|
-
[:read, :close].all? { |x| thing.respond_to?(x) }
|
12
|
-
io(thing, Parser::ENCODINGS[encoding])
|
11
|
+
def self.new(thing, encoding = "UTF-8")
|
12
|
+
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
13
|
+
io(thing, Parser::ENCODINGS[encoding])
|
14
|
+
else
|
15
|
+
memory(thing)
|
16
|
+
end
|
13
17
|
end
|
14
18
|
end
|
15
19
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -24,7 +25,6 @@ module Nokogiri
|
|
24
25
|
# parser << "/div>"
|
25
26
|
# parser.finish
|
26
27
|
class PushParser
|
27
|
-
|
28
28
|
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
29
29
|
# operating
|
30
30
|
attr_accessor :document
|
@@ -32,7 +32,7 @@ module Nokogiri
|
|
32
32
|
###
|
33
33
|
# Create a new PushParser with +doc+ as the SAX Document, providing
|
34
34
|
# an optional +file_name+ and +encoding+
|
35
|
-
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding =
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
|
36
36
|
@document = doc
|
37
37
|
@encoding = encoding
|
38
38
|
@sax_parser = XML::SAX::Parser.new(doc)
|
@@ -44,16 +44,16 @@ module Nokogiri
|
|
44
44
|
###
|
45
45
|
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
46
46
|
# that can be called will be called immediately.
|
47
|
-
def write
|
47
|
+
def write(chunk, last_chunk = false)
|
48
48
|
native_write(chunk, last_chunk)
|
49
49
|
end
|
50
|
-
|
50
|
+
alias_method :<<, :write
|
51
51
|
|
52
52
|
###
|
53
53
|
# Finish the parsing. This method is only necessary for
|
54
54
|
# Nokogiri::XML::SAX::Document#end_document to be called.
|
55
55
|
def finish
|
56
|
-
write
|
56
|
+
write("", true)
|
57
57
|
end
|
58
58
|
end
|
59
59
|
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
class << self
|
@@ -42,7 +43,7 @@ module Nokogiri
|
|
42
43
|
###
|
43
44
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
44
45
|
# object.
|
45
|
-
def self.new
|
46
|
+
def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
46
47
|
from_document(Nokogiri::XML(string_or_io), options)
|
47
48
|
end
|
48
49
|
|
@@ -51,9 +52,9 @@ module Nokogiri
|
|
51
52
|
# Nokogiri::XML::Document object, or a filename. An Array of
|
52
53
|
# Nokogiri::XML::SyntaxError objects found while validating the
|
53
54
|
# +thing+ is returned.
|
54
|
-
def validate
|
55
|
-
if thing.is_a?(Nokogiri::XML::Document)
|
56
|
-
validate_document(thing)
|
55
|
+
def validate(thing)
|
56
|
+
if thing.is_a?(Nokogiri::XML::Document)
|
57
|
+
validate_document(thing)
|
57
58
|
elsif File.file?(thing)
|
58
59
|
validate_file(thing)
|
59
60
|
else
|
@@ -64,8 +65,8 @@ module Nokogiri
|
|
64
65
|
###
|
65
66
|
# Returns true if +thing+ is a valid Nokogiri::XML::Document or
|
66
67
|
# file.
|
67
|
-
def valid?
|
68
|
-
validate(thing).
|
68
|
+
def valid?(thing)
|
69
|
+
validate(thing).empty?
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|
@@ -1,22 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
module XML
|
4
6
|
#
|
5
7
|
# The Searchable module declares the interface used for searching your DOM.
|
6
8
|
#
|
7
|
-
# It implements the public methods
|
9
|
+
# It implements the public methods #search, #css, and #xpath,
|
8
10
|
# as well as allowing specific implementations to specialize some
|
9
11
|
# of the important behaviors.
|
10
12
|
#
|
11
13
|
module Searchable
|
12
14
|
# Regular expression used by Searchable#search to determine if a query
|
13
15
|
# string is CSS or XPath
|
14
|
-
LOOKS_LIKE_XPATH =
|
16
|
+
LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
|
15
17
|
|
16
|
-
#
|
18
|
+
# :section: Searching via XPath or CSS Queries
|
17
19
|
|
18
20
|
###
|
19
|
-
# call-seq:
|
21
|
+
# call-seq:
|
22
|
+
# search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
|
20
23
|
#
|
21
24
|
# Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
|
22
25
|
#
|
@@ -27,41 +30,39 @@ module Nokogiri
|
|
27
30
|
# node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
|
28
31
|
# node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
|
29
32
|
#
|
30
|
-
# For XPath queries, a hash of variable bindings may also be
|
31
|
-
#
|
33
|
+
# For XPath queries, a hash of variable bindings may also be appended to the namespace
|
34
|
+
# bindings. For example:
|
32
35
|
#
|
33
36
|
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
34
37
|
#
|
35
|
-
# Custom XPath functions and CSS pseudo-selectors may also be
|
36
|
-
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# end
|
47
|
-
# }.new
|
48
|
-
# )
|
38
|
+
# 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
|
39
|
+
# functions create a class and implement the function you want to define. The first argument
|
40
|
+
# to the method will be the current matching NodeSet. Any other arguments are ones that you
|
41
|
+
# pass in. Note that this class may appear anywhere in the argument list. For example:
|
42
|
+
#
|
43
|
+
# handler = Class.new {
|
44
|
+
# def regex node_set, regex
|
45
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
46
|
+
# end
|
47
|
+
# }.new
|
48
|
+
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
49
49
|
#
|
50
50
|
# See Searchable#xpath and Searchable#css for further usage help.
|
51
51
|
def search(*args)
|
52
52
|
paths, handler, ns, binds = extract_params(args)
|
53
53
|
|
54
54
|
xpaths = paths.map(&:to_s).map do |path|
|
55
|
-
(path
|
55
|
+
LOOKS_LIKE_XPATH.match?(path) ? path : xpath_query_from_css_rule(path, ns)
|
56
56
|
end.flatten.uniq
|
57
57
|
|
58
58
|
xpath(*(xpaths + [ns, handler, binds].compact))
|
59
59
|
end
|
60
60
|
|
61
|
-
|
61
|
+
alias_method :/, :search
|
62
62
|
|
63
63
|
###
|
64
|
-
# call-seq:
|
64
|
+
# call-seq:
|
65
|
+
# at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
|
65
66
|
#
|
66
67
|
# Search this object for +paths+, and return only the first
|
67
68
|
# result. +paths+ must be one or more XPath or CSS queries.
|
@@ -71,10 +72,11 @@ module Nokogiri
|
|
71
72
|
search(*args).first
|
72
73
|
end
|
73
74
|
|
74
|
-
|
75
|
+
alias_method :%, :at
|
75
76
|
|
76
77
|
###
|
77
|
-
# call-seq:
|
78
|
+
# call-seq:
|
79
|
+
# css(*rules, [namespace-bindings, custom-pseudo-class])
|
78
80
|
#
|
79
81
|
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
80
82
|
# selectors. For example:
|
@@ -87,33 +89,49 @@ module Nokogiri
|
|
87
89
|
#
|
88
90
|
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
89
91
|
#
|
90
|
-
# Custom CSS pseudo classes may also be defined
|
91
|
-
# custom pseudo classes, create a class and implement the custom
|
92
|
-
#
|
93
|
-
#
|
94
|
-
# arguments are ones that you pass in. For example:
|
92
|
+
# 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
|
93
|
+
# function. To define custom pseudo classes, create a class and implement the custom pseudo
|
94
|
+
# class you want defined. The first argument to the method will be the matching context
|
95
|
+
# NodeSet. Any other arguments are ones that you pass in. For example:
|
95
96
|
#
|
96
|
-
#
|
97
|
-
# def regex
|
97
|
+
# handler = Class.new {
|
98
|
+
# def regex(node_set, regex)
|
98
99
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
99
100
|
# end
|
100
|
-
# }.new
|
101
|
+
# }.new
|
102
|
+
# node.css('title:regex("\w+")', handler)
|
103
|
+
#
|
104
|
+
# 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
|
105
|
+
#
|
106
|
+
# node.css('img > @href') # returns all +href+ attributes on an +img+ element
|
107
|
+
# node.css('img / @href') # same
|
108
|
+
#
|
109
|
+
# # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
|
110
|
+
# node.css('div @class')
|
101
111
|
#
|
102
|
-
#
|
103
|
-
# to your document type. That is, if you're looking for "H1" in
|
104
|
-
# an HTML document, you'll never find anything, since HTML tags
|
105
|
-
# will match only lowercase CSS queries. However, "H1" might be
|
106
|
-
# found in an XML document, where tags names are case-sensitive
|
107
|
-
# (e.g., "H1" is distinct from "h1").
|
112
|
+
# node.css
|
108
113
|
#
|
114
|
+
# 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
|
115
|
+
#
|
116
|
+
# ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
|
117
|
+
# example:
|
118
|
+
#
|
119
|
+
# # equivalent to 'li:nth-child(2)'
|
120
|
+
# node.css('li[2]') # retrieve the second li element in a list
|
121
|
+
#
|
122
|
+
# ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
|
123
|
+
# tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
|
124
|
+
# you'll never find anything. However, "H1" might be found in an XML document, where tag
|
125
|
+
# names are case-sensitive (e.g., "H1" is distinct from "h1").
|
109
126
|
def css(*args)
|
110
127
|
rules, handler, ns, _ = extract_params(args)
|
111
128
|
|
112
|
-
css_internal
|
129
|
+
css_internal(self, rules, handler, ns)
|
113
130
|
end
|
114
131
|
|
115
132
|
##
|
116
|
-
# call-seq:
|
133
|
+
# call-seq:
|
134
|
+
# at_css(*rules, [namespace-bindings, custom-pseudo-class])
|
117
135
|
#
|
118
136
|
# Search this object for CSS +rules+, and return only the first
|
119
137
|
# match. +rules+ must be one or more CSS selectors.
|
@@ -124,7 +142,8 @@ module Nokogiri
|
|
124
142
|
end
|
125
143
|
|
126
144
|
###
|
127
|
-
# call-seq:
|
145
|
+
# call-seq:
|
146
|
+
# xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
|
128
147
|
#
|
129
148
|
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
130
149
|
# queries.
|
@@ -140,27 +159,27 @@ module Nokogiri
|
|
140
159
|
#
|
141
160
|
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
142
161
|
#
|
143
|
-
# Custom XPath functions may also be defined.
|
144
|
-
#
|
145
|
-
#
|
146
|
-
#
|
147
|
-
# you pass in. Note that this class may appear anywhere in the
|
148
|
-
# argument list. For example:
|
162
|
+
# 💡 Custom XPath functions may also be defined. To define custom functions create a class and
|
163
|
+
# implement the function you want to define. The first argument to the method will be the
|
164
|
+
# current matching NodeSet. Any other arguments are ones that you pass in. Note that this
|
165
|
+
# class may appear anywhere in the argument list. For example:
|
149
166
|
#
|
150
|
-
#
|
151
|
-
# def regex
|
167
|
+
# handler = Class.new {
|
168
|
+
# def regex(node_set, regex)
|
152
169
|
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
153
170
|
# end
|
154
|
-
# }.new
|
171
|
+
# }.new
|
172
|
+
# node.xpath('.//title[regex(., "\w+")]', handler)
|
155
173
|
#
|
156
174
|
def xpath(*args)
|
157
175
|
paths, handler, ns, binds = extract_params(args)
|
158
176
|
|
159
|
-
xpath_internal
|
177
|
+
xpath_internal(self, paths, handler, ns, binds)
|
160
178
|
end
|
161
179
|
|
162
180
|
##
|
163
|
-
# call-seq:
|
181
|
+
# call-seq:
|
182
|
+
# at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
|
164
183
|
#
|
165
184
|
# Search this node for XPath +paths+, and return only the first
|
166
185
|
# match. +paths+ must be one or more XPath queries.
|
@@ -170,12 +189,21 @@ module Nokogiri
|
|
170
189
|
xpath(*args).first
|
171
190
|
end
|
172
191
|
|
173
|
-
#
|
192
|
+
# :call-seq:
|
193
|
+
# >(selector) → NodeSet
|
194
|
+
#
|
195
|
+
# Search this node's immediate children using CSS selector +selector+
|
196
|
+
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
|
197
|
+
ns = (document.root&.namespaces || {})
|
198
|
+
xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
|
199
|
+
end
|
200
|
+
|
201
|
+
# :section:
|
174
202
|
|
175
203
|
private
|
176
204
|
|
177
205
|
def css_internal(node, rules, handler, ns)
|
178
|
-
xpath_internal
|
206
|
+
xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
|
179
207
|
end
|
180
208
|
|
181
209
|
def xpath_internal(node, paths, handler, ns, binds)
|
@@ -198,9 +226,9 @@ module Nokogiri
|
|
198
226
|
ctx.register_namespaces(ns)
|
199
227
|
path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
|
200
228
|
|
201
|
-
binds
|
202
|
-
ctx.register_variable
|
203
|
-
end
|
229
|
+
binds&.each do |key, value|
|
230
|
+
ctx.register_variable(key.to_s, value)
|
231
|
+
end
|
204
232
|
|
205
233
|
ctx.evaluate(path, handler)
|
206
234
|
end
|
@@ -210,10 +238,13 @@ module Nokogiri
|
|
210
238
|
end
|
211
239
|
|
212
240
|
def xpath_query_from_css_rule(rule, ns)
|
213
|
-
visitor = Nokogiri::CSS::
|
241
|
+
visitor = Nokogiri::CSS::XPathVisitor.new(
|
242
|
+
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
|
243
|
+
doctype: document.xpath_doctype,
|
244
|
+
)
|
214
245
|
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
215
|
-
CSS.xpath_for(rule.to_s, {:
|
216
|
-
|
246
|
+
CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
|
247
|
+
visitor: visitor, })
|
217
248
|
end.join(" | ")
|
218
249
|
end
|
219
250
|
|
@@ -230,7 +261,7 @@ module Nokogiri
|
|
230
261
|
end
|
231
262
|
ns, binds = hashes.reverse
|
232
263
|
|
233
|
-
ns ||= document.root
|
264
|
+
ns ||= (document.root&.namespaces || {})
|
234
265
|
|
235
266
|
[params, handler, ns, binds]
|
236
267
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
###
|
@@ -42,9 +43,9 @@ module Nokogiri
|
|
42
43
|
|
43
44
|
def to_s
|
44
45
|
message = super.chomp
|
45
|
-
[location_to_s, level_to_s, message]
|
46
|
-
compact.join(": ")
|
47
|
-
force_encoding(message.encoding)
|
46
|
+
[location_to_s, level_to_s, message]
|
47
|
+
.compact.join(": ")
|
48
|
+
.force_encoding(message.encoding)
|
48
49
|
end
|
49
50
|
|
50
51
|
private
|
@@ -54,7 +55,6 @@ module Nokogiri
|
|
54
55
|
when 3 then "FATAL"
|
55
56
|
when 2 then "ERROR"
|
56
57
|
when 1 then "WARNING"
|
57
|
-
else nil
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
data/lib/nokogiri/xml/text.rb
CHANGED
data/lib/nokogiri/xml/xpath.rb
CHANGED
@@ -1,7 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module XPath
|
6
|
+
# The XPath search prefix to search globally, +//+
|
7
|
+
GLOBAL_SEARCH_PREFIX = "//"
|
8
|
+
|
9
|
+
# The XPath search prefix to search direct descendants of the root element, +/+
|
10
|
+
ROOT_SEARCH_PREFIX = "/"
|
11
|
+
|
12
|
+
# The XPath search prefix to search direct descendants of the current element, +./+
|
13
|
+
CURRENT_SEARCH_PREFIX = "./"
|
14
|
+
|
15
|
+
# The XPath search prefix to search anywhere in the current element's subtree, +.//+
|
16
|
+
SUBTREE_SEARCH_PREFIX = ".//"
|
5
17
|
end
|
6
18
|
end
|
7
19
|
end
|
@@ -1,17 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
class XPathContext
|
5
|
-
|
6
6
|
###
|
7
7
|
# Register namespaces in +namespaces+
|
8
8
|
def register_namespaces(namespaces)
|
9
9
|
namespaces.each do |k, v|
|
10
|
-
k = k.to_s.gsub(/.*:/,
|
10
|
+
k = k.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
|
11
11
|
register_ns(k, v)
|
12
12
|
end
|
13
13
|
end
|
14
|
-
|
15
14
|
end
|
16
15
|
end
|
17
16
|
end
|
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
class << self
|
4
5
|
###
|
@@ -21,7 +22,6 @@ module Nokogiri
|
|
21
22
|
# Nokogiri::XML::Reader for more information
|
22
23
|
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
|
23
24
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
24
|
-
# Give the options to the user
|
25
25
|
yield options if block_given?
|
26
26
|
|
27
27
|
if string_or_io.respond_to?(:read)
|
@@ -38,8 +38,8 @@ module Nokogiri
|
|
38
38
|
|
39
39
|
####
|
40
40
|
# Parse a fragment from +string+ into a NodeSet.
|
41
|
-
def fragment(string)
|
42
|
-
XML::DocumentFragment.parse(string)
|
41
|
+
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
|
42
|
+
XML::DocumentFragment.parse(string, options, &block)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|