nokogiri 1.15.4 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -19
- data/README.md +8 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +194 -141
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +214 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +4 -7
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +64 -23
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +9 -11
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +15 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -8,6 +8,11 @@ module Nokogiri
|
|
8
8
|
COLLECTIONS = [:attribute_nodes, :children]
|
9
9
|
|
10
10
|
def inspect
|
11
|
+
# handle the case where an exception is thrown during object construction
|
12
|
+
if respond_to?(:data_ptr?) && !data_ptr?
|
13
|
+
return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
|
14
|
+
end
|
15
|
+
|
11
16
|
attributes = inspect_attributes.reject do |x|
|
12
17
|
attribute = send(x)
|
13
18
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
@@ -21,7 +26,7 @@ module Nokogiri
|
|
21
26
|
"#{attribute}=#{send(attribute).inspect}"
|
22
27
|
end.join(" ")
|
23
28
|
end
|
24
|
-
"#<#{self.class
|
29
|
+
"#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
|
25
30
|
end
|
26
31
|
|
27
32
|
def pretty_print(pp)
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,32 +3,34 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
|
7
|
+
# call Nokogiri::XML::Reader#each to iterate over each node.
|
8
|
+
#
|
9
|
+
# Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
|
10
|
+
# Reader is given an \XML document, and yields nodes to an each block.
|
11
|
+
#
|
12
|
+
# The Reader parser might be good for when you need the speed and low memory usage of a \SAX
|
13
|
+
# parser, but do not want to write a SAX::Document handler.
|
9
14
|
#
|
10
15
|
# Here is an example of usage:
|
11
16
|
#
|
12
|
-
# reader = Nokogiri::XML::Reader
|
17
|
+
# reader = Nokogiri::XML::Reader.new <<~XML
|
13
18
|
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
14
19
|
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
15
20
|
# </x>
|
16
|
-
#
|
21
|
+
# XML
|
17
22
|
#
|
18
23
|
# reader.each do |node|
|
19
|
-
#
|
20
24
|
# # node is an instance of Nokogiri::XML::Reader
|
21
25
|
# puts node.name
|
22
|
-
#
|
23
26
|
# end
|
24
27
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
28
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
29
|
+
# document, you must parse the document again. It may be better to capture all information you
|
30
|
+
# need during a single iteration.
|
29
31
|
#
|
30
|
-
#
|
31
|
-
#
|
32
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
|
33
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
34
|
class Reader
|
33
35
|
include Enumerable
|
34
36
|
|
@@ -65,23 +67,55 @@ module Nokogiri
|
|
65
67
|
TYPE_END_ELEMENT = 15
|
66
68
|
# Entity end node type
|
67
69
|
TYPE_END_ENTITY = 16
|
68
|
-
# XML Declaration node type
|
70
|
+
# \XML Declaration node type
|
69
71
|
TYPE_XML_DECLARATION = 17
|
70
72
|
|
71
73
|
# A list of errors encountered while parsing
|
72
74
|
attr_accessor :errors
|
73
75
|
|
74
|
-
# The XML source
|
76
|
+
# The \XML source
|
75
77
|
attr_reader :source
|
76
78
|
|
77
79
|
alias_method :self_closing?, :empty_element?
|
78
80
|
|
79
|
-
|
81
|
+
# :call-seq:
|
82
|
+
# Reader.new(input) { |options| ... } → Reader
|
83
|
+
# Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
|
84
|
+
#
|
85
|
+
# Create a new Reader to parse an \XML document.
|
86
|
+
#
|
87
|
+
# [Required Parameters]
|
88
|
+
# - +input+ (String | IO): The \XML document to parse.
|
89
|
+
#
|
90
|
+
# [Optional Parameters]
|
91
|
+
# - +url:+ (String) The base URL of the document.
|
92
|
+
# - +encoding:+ (String) The name of the encoding of the document.
|
93
|
+
# - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
|
94
|
+
# Defaults to +ParseOptions::STRICT+.
|
95
|
+
#
|
96
|
+
# [Yields]
|
97
|
+
# If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
|
98
|
+
# the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
|
99
|
+
def self.new(
|
100
|
+
string_or_io,
|
101
|
+
url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
|
102
|
+
url: url_, encoding: encoding_, options: options_
|
103
|
+
)
|
104
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
105
|
+
yield options if block_given?
|
106
|
+
|
107
|
+
if string_or_io.respond_to?(:read)
|
108
|
+
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
109
|
+
end
|
110
|
+
|
111
|
+
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
112
|
+
end
|
113
|
+
|
114
|
+
private def initialize(source, url = nil, encoding = nil) # :nodoc:
|
80
115
|
@source = source
|
81
116
|
@errors = []
|
82
117
|
@encoding = encoding
|
83
118
|
end
|
84
|
-
private :initialize
|
85
119
|
|
86
120
|
# Get the attributes and namespaces of the current node as a Hash.
|
87
121
|
#
|
data/lib/nokogiri/xml/relax_ng.rb
CHANGED
@@ -3,36 +3,73 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
class << self
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
|
10
|
-
|
6
|
+
# :call-seq:
|
7
|
+
# RelaxNG(input) → Nokogiri::XML::RelaxNG
|
8
|
+
# RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
|
9
|
+
#
|
10
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new
|
11
|
+
def RelaxNG(...)
|
12
|
+
RelaxNG.new(...)
|
11
13
|
end
|
12
14
|
end
|
13
15
|
|
14
|
-
|
15
|
-
# Nokogiri::XML::RelaxNG is used for validating XML against a
|
16
|
-
# RelaxNG schema.
|
16
|
+
# Nokogiri::XML::RelaxNG is used for validating \XML against a RELAX NG schema definition.
|
17
17
|
#
|
18
|
-
#
|
18
|
+
# 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
|
19
|
+
# treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
|
20
|
+
# resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
|
21
|
+
# unfortunate limitation of the underlying libraries.
|
19
22
|
#
|
20
|
-
#
|
21
|
-
# that are returned and print them out:
|
23
|
+
# *Example:* Determine whether an \XML document is valid.
|
22
24
|
#
|
23
|
-
# schema
|
24
|
-
# doc
|
25
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
|
26
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
27
|
+
# schema.valid?(doc) # Boolean
|
25
28
|
#
|
26
|
-
#
|
27
|
-
# puts error.message
|
28
|
-
# end
|
29
|
+
# *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
|
29
30
|
#
|
30
|
-
#
|
31
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
|
32
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
33
|
+
# errors = schema.validate(doc) # Array<SyntaxError>
|
34
|
+
#
|
35
|
+
# *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
|
36
|
+
#
|
37
|
+
# schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
|
38
|
+
# schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
|
39
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
40
|
+
# schema.valid?(doc) # Boolean
|
31
41
|
#
|
32
|
-
# NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
|
33
|
-
# underlying parsing libraries to access network resources. This is counter to Nokogiri's
|
34
|
-
# "untrusted by default" security policy, but is a limitation of the underlying libraries.
|
35
42
|
class RelaxNG < Nokogiri::XML::Schema
|
43
|
+
# :call-seq:
|
44
|
+
# new(input) → Nokogiri::XML::RelaxNG
|
45
|
+
# new(input, options:) → Nokogiri::XML::RelaxNG
|
46
|
+
#
|
47
|
+
# Parse a RELAX NG schema definition from a String or IO to create a new Nokogiri::XML::RelaxNG.
|
48
|
+
#
|
49
|
+
# [Parameters]
|
50
|
+
# - +input+ (String | IO) RELAX NG schema definition
|
51
|
+
# - +options:+ (Nokogiri::XML::ParseOptions)
|
52
|
+
# Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
53
|
+
#
|
54
|
+
# [Returns] Nokogiri::XML::RelaxNG
|
55
|
+
#
|
56
|
+
# ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
57
|
+
# future functionality.
|
58
|
+
#
|
59
|
+
# Also see convenience method Nokogiri::XML::RelaxNG()
|
60
|
+
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
|
61
|
+
from_document(Nokogiri::XML::Document.parse(input), options)
|
62
|
+
end
|
63
|
+
|
64
|
+
# :call-seq:
|
65
|
+
# read_memory(input) → Nokogiri::XML::RelaxNG
|
66
|
+
# read_memory(input, options:) → Nokogiri::XML::RelaxNG
|
67
|
+
#
|
68
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new.
|
69
|
+
def self.read_memory(...)
|
70
|
+
# TODO deprecate this method
|
71
|
+
new(...)
|
72
|
+
end
|
36
73
|
end
|
37
74
|
end
|
38
75
|
end
|
data/lib/nokogiri/xml/sax/document.rb
CHANGED
@@ -2,106 +2,168 @@
|
|
2
2
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
|
-
###
|
6
|
-
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
7
|
-
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
8
|
-
# Nokogiri::HTML4::SAX.
|
9
|
-
#
|
10
|
-
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
11
|
-
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
12
|
-
# you when it encounters events you said you would like to know about.
|
13
|
-
#
|
14
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
15
|
-
# methods for which you would like notification.
|
16
|
-
#
|
17
|
-
# For example, if I want to be notified when a document ends, and when an element starts, I
|
18
|
-
# would write a class like this:
|
19
|
-
#
|
20
|
-
# class MyDocument < Nokogiri::XML::SAX::Document
|
21
|
-
# def end_document
|
22
|
-
# puts "the document has ended"
|
23
|
-
# end
|
24
|
-
#
|
25
|
-
# def start_element name, attributes = []
|
26
|
-
# puts "#{name} started"
|
27
|
-
# end
|
28
|
-
# end
|
29
|
-
#
|
30
|
-
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
31
|
-
#
|
32
|
-
# # Create a new parser
|
33
|
-
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
34
|
-
#
|
35
|
-
# # Feed the parser some XML
|
36
|
-
# parser.parse(File.open(ARGV[0]))
|
37
|
-
#
|
38
|
-
# Now my document handler will be called when each node starts, and when then document ends. To
|
39
|
-
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
40
|
-
#
|
41
|
-
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
42
|
-
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
43
|
-
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
44
|
-
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
45
5
|
module SAX
|
46
|
-
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
6
|
+
# :markup: markdown
|
7
|
+
#
|
8
|
+
# The SAX::Document class is used for registering types of events you are interested in
|
9
|
+
# handling. All of the methods on this class are available as possible events while parsing an
|
10
|
+
# \XML document. To register for any particular event, subclass this class and implement the
|
11
|
+
# methods you are interested in knowing about.
|
51
12
|
#
|
52
13
|
# To only be notified about start and end element events, write a class like this:
|
53
14
|
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
15
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
16
|
+
# def start_element name, attrs = []
|
17
|
+
# puts "#{name} started!"
|
18
|
+
# end
|
58
19
|
#
|
59
|
-
#
|
60
|
-
#
|
20
|
+
# def end_element name
|
21
|
+
# puts "#{name} ended"
|
22
|
+
# end
|
61
23
|
# end
|
62
|
-
# end
|
63
24
|
#
|
64
|
-
# You can use this event handler for any SAX
|
65
|
-
#
|
25
|
+
# You can use this event handler for any SAX-style parser included with Nokogiri.
|
26
|
+
#
|
27
|
+
# See also:
|
28
|
+
#
|
29
|
+
# - Nokogiri::XML::SAX
|
30
|
+
# - Nokogiri::HTML4::SAX
|
31
|
+
#
|
32
|
+
# ### Entity Handling
|
33
|
+
#
|
34
|
+
# ⚠ Entity handling is complicated in a SAX parser! Please read this section carefully if
|
35
|
+
# you're not getting the behavior you expect.
|
36
|
+
#
|
37
|
+
# Entities will be reported to the user via callbacks to #characters, to #reference, or
|
38
|
+
# possibly to both. The behavior is determined by a combination of _entity type_ and the value
|
39
|
+
# of ParserContext#replace_entities. (Recall that the default value of
|
40
|
+
# ParserContext#replace_entities is `false`.)
|
41
|
+
#
|
42
|
+
# ⚠ <b>It is UNSAFE to set ParserContext#replace_entities to `true`</b> when parsing untrusted
|
43
|
+
# documents.
|
44
|
+
#
|
45
|
+
# 💡 For more information on entity types, see [Wikipedia's page on
|
46
|
+
# DTDs](https://en.wikipedia.org/wiki/Document_type_definition#Entity_declarations).
|
47
|
+
#
|
48
|
+
# | Entity type | #characters | #reference |
|
49
|
+
# |--------------------------------------|------------------------------------|-------------------------------------|
|
50
|
+
# | Char ref (e.g., <tt>&#146;</tt>) | always | never |
|
51
|
+
# | Predefined (e.g., <tt>&amp;</tt>) | always | never |
|
52
|
+
# | Undeclared † | never | <tt>#replace_entities == false</tt> |
|
53
|
+
# | Internal | always | <tt>#replace_entities == false</tt> |
|
54
|
+
# | External † | <tt>#replace_entities == true</tt> | <tt>#replace_entities == false</tt> |
|
55
|
+
#
|
56
|
+
#
|
57
|
+
#
|
58
|
+
# † In the case where the replacement text for the entity is unknown (e.g., an undeclared entity
|
59
|
+
# or an external entity that could not be resolved because of network issues), then the
|
60
|
+
# replacement text will not be reported. If ParserContext#replace_entities is `true`, this
|
61
|
+
# means the #characters callback will not be invoked. If ParserContext#replace_entities is
|
62
|
+
# `false`, then the #reference callback will be invoked, but with `nil` for the `content`
|
63
|
+
# argument.
|
64
|
+
#
|
66
65
|
class Document
|
67
66
|
###
|
68
|
-
# Called when an XML declaration is parsed
|
67
|
+
# Called when an \XML declaration is parsed.
|
68
|
+
#
|
69
|
+
# [Parameters]
|
70
|
+
# - +version+ (String) the version attribute
|
71
|
+
# - +encoding+ (String, nil) the encoding of the document if present, else +nil+
|
72
|
+
# - +standalone+ ("yes", "no", nil) the standalone attribute if present, else +nil+
|
69
73
|
def xmldecl(version, encoding, standalone)
|
70
74
|
end
|
71
75
|
|
72
76
|
###
|
73
|
-
# Called when document starts parsing
|
77
|
+
# Called when document starts parsing.
|
74
78
|
def start_document
|
75
79
|
end
|
76
80
|
|
77
81
|
###
|
78
|
-
# Called when document ends parsing
|
82
|
+
# Called when document ends parsing.
|
79
83
|
def end_document
|
80
84
|
end
|
81
85
|
|
82
86
|
###
|
83
|
-
# Called at the beginning of an element
|
84
|
-
#
|
85
|
-
#
|
87
|
+
# Called at the beginning of an element.
|
88
|
+
#
|
89
|
+
# [Parameters]
|
90
|
+
# - +name+ (String) the name of the element
|
91
|
+
# - +attrs+ (Array<Array<String>>) an assoc list of namespace declarations and attributes, e.g.:
|
86
92
|
# [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
|
93
|
+
#
|
94
|
+
# 💡If you're dealing with XML and need to handle namespaces, use the
|
95
|
+
# #start_element_namespace method instead.
|
96
|
+
#
|
97
|
+
# Note that the element namespace and any attribute namespaces are not provided, and so any
|
98
|
+
# namespaced elements or attributes will be returned as strings including the prefix:
|
99
|
+
#
|
100
|
+
# parser.parse(<<~XML)
|
101
|
+
# <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
|
102
|
+
# <foo:bar foo:quux="xxx">hello world</foo:bar>
|
103
|
+
# </root>
|
104
|
+
# XML
|
105
|
+
#
|
106
|
+
# assert_pattern do
|
107
|
+
# parser.document.start_elements => [
|
108
|
+
# ["root", [["xmlns:foo", "http://foo.example.com/"], ["xmlns", "http://example.com/"]]],
|
109
|
+
# ["foo:bar", [["foo:quux", "xxx"]]],
|
110
|
+
# ]
|
111
|
+
# end
|
112
|
+
#
|
87
113
|
def start_element(name, attrs = [])
|
88
114
|
end
|
89
115
|
|
90
116
|
###
|
91
|
-
# Called at the end of an element
|
92
|
-
#
|
117
|
+
# Called at the end of an element.
|
118
|
+
#
|
119
|
+
# [Parameters]
|
120
|
+
# - +name+ (String) the name of the element being closed
|
121
|
+
#
|
93
122
|
def end_element(name)
|
94
123
|
end
|
95
124
|
|
96
125
|
###
|
97
|
-
# Called at the beginning of an element
|
98
|
-
#
|
99
|
-
#
|
100
|
-
# +
|
101
|
-
# +
|
102
|
-
# +
|
126
|
+
# Called at the beginning of an element.
|
127
|
+
#
|
128
|
+
# [Parameters]
|
129
|
+
# - +name+ (String) is the name of the element
|
130
|
+
# - +attrs+ (Array<Attribute>) is an array of structs with the following properties:
|
131
|
+
# - +localname+ (String) the local name of the attribute
|
132
|
+
# - +value+ (String) the value of the attribute
|
133
|
+
# - +prefix+ (String, nil) the namespace prefix of the attribute
|
134
|
+
# - +uri+ (String, nil) the namespace URI of the attribute
|
135
|
+
# - +prefix+ (String, nil) is the namespace prefix for the element
|
136
|
+
# - +uri+ (String, nil) is the associated URI for the element's namespace
|
137
|
+
# - +ns+ (Array<Array<String, String>>) is an assoc list of namespace declarations on the element
|
138
|
+
#
|
139
|
+
# 💡If you're dealing with HTML or don't care about namespaces, try #start_element instead.
|
140
|
+
#
|
141
|
+
# [Example]
|
142
|
+
# it "start_elements_namespace is called with namespaced attributes" do
|
143
|
+
# parser.parse(<<~XML)
|
144
|
+
# <root xmlns:foo='http://foo.example.com/'>
|
145
|
+
# <foo:a foo:bar='hello' />
|
146
|
+
# </root>
|
147
|
+
# XML
|
148
|
+
#
|
149
|
+
# assert_pattern do
|
150
|
+
# parser.document.start_elements_namespace => [
|
151
|
+
# [
|
152
|
+
# "root",
|
153
|
+
# [],
|
154
|
+
# nil, nil,
|
155
|
+
# [["foo", "http://foo.example.com/"]], # namespace declarations
|
156
|
+
# ], [
|
157
|
+
# "a",
|
158
|
+
# [Nokogiri::XML::SAX::Parser::Attribute(localname: "bar", prefix: "foo", uri: "http://foo.example.com/", value: "hello")], # prefixed attribute
|
159
|
+
# "foo", "http://foo.example.com/", # prefix and uri for the "a" element
|
160
|
+
# [],
|
161
|
+
# ]
|
162
|
+
# ]
|
163
|
+
# end
|
164
|
+
# end
|
165
|
+
#
|
103
166
|
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
104
|
-
###
|
105
167
|
# Deal with SAX v1 interface
|
106
168
|
name = [prefix, name].compact.join(":")
|
107
169
|
attributes = ns.map do |ns_prefix, ns_uri|
|
@@ -113,52 +175,81 @@ module Nokogiri
|
|
113
175
|
end
|
114
176
|
|
115
177
|
###
|
116
|
-
# Called at the end of an element
|
117
|
-
#
|
118
|
-
#
|
119
|
-
# +
|
178
|
+
# Called at the end of an element.
|
179
|
+
#
|
180
|
+
# [Parameters]
|
181
|
+
# - +name+ (String) is the name of the element
|
182
|
+
# - +prefix+ (String, nil) is the namespace prefix for the element
|
183
|
+
# - +uri+ (String, nil) is the associated URI for the element's namespace
|
184
|
+
#
|
120
185
|
def end_element_namespace(name, prefix = nil, uri = nil)
|
121
|
-
###
|
122
186
|
# Deal with SAX v1 interface
|
123
187
|
end_element([prefix, name].compact.join(":"))
|
124
188
|
end
|
125
189
|
|
126
190
|
###
|
127
|
-
#
|
128
|
-
#
|
191
|
+
# Called when character data is parsed, and for parsed entities when
|
192
|
+
# ParserContext#replace_entities is +true+.
|
193
|
+
#
|
194
|
+
# [Parameters]
|
195
|
+
# - +string+ contains the character data or entity replacement text
|
196
|
+
#
|
197
|
+
# ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
|
198
|
+
#
|
199
|
+
# ⚠ This method might be called multiple times for a contiguous string of characters.
|
129
200
|
#
|
130
|
-
# +string+ contains the character data
|
131
201
|
def characters(string)
|
132
202
|
end
|
133
203
|
|
204
|
+
###
|
205
|
+
# Called when a parsed entity is referenced and not replaced.
|
206
|
+
#
|
207
|
+
# [Parameters]
|
208
|
+
# - +name+ (String) is the name of the entity
|
209
|
+
# - +content+ (String, nil) is the replacement text for the entity, if known
|
210
|
+
#
|
211
|
+
# ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
|
212
|
+
#
|
213
|
+
# ⚠ An internal entity may result in a call to both #characters and #reference.
|
214
|
+
#
|
215
|
+
# Since v1.17.0
|
216
|
+
#
|
217
|
+
def reference(name, content)
|
218
|
+
end
|
219
|
+
|
134
220
|
###
|
135
221
|
# Called when comments are encountered
|
136
|
-
#
|
222
|
+
# [Parameters]
|
223
|
+
# - +string+ contains the comment data
|
137
224
|
def comment(string)
|
138
225
|
end
|
139
226
|
|
140
227
|
###
|
141
228
|
# Called on document warnings
|
142
|
-
#
|
229
|
+
# [Parameters]
|
230
|
+
# - +string+ contains the warning
|
143
231
|
def warning(string)
|
144
232
|
end
|
145
233
|
|
146
234
|
###
|
147
235
|
# Called on document errors
|
148
|
-
#
|
236
|
+
# [Parameters]
|
237
|
+
# - +string+ contains the error
|
149
238
|
def error(string)
|
150
239
|
end
|
151
240
|
|
152
241
|
###
|
153
242
|
# Called when cdata blocks are found
|
154
|
-
#
|
243
|
+
# [Parameters]
|
244
|
+
# - +string+ contains the cdata content
|
155
245
|
def cdata_block(string)
|
156
246
|
end
|
157
247
|
|
158
248
|
###
|
159
249
|
# Called when processing instructions are found
|
160
|
-
#
|
161
|
-
# +
|
250
|
+
# [Parameters]
|
251
|
+
# - +name+ is the target of the instruction
|
252
|
+
# - +content+ is the value of the instruction
|
162
253
|
def processing_instruction(name, content)
|
163
254
|
end
|
164
255
|
end
|