nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -8,6 +8,11 @@ module Nokogiri
8
8
  COLLECTIONS = [:attribute_nodes, :children]
9
9
 
10
10
  def inspect
11
+ # handle the case where an exception is thrown during object construction
12
+ if respond_to?(:data_ptr?) && !data_ptr?
13
+ return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
14
+ end
15
+
11
16
  attributes = inspect_attributes.reject do |x|
12
17
  attribute = send(x)
13
18
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
@@ -21,7 +26,7 @@ module Nokogiri
21
26
  "#{attribute}=#{send(attribute).inspect}"
22
27
  end.join(" ")
23
28
  end
24
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
29
+ "#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
25
30
  end
26
31
 
27
32
  def pretty_print(pp)
@@ -3,32 +3,34 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
7
+ # call Nokogiri::XML::Reader#each to iterate over each node.
8
+ #
9
+ # Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
10
+ # Reader is given an \XML document, and yields nodes to an each block.
11
+ #
12
+ # The Reader parser might be good for when you need the speed and low memory usage of a \SAX
13
+ # parser, but do not want to write a SAX::Document handler.
9
14
  #
10
15
  # Here is an example of usage:
11
16
  #
12
- # reader = Nokogiri::XML::Reader(<<-eoxml)
17
+ # reader = Nokogiri::XML::Reader.new <<~XML
13
18
  # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
14
19
  # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
15
20
  # </x>
16
- # eoxml
21
+ # XML
17
22
  #
18
23
  # reader.each do |node|
19
- #
20
24
  # # node is an instance of Nokogiri::XML::Reader
21
25
  # puts node.name
22
- #
23
26
  # end
24
27
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
28
+ # ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
29
+ # document, you must parse the document again. It may be better to capture all information you
30
+ # need during a single iteration.
29
31
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
32
+ # ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
33
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
34
  class Reader
33
35
  include Enumerable
34
36
 
@@ -65,23 +67,55 @@ module Nokogiri
65
67
  TYPE_END_ELEMENT = 15
66
68
  # Entity end node type
67
69
  TYPE_END_ENTITY = 16
68
- # XML Declaration node type
70
+ # \XML Declaration node type
69
71
  TYPE_XML_DECLARATION = 17
70
72
 
71
73
  # A list of errors encountered while parsing
72
74
  attr_accessor :errors
73
75
 
74
- # The XML source
76
+ # The \XML source
75
77
  attr_reader :source
76
78
 
77
79
  alias_method :self_closing?, :empty_element?
78
80
 
79
- def initialize(source, url = nil, encoding = nil) # :nodoc:
81
+ # :call-seq:
82
+ # Reader.new(input) { |options| ... } → Reader
83
+ # Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
84
+ #
85
+ # Create a new Reader to parse an \XML document.
86
+ #
87
+ # [Required Parameters]
88
+ # - +input+ (String | IO): The \XML document to parse.
89
+ #
90
+ # [Optional Parameters]
91
+ # - +url:+ (String) The base URL of the document.
92
+ # - +encoding:+ (String) The name of the encoding of the document.
93
+ # - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
94
+ # Defaults to +ParseOptions::STRICT+.
95
+ #
96
+ # [Yields]
97
+ # If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
98
+ # the document is parsed. See Nokogiri::XML::ParseOptions for more information.
99
+ def self.new(
100
+ string_or_io,
101
+ url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
102
+ url: url_, encoding: encoding_, options: options_
103
+ )
104
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
105
+ yield options if block_given?
106
+
107
+ if string_or_io.respond_to?(:read)
108
+ return Reader.from_io(string_or_io, url, encoding, options.to_i)
109
+ end
110
+
111
+ Reader.from_memory(string_or_io, url, encoding, options.to_i)
112
+ end
113
+
114
+ private def initialize(source, url = nil, encoding = nil) # :nodoc:
80
115
  @source = source
81
116
  @errors = []
82
117
  @encoding = encoding
83
118
  end
84
- private :initialize
85
119
 
86
120
  # Get the attributes and namespaces of the current node as a Hash.
87
121
  #
@@ -3,36 +3,73 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  class << self
6
- ###
7
- # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
8
- # See Nokogiri::XML::RelaxNG for an example.
9
- def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
- RelaxNG.new(string_or_io, options)
6
+ # :call-seq:
7
+ # RelaxNG(input) → Nokogiri::XML::RelaxNG
8
+ # RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
9
+ #
10
+ # Convenience method for Nokogiri::XML::RelaxNG.new
11
+ def RelaxNG(...)
12
+ RelaxNG.new(...)
11
13
  end
12
14
  end
13
15
 
14
- ###
15
- # Nokogiri::XML::RelaxNG is used for validating XML against a
16
- # RelaxNG schema.
16
+ # Nokogiri::XML::RelaxNG is used for validating \XML against a RELAX NG schema definition.
17
17
  #
18
- # == Synopsis
18
+ # 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
19
+ # treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
20
+ # resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
21
+ # unfortunate limitation of the underlying libraries.
19
22
  #
20
- # Validate an XML document against a RelaxNG schema. Loop over the errors
21
- # that are returned and print them out:
23
+ # *Example:* Determine whether an \XML document is valid.
22
24
  #
23
- # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
24
- # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
25
+ # schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
26
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
27
+ # schema.valid?(doc) # Boolean
25
28
  #
26
- # schema.validate(doc).each do |error|
27
- # puts error.message
28
- # end
29
+ # *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
29
30
  #
30
- # The list of errors are Nokogiri::XML::SyntaxError objects.
31
+ # schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
32
+ # doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
33
+ # errors = schema.validate(doc) # Array<SyntaxError>
34
+ #
35
+ # *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
36
+ #
37
+ # schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
38
+ # schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
39
+ # doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
40
+ # schema.valid?(doc) # Boolean
31
41
  #
32
- # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
33
- # underlying parsing libraries to access network resources. This is counter to Nokogiri's
34
- # "untrusted by default" security policy, but is a limitation of the underlying libraries.
35
42
  class RelaxNG < Nokogiri::XML::Schema
43
+ # :call-seq:
44
+ # new(input) → Nokogiri::XML::RelaxNG
45
+ # new(input, options:) → Nokogiri::XML::RelaxNG
46
+ #
47
+ # Parse a RELAX NG schema definition from a String or IO to create a new Nokogiri::XML::RelaxNG.
48
+ #
49
+ # [Parameters]
50
+ # - +input+ (String | IO) RELAX NG schema definition
51
+ # - +options:+ (Nokogiri::XML::ParseOptions)
52
+ # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
53
+ #
54
+ # [Returns] Nokogiri::XML::RelaxNG
55
+ #
56
+ # ⚠ +options:+ is currently unused by this method and is present only as a placeholder for
57
+ # future functionality.
58
+ #
59
+ # Also see convenience method Nokogiri::XML::RelaxNG()
60
+ def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
61
+ from_document(Nokogiri::XML::Document.parse(input), options)
62
+ end
63
+
64
+ # :call-seq:
65
+ # read_memory(input) → Nokogiri::XML::RelaxNG
66
+ # read_memory(input, options:) → Nokogiri::XML::RelaxNG
67
+ #
68
+ # Convenience method for Nokogiri::XML::RelaxNG.new.
69
+ def self.read_memory(...)
70
+ # TODO deprecate this method
71
+ new(...)
72
+ end
36
73
  end
37
74
  end
38
75
  end
@@ -2,106 +2,168 @@
2
2
 
3
3
  module Nokogiri
4
4
  module XML
5
- ###
6
- # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
7
- # dealing with XML. If you want to do SAX style parsing using HTML, check out
8
- # Nokogiri::HTML4::SAX.
9
- #
10
- # The basic way a SAX style parser works is by creating a parser, telling the parser about the
11
- # events we're interested in, then giving the parser some XML to process. The parser will notify
12
- # you when it encounters events you said you would like to know about.
13
- #
14
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
15
- # methods for which you would like notification.
16
- #
17
- # For example, if I want to be notified when a document ends, and when an element starts, I
18
- # would write a class like this:
19
- #
20
- # class MyDocument < Nokogiri::XML::SAX::Document
21
- # def end_document
22
- # puts "the document has ended"
23
- # end
24
- #
25
- # def start_element name, attributes = []
26
- # puts "#{name} started"
27
- # end
28
- # end
29
- #
30
- # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
- #
32
- # # Create a new parser
33
- # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
34
- #
35
- # # Feed the parser some XML
36
- # parser.parse(File.open(ARGV[0]))
37
- #
38
- # Now my document handler will be called when each node starts, and when then document ends. To
39
- # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
40
- #
41
- # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
42
- # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
43
- # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
44
- # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
45
5
  module SAX
46
- ###
47
- # This class is used for registering types of events you are interested in handling. All of
48
- # the methods on this class are available as possible events while parsing an XML document. To
49
- # register for any particular event, just subclass this class and implement the methods you
50
- # are interested in knowing about.
6
+ # :markup: markdown
7
+ #
8
+ # The SAX::Document class is used for registering types of events you are interested in
9
+ # handling. All of the methods on this class are available as possible events while parsing an
10
+ # \XML document. To register for any particular event, subclass this class and implement the
11
+ # methods you are interested in knowing about.
51
12
  #
52
13
  # To only be notified about start and end element events, write a class like this:
53
14
  #
54
- # class MyDocument < Nokogiri::XML::SAX::Document
55
- # def start_element name, attrs = []
56
- # puts "#{name} started!"
57
- # end
15
+ # class MyHandler < Nokogiri::XML::SAX::Document
16
+ # def start_element name, attrs = []
17
+ # puts "#{name} started!"
18
+ # end
58
19
  #
59
- # def end_element name
60
- # puts "#{name} ended"
20
+ # def end_element name
21
+ # puts "#{name} ended"
22
+ # end
61
23
  # end
62
- # end
63
24
  #
64
- # You can use this event handler for any SAX style parser included with Nokogiri. See
65
- # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
25
+ # You can use this event handler for any SAX-style parser included with Nokogiri.
26
+ #
27
+ # See also:
28
+ #
29
+ # - Nokogiri::XML::SAX
30
+ # - Nokogiri::HTML4::SAX
31
+ #
32
+ # ### Entity Handling
33
+ #
34
+ # ⚠ Entity handling is complicated in a SAX parser! Please read this section carefully if
35
+ # you're not getting the behavior you expect.
36
+ #
37
+ # Entities will be reported to the user via callbacks to #characters, to #reference, or
38
+ # possibly to both. The behavior is determined by a combination of _entity type_ and the value
39
+ # of ParserContext#replace_entities. (Recall that the default value of
40
+ # ParserContext#replace_entities is `false`.)
41
+ #
42
+ # ⚠ <b>It is UNSAFE to set ParserContext#replace_entities to `true`</b> when parsing untrusted
43
+ # documents.
44
+ #
45
+ # 💡 For more information on entity types, see [Wikipedia's page on
46
+ # DTDs](https://en.wikipedia.org/wiki/Document_type_definition#Entity_declarations).
47
+ #
48
+ # | Entity type | #characters | #reference |
49
+ # |--------------------------------------|------------------------------------|-------------------------------------|
50
+ # | Char ref (e.g., <tt>&#146;</tt>) | always | never |
51
+ # | Predefined (e.g., <tt>&amp;</tt>) | always | never |
52
+ # | Undeclared † | never | <tt>#replace_entities == false</tt> |
53
+ # | Internal | always | <tt>#replace_entities == false</tt> |
54
+ # | External † | <tt>#replace_entities == true</tt> | <tt>#replace_entities == false</tt> |
55
+ #
56
+ # &nbsp;
57
+ #
58
+ # † In the case where the replacement text for the entity is unknown (e.g., an undeclared entity
59
+ # or an external entity that could not be resolved because of network issues), then the
60
+ # replacement text will not be reported. If ParserContext#replace_entities is `true`, this
61
+ # means the #characters callback will not be invoked. If ParserContext#replace_entities is
62
+ # `false`, then the #reference callback will be invoked, but with `nil` for the `content`
63
+ # argument.
64
+ #
66
65
  class Document
67
66
  ###
68
- # Called when an XML declaration is parsed
67
+ # Called when an \XML declaration is parsed.
68
+ #
69
+ # [Parameters]
70
+ # - +version+ (String) the version attribute
71
+ # - +encoding+ (String, nil) the encoding of the document if present, else +nil+
72
+ # - +standalone+ ("yes", "no", nil) the standalone attribute if present, else +nil+
69
73
  def xmldecl(version, encoding, standalone)
70
74
  end
71
75
 
72
76
  ###
73
- # Called when document starts parsing
77
+ # Called when document starts parsing.
74
78
  def start_document
75
79
  end
76
80
 
77
81
  ###
78
- # Called when document ends parsing
82
+ # Called when document ends parsing.
79
83
  def end_document
80
84
  end
81
85
 
82
86
  ###
83
- # Called at the beginning of an element
84
- # * +name+ is the name of the tag
85
- # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
87
+ # Called at the beginning of an element.
88
+ #
89
+ # [Parameters]
90
+ # - +name+ (String) the name of the element
91
+ # - +attrs+ (Array<Array<String>>) an assoc list of namespace declarations and attributes, e.g.:
86
92
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
93
+ #
94
+ # 💡 If you're dealing with XML and need to handle namespaces, use the
95
+ # #start_element_namespace method instead.
96
+ #
97
+ # Note that the element namespace and any attribute namespaces are not provided, and so any
98
+ # namespaced elements or attributes will be returned as strings including the prefix:
99
+ #
100
+ # parser.parse(<<~XML)
101
+ # <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
102
+ # <foo:bar foo:quux="xxx">hello world</foo:bar>
103
+ # </root>
104
+ # XML
105
+ #
106
+ # assert_pattern do
107
+ # parser.document.start_elements => [
108
+ # ["root", [["xmlns:foo", "http://foo.example.com/"], ["xmlns", "http://example.com/"]]],
109
+ # ["foo:bar", [["foo:quux", "xxx"]]],
110
+ # ]
111
+ # end
112
+ #
87
113
  def start_element(name, attrs = [])
88
114
  end
89
115
 
90
116
  ###
91
- # Called at the end of an element
92
- # +name+ is the tag name
117
+ # Called at the end of an element.
118
+ #
119
+ # [Parameters]
120
+ # - +name+ (String) the name of the element being closed
121
+ #
93
122
  def end_element(name)
94
123
  end
95
124
 
96
125
  ###
97
- # Called at the beginning of an element
98
- # +name+ is the element name
99
- # +attrs+ is a list of attributes
100
- # +prefix+ is the namespace prefix for the element
101
- # +uri+ is the associated namespace URI
102
- # +ns+ is a hash of namespace prefix:urls associated with the element
126
+ # Called at the beginning of an element.
127
+ #
128
+ # [Parameters]
129
+ # - +name+ (String) is the name of the element
130
+ # - +attrs+ (Array<Attribute>) is an array of structs with the following properties:
131
+ # - +localname+ (String) the local name of the attribute
132
+ # - +value+ (String) the value of the attribute
133
+ # - +prefix+ (String, nil) the namespace prefix of the attribute
134
+ # - +uri+ (String, nil) the namespace URI of the attribute
135
+ # - +prefix+ (String, nil) is the namespace prefix for the element
136
+ # - +uri+ (String, nil) is the associated URI for the element's namespace
137
+ # - +ns+ (Array<Array<String, String>>) is an assoc list of namespace declarations on the element
138
+ #
139
+ # 💡 If you're dealing with HTML or don't care about namespaces, try #start_element instead.
140
+ #
141
+ # [Example]
142
+ # it "start_elements_namespace is called with namespaced attributes" do
143
+ # parser.parse(<<~XML)
144
+ # <root xmlns:foo='http://foo.example.com/'>
145
+ # <foo:a foo:bar='hello' />
146
+ # </root>
147
+ # XML
148
+ #
149
+ # assert_pattern do
150
+ # parser.document.start_elements_namespace => [
151
+ # [
152
+ # "root",
153
+ # [],
154
+ # nil, nil,
155
+ # [["foo", "http://foo.example.com/"]], # namespace declarations
156
+ # ], [
157
+ # "a",
158
+ # [Nokogiri::XML::SAX::Parser::Attribute(localname: "bar", prefix: "foo", uri: "http://foo.example.com/", value: "hello")], # prefixed attribute
159
+ # "foo", "http://foo.example.com/", # prefix and uri for the "a" element
160
+ # [],
161
+ # ]
162
+ # ]
163
+ # end
164
+ # end
165
+ #
103
166
  def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
104
- ###
105
167
  # Deal with SAX v1 interface
106
168
  name = [prefix, name].compact.join(":")
107
169
  attributes = ns.map do |ns_prefix, ns_uri|
@@ -113,52 +175,81 @@ module Nokogiri
113
175
  end
114
176
 
115
177
  ###
116
- # Called at the end of an element
117
- # +name+ is the element's name
118
- # +prefix+ is the namespace prefix associated with the element
119
- # +uri+ is the associated namespace URI
178
+ # Called at the end of an element.
179
+ #
180
+ # [Parameters]
181
+ # - +name+ (String) is the name of the element
182
+ # - +prefix+ (String, nil) is the namespace prefix for the element
183
+ # - +uri+ (String, nil) is the associated URI for the element's namespace
184
+ #
120
185
  def end_element_namespace(name, prefix = nil, uri = nil)
121
- ###
122
186
  # Deal with SAX v1 interface
123
187
  end_element([prefix, name].compact.join(":"))
124
188
  end
125
189
 
126
190
  ###
127
- # Characters read between a tag. This method might be called multiple
128
- # times given one contiguous string of characters.
191
+ # Called when character data is parsed, and for parsed entities when
192
+ # ParserContext#replace_entities is +true+.
193
+ #
194
+ # [Parameters]
195
+ # - +string+ contains the character data or entity replacement text
196
+ #
197
+ # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
198
+ #
199
+ # ⚠ This method might be called multiple times for a contiguous string of characters.
129
200
  #
130
- # +string+ contains the character data
131
201
  def characters(string)
132
202
  end
133
203
 
204
+ ###
205
+ # Called when a parsed entity is referenced and not replaced.
206
+ #
207
+ # [Parameters]
208
+ # - +name+ (String) is the name of the entity
209
+ # - +content+ (String, nil) is the replacement text for the entity, if known
210
+ #
211
+ # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
212
+ #
213
+ # ⚠ An internal entity may result in a call to both #characters and #reference.
214
+ #
215
+ # Since v1.17.0
216
+ #
217
+ def reference(name, content)
218
+ end
219
+
134
220
  ###
135
221
  # Called when comments are encountered
136
- # +string+ contains the comment data
222
+ # [Parameters]
223
+ # - +string+ contains the comment data
137
224
  def comment(string)
138
225
  end
139
226
 
140
227
  ###
141
228
  # Called on document warnings
142
- # +string+ contains the warning
229
+ # [Parameters]
230
+ # - +string+ contains the warning
143
231
  def warning(string)
144
232
  end
145
233
 
146
234
  ###
147
235
  # Called on document errors
148
- # +string+ contains the error
236
+ # [Parameters]
237
+ # - +string+ contains the error
149
238
  def error(string)
150
239
  end
151
240
 
152
241
  ###
153
242
  # Called when cdata blocks are found
154
- # +string+ contains the cdata content
243
+ # [Parameters]
244
+ # - +string+ contains the cdata content
155
245
  def cdata_block(string)
156
246
  end
157
247
 
158
248
  ###
159
249
  # Called when processing instructions are found
160
- # +name+ is the target of the instruction
161
- # +content+ is the value of the instruction
250
+ # [Parameters]
251
+ # - +name+ is the target of the instruction
252
+ # - +content+ is the value of the instruction
162
253
  def processing_instruction(name, content)
163
254
  end
164
255
  end