nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,18 +1,16 @@
1
1
  # coding: utf-8
2
2
  # frozen_string_literal: true
3
3
 
4
- require 'pathname'
4
+ require "pathname"
5
5
 
6
6
  module Nokogiri
7
7
  module XML
8
- ##
9
- # Nokogiri::XML::Document is the main entry point for dealing with
10
- # XML documents. The Document is created by parsing an XML document.
11
- # See Nokogiri::XML::Document.parse() for more information on parsing.
8
+ # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
+ # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
+ # on parsing.
12
11
  #
13
12
  # For searching a Document, see Nokogiri::XML::Searchable#css and
14
13
  # Nokogiri::XML::Searchable#xpath
15
- #
16
14
  class Document < Nokogiri::XML::Node
17
15
  # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
16
  # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
@@ -21,102 +19,121 @@ module Nokogiri
21
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
23
21
 
24
- ##
25
- # Parse an XML file.
26
- #
27
- # +string_or_io+ may be a String, or any object that responds to
28
- # _read_ and _close_ such as an IO, or StringIO.
29
- #
30
- # +url+ (optional) is the URI where this document is located.
31
- #
32
- # +encoding+ (optional) is the encoding that should be used when processing
33
- # the document.
34
- #
35
- # +options+ (optional) is a configuration object that sets options during
36
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
37
- # Nokogiri::XML::ParseOptions for more information.
38
- #
39
- # +block+ (optional) is passed a configuration object on which
40
- # parse options may be set.
41
- #
42
- # By default, Nokogiri treats documents as untrusted, and so
43
- # does not attempt to load DTDs or access the network. See
44
- # Nokogiri::XML::ParseOptions for a complete list of options;
45
- # and that module's DEFAULT_XML constant for what's set (and not
46
- # set) by default.
47
- #
48
- # Nokogiri.XML() is a convenience method which will call this method.
49
- #
50
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
51
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
52
-
53
- yield options if block_given?
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
54
61
 
55
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
56
68
 
57
- if empty_doc?(string_or_io)
58
- if options.strict?
59
- raise Nokogiri::XML::SyntaxError.new("Empty document")
69
+ read_io(string_or_io, url, encoding, options.to_i)
60
70
  else
61
- return encoding ? new.tap { |i| i.encoding = encoding } : new
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
62
73
  end
63
- end
64
74
 
65
- doc = if string_or_io.respond_to?(:read)
66
- if string_or_io.is_a?(Pathname)
67
- # resolve the Pathname to the file and open it as an IO object, see #2110
68
- string_or_io = string_or_io.expand_path.open
69
- url ||= string_or_io.path
70
- end
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
71
77
 
72
- read_io(string_or_io, url, encoding, options.to_i)
73
- else
74
- # read_memory pukes on empty docs
75
- read_memory(string_or_io, url, encoding, options.to_i)
76
- end
78
+ doc
79
+ end
77
80
 
78
- # do xinclude processing
79
- doc.do_xinclude(options) if options.xinclude?
81
+ private
80
82
 
81
- return doc
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
82
88
  end
83
89
 
84
90
  ##
85
- # @!method wrap(java_document)
86
- # @!scope class
91
+ # :singleton-method: wrap
92
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
93
+ #
94
+ # ⚠ This method is only available when running JRuby.
87
95
  #
88
- # Create a {Document} using an existing Java DOM document object.
96
+ # Create a Document using an existing Java DOM document object.
89
97
  #
90
- # The returned {Document} shares the same underlying data structure as the Java object, so
98
+ # The returned Document shares the same underlying data structure as the Java object, so
91
99
  # changes in one are reflected in the other.
92
100
  #
93
- # @param java_document [Java::OrgW3cDom::Document]
94
- # @return [Nokogiri::XML::Document]
95
- # @note This method is only available when running JRuby.
96
- # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
- # @see #to_java
101
+ # [Parameters]
102
+ # - `java_document` (Java::OrgW3cDom::Document)
103
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
104
+ #
105
+ # [Returns] Nokogiri::XML::Document
106
+ #
107
+ # See also \#to_java
98
108
 
99
- ##
100
- # @!method to_java()
109
+ # :method: to_java
110
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
101
111
  #
102
- # Returns the underlying Java DOM document object for the {Document}.
112
+ # This method is only available when running JRuby.
103
113
  #
104
- # The returned Java object shares the same underlying data structure as the {Document}, so
114
+ # Returns the underlying Java DOM document object for this document.
115
+ #
116
+ # The returned Java object shares the same underlying data structure as this document, so
105
117
  # changes in one are reflected in the other.
106
118
  #
107
- # @return [Java::OrgW3cDom::Document]
108
- # @note This method is only available when running JRuby.
109
- # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
- # @see .wrap
111
-
119
+ # [Returns]
120
+ # Java::OrgW3cDom::Document
121
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
122
+ #
123
+ # See also Document.wrap
112
124
 
113
- # A list of Nokogiri::XML::SyntaxError found when parsing a document
125
+ # The errors found while parsing a document.
126
+ #
127
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
114
128
  attr_accessor :errors
115
129
 
116
- # When true, reparented elements without a namespace will inherit their new parent's
117
- # namespace (if one exists). Defaults to +false+.
130
+ # When `true`, reparented elements without a namespace will inherit their new parent's
131
+ # namespace (if one exists). Defaults to `false`.
132
+ #
133
+ # [Returns] Boolean
134
+ #
135
+ # *Example:* Default behavior of namespace inheritance
118
136
  #
119
- # @example Default behavior of namespace inheritance
120
137
  # xml = <<~EOF
121
138
  # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
122
139
  # <foo:parent>
@@ -134,7 +151,8 @@ module Nokogiri
134
151
  # # </foo:parent>
135
152
  # # </root>
136
153
  #
137
- # @example Setting namespace inheritance to +true+
154
+ # *Example:* Setting namespace inheritance to `true`
155
+ #
138
156
  # xml = <<~EOF
139
157
  # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
140
158
  # <foo:parent>
@@ -153,49 +171,62 @@ module Nokogiri
153
171
  # # </foo:parent>
154
172
  # # </root>
155
173
  #
156
- # @return [Boolean]
157
- #
158
- # @since v1.12.4
174
+ # Since v1.12.4
159
175
  attr_accessor :namespace_inheritance
160
176
 
161
- def initialize *args # :nodoc:
177
+ # :nodoc:
178
+ def initialize(*args) # rubocop:disable Lint/MissingSuper
162
179
  @errors = []
163
180
  @decorators = nil
164
181
  @namespace_inheritance = false
165
182
  end
166
183
 
167
- ##
168
- # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
169
- # setting contents or attributes.
184
+ # :call-seq:
185
+ # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
186
+ #
187
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
188
+ # attributes.
189
+ #
190
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
191
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
192
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
193
+ # place it in the document tree.
170
194
  #
171
195
  # Arguments may be passed to initialize the element:
172
- # - a +Hash+ argument will be used to set attributes
173
- # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
196
+ #
197
+ # - a Hash argument will be used to set attributes
198
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
174
199
  #
175
200
  # A block may be passed to mutate the node.
176
201
  #
177
- # @param name [String]
178
- # @param contents_or_attrs [#to_s,Hash]
179
- # @yieldparam node [Nokogiri::XML::Element]
180
- # @return [Nokogiri::XML::Element]
202
+ # [Parameters]
203
+ # - `name` (String)
204
+ # - `contents_or_attrs` (\#to_s, Hash)
205
+ # [Yields] `node` (Nokogiri::XML::Element)
206
+ # [Returns] Nokogiri::XML::Element
207
+ #
208
+ # *Example:* An empty element without attributes
181
209
  #
182
- # @example An empty element without attributes
183
210
  # doc.create_element("div")
184
211
  # # => <div></div>
185
212
  #
186
- # @example An element with contents
213
+ # *Example:* An element with contents
214
+ #
187
215
  # doc.create_element("div", "contents")
188
216
  # # => <div>contents</div>
189
217
  #
190
- # @example An element with attributes
218
+ # *Example:* An element with attributes
219
+ #
191
220
  # doc.create_element("div", {"class" => "container"})
192
221
  # # => <div class='container'></div>
193
222
  #
194
- # @example An element with contents and attributes
223
+ # *Example:* An element with contents and attributes
224
+ #
195
225
  # doc.create_element("div", "contents", {"class" => "container"})
196
226
  # # => <div class='container'>contents</div>
197
227
  #
198
- # @example Passing a block to mutate the element
228
+ # *Example:* Passing a block to mutate the element
229
+ #
199
230
  # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
200
231
  #
201
232
  def create_element(name, *contents_or_attrs, &block)
@@ -216,30 +247,30 @@ module Nokogiri
216
247
  elm.content = arg
217
248
  end
218
249
  end
219
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
250
+ if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
220
251
  elm.namespace = ns
221
252
  end
222
253
  elm
223
254
  end
224
255
 
225
256
  # Create a Text Node with +string+
226
- def create_text_node string, &block
227
- Nokogiri::XML::Text.new string.to_s, self, &block
257
+ def create_text_node(string, &block)
258
+ Nokogiri::XML::Text.new(string.to_s, self, &block)
228
259
  end
229
260
 
230
261
  # Create a CDATA Node containing +string+
231
- def create_cdata string, &block
232
- Nokogiri::XML::CDATA.new self, string.to_s, &block
262
+ def create_cdata(string, &block)
263
+ Nokogiri::XML::CDATA.new(self, string.to_s, &block)
233
264
  end
234
265
 
235
266
  # Create a Comment Node containing +string+
236
- def create_comment string, &block
237
- Nokogiri::XML::Comment.new self, string.to_s, &block
267
+ def create_comment(string, &block)
268
+ Nokogiri::XML::Comment.new(self, string.to_s, &block)
238
269
  end
239
270
 
240
271
  # The name of this document. Always returns "document"
241
272
  def name
242
- 'document'
273
+ "document"
243
274
  end
244
275
 
245
276
  # A reference to +self+
@@ -247,46 +278,51 @@ module Nokogiri
247
278
  self
248
279
  end
249
280
 
250
- ##
251
- # Recursively get all namespaces from this node and its subtree and
252
- # return them as a hash.
281
+ # :call-seq:
282
+ # collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
253
283
  #
254
- # For example, given this document:
284
+ # Recursively get all namespaces from this node and its subtree and return them as a
285
+ # hash.
255
286
  #
256
- # <root xmlns:foo="bar">
287
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
288
+ #
289
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
290
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
291
+ # underlying XML library.
292
+ #
293
+ # *Example:* Basic usage
294
+ #
295
+ # Given this document:
296
+ #
297
+ # <root xmlns="default" xmlns:foo="bar">
257
298
  # <bar xmlns:hello="world" />
258
299
  # </root>
259
300
  #
260
301
  # This method will return:
261
302
  #
262
- # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
303
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
304
+ #
305
+ # *Example:* Duplicate prefixes
263
306
  #
264
- # WARNING: this method will clobber duplicate names in the keys.
265
- # For example, given this document:
307
+ # Given this document:
266
308
  #
267
309
  # <root xmlns:foo="bar">
268
310
  # <bar xmlns:foo="baz" />
269
311
  # </root>
270
312
  #
271
- # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
272
- #
273
- # Non-prefixed default namespaces (as in "xmlns=") are not included
274
- # in the hash.
313
+ # The hash returned will be something like:
275
314
  #
276
- # Note that this method does an xpath lookup for nodes with
277
- # namespaces, and as a result the order may be dependent on the
278
- # implementation of the underlying XML library.
315
+ # {"xmlns:foo" => "baz"}
279
316
  #
280
317
  def collect_namespaces
281
- xpath("//namespace::*").inject({}) do |hash, ns|
282
- hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
283
- hash
318
+ xpath("//namespace::*").each_with_object({}) do |ns, hash|
319
+ hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
284
320
  end
285
321
  end
286
322
 
287
323
  # Get the list of decorators given +key+
288
- def decorators key
289
- @decorators ||= Hash.new
324
+ def decorators(key)
325
+ @decorators ||= {}
290
326
  @decorators[key] ||= []
291
327
  end
292
328
 
@@ -295,7 +331,8 @@ module Nokogiri
295
331
  # the document or +nil+ when there is no DTD.
296
332
  def validate
297
333
  return nil unless internal_subset
298
- internal_subset.validate self
334
+
335
+ internal_subset.validate(self)
299
336
  end
300
337
 
301
338
  ##
@@ -315,7 +352,7 @@ module Nokogiri
315
352
  # ... which does absolutely nothing.
316
353
  #
317
354
  def slop!
318
- unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
355
+ unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
319
356
  decorators(XML::Node) << Nokogiri::Decorators::Slop
320
357
  decorate!
321
358
  end
@@ -325,16 +362,18 @@ module Nokogiri
325
362
 
326
363
  ##
327
364
  # Apply any decorators to +node+
328
- def decorate node
365
+ def decorate(node)
329
366
  return unless @decorators
330
- @decorators.each { |klass,list|
367
+
368
+ @decorators.each do |klass, list|
331
369
  next unless node.is_a?(klass)
370
+
332
371
  list.each { |moodule| node.extend(moodule) }
333
- }
372
+ end
334
373
  end
335
374
 
336
- alias :to_xml :serialize
337
- alias :clone :dup
375
+ alias_method :to_xml, :serialize
376
+ alias_method :clone, :dup
338
377
 
339
378
  # Get the hash of namespaces on the root Nokogiri::XML::Node
340
379
  def namespaces
@@ -344,35 +383,85 @@ module Nokogiri
344
383
  ##
345
384
  # Create a Nokogiri::XML::DocumentFragment from +tags+
346
385
  # Returns an empty fragment if +tags+ is nil.
347
- def fragment tags = nil
348
- DocumentFragment.new(self, tags, self.root)
386
+ def fragment(tags = nil)
387
+ DocumentFragment.new(self, tags, root)
349
388
  end
350
389
 
351
390
  undef_method :swap, :parent, :namespace, :default_namespace=
352
391
  undef_method :add_namespace_definition, :attributes
353
392
  undef_method :namespace_definitions, :line, :add_namespace
354
393
 
355
- def add_child node_or_tags
356
- raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
394
+ def add_child(node_or_tags)
395
+ raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
396
+
357
397
  node_or_tags = coerce(node_or_tags)
358
398
  if node_or_tags.is_a?(XML::NodeSet)
359
399
  raise "A document may not have multiple root nodes." if node_or_tags.size > 1
400
+
360
401
  super(node_or_tags.first)
361
402
  else
362
403
  super
363
404
  end
364
405
  end
365
- alias :<< :add_child
406
+ alias_method :<<, :add_child
366
407
 
367
- private
408
+ # :call-seq:
409
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
410
+ #
411
+ # [Returns] The document type which determines CSS-to-XPath translation.
412
+ #
413
+ # See XPathVisitor for more information.
414
+ def xpath_doctype
415
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
416
+ end
368
417
 
369
- def self.empty_doc? string_or_io
370
- string_or_io.nil? ||
371
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
372
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
+ def deconstruct_keys(keys)
459
+ { root: root }
373
460
  end
374
461
 
375
- IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
462
+ private
463
+
464
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
376
465
 
377
466
  def inspect_attributes
378
467
  [:name, :children]