nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,132 +1,320 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
1
6
  module Nokogiri
2
7
  module XML
3
- ##
4
- # Nokogiri::XML::Document is the main entry point for dealing with
5
- # XML documents. The Document is created by parsing an XML document.
6
- # See Nokogiri::XML::Document.parse() for more information on parsing.
7
- #
8
- # For searching a Document, see Nokogiri::XML::Searchable#css and
9
- # Nokogiri::XML::Searchable#xpath
8
+ # Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
9
+ # is created by parsing \XML content from a String or an IO object. See
10
+ # Nokogiri::XML::Document.parse for more information on parsing.
10
11
  #
12
+ # Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
13
+ # please read that class's documentation as well.
11
14
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
15
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
17
+ # characters in NCNAMEs.
14
18
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
19
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
+
22
+ OBJECT_DUP_METHOD = Object.instance_method(:dup)
23
+ OBJECT_CLONE_METHOD = Object.instance_method(:clone)
24
+ private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
25
+
26
+ class << self
27
+ # call-seq:
28
+ # parse(input) { |options| ... } => Nokogiri::XML::Document
29
+ # parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
30
+ #
31
+ # Parse \XML input from a String or IO object, and return a new XML::Document.
32
+ #
33
+ # 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
34
+ # or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
35
+ # that module's DEFAULT_XML constant for what's set (and not set) by default.
36
+ #
37
+ # [Required Parameters]
38
+ # - +input+ (String | IO) The content to be parsed.
39
+ #
40
+ # [Optional Keyword Arguments]
41
+ # - +url:+ (String) The base URI for this document.
42
+ #
43
+ # - +encoding:+ (String) The name of the encoding that should be used when processing the
44
+ # document. When not provided, the encoding will be determined based on the document
45
+ # content.
46
+ #
47
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
48
+ # behaviors during parsing. See ParseOptions for more information. The default value is
49
+ # +ParseOptions::DEFAULT_XML+.
50
+ #
51
+ # [Yields]
52
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
53
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
54
+ #
55
+ # [Returns] Nokogiri::XML::Document
56
+ def parse(
57
+ string_or_io,
58
+ url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
59
+ url: url_, encoding: encoding_, options: options_
60
+ )
61
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
62
+ yield options if block_given?
63
+
64
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
65
+
66
+ if empty_doc?(string_or_io)
67
+ if options.strict?
68
+ raise Nokogiri::XML::SyntaxError, "Empty document"
69
+ else
70
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
71
+ end
72
+ end
73
+
74
+ doc = if string_or_io.respond_to?(:read)
75
+ # TODO: should we instead check for respond_to?(:to_path) ?
76
+ if string_or_io.is_a?(Pathname)
77
+ # resolve the Pathname to the file and open it as an IO object, see #2110
78
+ string_or_io = string_or_io.expand_path.open
79
+ url ||= string_or_io.path
80
+ end
81
+
82
+ read_io(string_or_io, url, encoding, options.to_i)
83
+ else
84
+ # read_memory pukes on empty docs
85
+ read_memory(string_or_io, url, encoding, options.to_i)
86
+ end
87
+
88
+ # do xinclude processing
89
+ doc.do_xinclude(options) if options.xinclude?
90
+
91
+ doc
92
+ end
93
+
94
+ private
95
+
96
+ def empty_doc?(string_or_io)
97
+ string_or_io.nil? ||
98
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
99
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
100
+ end
101
+ end
17
102
 
18
103
  ##
19
- # Parse an XML file.
104
+ # :singleton-method: wrap
105
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
20
106
  #
21
- # +string_or_io+ may be a String, or any object that responds to
22
- # _read_ and _close_ such as an IO, or StringIO.
107
+ # This method is only available when running JRuby.
23
108
  #
24
- # +url+ (optional) is the URI where this document is located.
109
+ # Create a Document using an existing Java DOM document object.
25
110
  #
26
- # +encoding+ (optional) is the encoding that should be used when processing
27
- # the document.
111
+ # The returned Document shares the same underlying data structure as the Java object, so
112
+ # changes in one are reflected in the other.
28
113
  #
29
- # +options+ (optional) is a configuration object that sets options during
30
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
31
- # Nokogiri::XML::ParseOptions for more information.
114
+ # [Parameters]
115
+ # - `java_document` (Java::OrgW3cDom::Document)
116
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
32
117
  #
33
- # +block+ (optional) is passed a configuration object on which
34
- # parse options may be set.
118
+ # [Returns] Nokogiri::XML::Document
35
119
  #
36
- # By default, Nokogiri treats documents as untrusted, and so
37
- # does not attempt to load DTDs or access the network. See
38
- # Nokogiri::XML::ParseOptions for a complete list of options;
39
- # and that module's DEFAULT_XML constant for what's set (and not
40
- # set) by default.
120
+ # See also \#to_java
121
+
122
+ # :method: to_java
123
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
41
124
  #
42
- # Nokogiri.XML() is a convenience method which will call this method.
125
+ # This method is only available when running JRuby.
43
126
  #
44
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
45
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
- # Give the options to the user
47
- yield options if block_given?
48
-
49
- if empty_doc?(string_or_io)
50
- if options.strict?
51
- raise Nokogiri::XML::SyntaxError.new("Empty document")
52
- else
53
- return encoding ? new.tap { |i| i.encoding = encoding } : new
54
- end
55
- end
127
+ # Returns the underlying Java DOM document object for this document.
128
+ #
129
+ # The returned Java object shares the same underlying data structure as this document, so
130
+ # changes in one are reflected in the other.
131
+ #
132
+ # [Returns]
133
+ # Java::OrgW3cDom::Document
134
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
135
+ #
136
+ # See also Document.wrap
56
137
 
57
- doc = if string_or_io.respond_to?(:read)
58
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
59
- read_io(string_or_io, url, encoding, options.to_i)
60
- else
61
- # read_memory pukes on empty docs
62
- read_memory(string_or_io, url, encoding, options.to_i)
63
- end
138
+ # The errors found while parsing a document.
139
+ #
140
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
141
+ attr_accessor :errors
64
142
 
65
- # do xinclude processing
66
- doc.do_xinclude(options) if options.xinclude?
143
+ # When `true`, reparented elements without a namespace will inherit their new parent's
144
+ # namespace (if one exists). Defaults to `false`.
145
+ #
146
+ # [Returns] Boolean
147
+ #
148
+ # *Example:* Default behavior of namespace inheritance
149
+ #
150
+ # xml = <<~EOF
151
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
152
+ # <foo:parent>
153
+ # </foo:parent>
154
+ # </root>
155
+ # EOF
156
+ # doc = Nokogiri::XML(xml)
157
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
158
+ # parent.add_child("<child></child>")
159
+ # doc.to_xml
160
+ # # => <?xml version="1.0"?>
161
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
162
+ # # <foo:parent>
163
+ # # <child/>
164
+ # # </foo:parent>
165
+ # # </root>
166
+ #
167
+ # *Example:* Setting namespace inheritance to `true`
168
+ #
169
+ # xml = <<~EOF
170
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
171
+ # <foo:parent>
172
+ # </foo:parent>
173
+ # </root>
174
+ # EOF
175
+ # doc = Nokogiri::XML(xml)
176
+ # doc.namespace_inheritance = true
177
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
178
+ # parent.add_child("<child></child>")
179
+ # doc.to_xml
180
+ # # => <?xml version="1.0"?>
181
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
182
+ # # <foo:parent>
183
+ # # <foo:child/>
184
+ # # </foo:parent>
185
+ # # </root>
186
+ #
187
+ # Since v1.12.4
188
+ attr_accessor :namespace_inheritance
67
189
 
68
- return doc
190
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
191
+ @errors = []
192
+ @decorators = nil
193
+ @namespace_inheritance = false
69
194
  end
70
195
 
71
- # A list of Nokogiri::XML::SyntaxError found when parsing a document
72
- attr_accessor :errors
196
+ #
197
+ # :call-seq:
198
+ # dup → Nokogiri::XML::Document
199
+ # dup(level) → Nokogiri::XML::Document
200
+ #
201
+ # Duplicate this node.
202
+ #
203
+ # [Parameters]
204
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
205
+ # [Returns] The new Nokogiri::XML::Document
206
+ #
207
+ def dup(level = 1)
208
+ copy = OBJECT_DUP_METHOD.bind_call(self)
209
+ copy.initialize_copy_with_args(self, level)
210
+ end
73
211
 
74
- def initialize *args # :nodoc:
75
- @errors = []
76
- @decorators = nil
212
+ #
213
+ # :call-seq:
214
+ # clone → Nokogiri::XML::Document
215
+ # clone(level) → Nokogiri::XML::Document
216
+ #
217
+ # Clone this node.
218
+ #
219
+ # [Parameters]
220
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
221
+ # [Returns] The new Nokogiri::XML::Document
222
+ #
223
+ def clone(level = 1)
224
+ copy = OBJECT_CLONE_METHOD.bind_call(self)
225
+ copy.initialize_copy_with_args(self, level)
77
226
  end
78
227
 
79
- ##
80
- # Create an element with +name+, and optionally setting the content and attributes.
228
+ # :call-seq:
229
+ # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
230
+ #
231
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
232
+ # attributes.
233
+ #
234
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
235
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
236
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
237
+ # place it in the document tree.
238
+ #
239
+ # Arguments may be passed to initialize the element:
240
+ #
241
+ # - a Hash argument will be used to set attributes
242
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
243
+ #
244
+ # A block may be passed to mutate the node.
245
+ #
246
+ # [Parameters]
247
+ # - `name` (String)
248
+ # - `contents_or_attrs` (\#to_s, Hash)
249
+ # [Yields] `node` (Nokogiri::XML::Element)
250
+ # [Returns] Nokogiri::XML::Element
251
+ #
252
+ # *Example:* An empty element without attributes
253
+ #
254
+ # doc.create_element("div")
255
+ # # => <div></div>
256
+ #
257
+ # *Example:* An element with contents
81
258
  #
82
- # doc.create_element "div" # <div></div>
83
- # doc.create_element "div", :class => "container" # <div class='container'></div>
84
- # doc.create_element "div", "contents" # <div>contents</div>
85
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
86
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
259
+ # doc.create_element("div", "contents")
260
+ # # => <div>contents</div>
87
261
  #
88
- def create_element name, *args, &block
262
+ # *Example:* An element with attributes
263
+ #
264
+ # doc.create_element("div", {"class" => "container"})
265
+ # # => <div class='container'></div>
266
+ #
267
+ # *Example:* An element with contents and attributes
268
+ #
269
+ # doc.create_element("div", "contents", {"class" => "container"})
270
+ # # => <div class='container'>contents</div>
271
+ #
272
+ # *Example:* Passing a block to mutate the element
273
+ #
274
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
275
+ #
276
+ def create_element(name, *contents_or_attrs, &block)
89
277
  elm = Nokogiri::XML::Element.new(name, self, &block)
90
- args.each do |arg|
278
+ contents_or_attrs.each do |arg|
91
279
  case arg
92
280
  when Hash
93
- arg.each { |k,v|
281
+ arg.each do |k, v|
94
282
  key = k.to_s
95
283
  if key =~ NCNAME_RE
96
- ns_name = key.split(":", 2)[1]
97
- elm.add_namespace_definition ns_name, v
284
+ ns_name = Regexp.last_match(1)
285
+ elm.add_namespace_definition(ns_name, v)
98
286
  else
99
287
  elm[k.to_s] = v.to_s
100
288
  end
101
- }
289
+ end
102
290
  else
103
291
  elm.content = arg
104
292
  end
105
293
  end
106
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
294
+ if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
107
295
  elm.namespace = ns
108
296
  end
109
297
  elm
110
298
  end
111
299
 
112
300
  # Create a Text Node with +string+
113
- def create_text_node string, &block
114
- Nokogiri::XML::Text.new string.to_s, self, &block
301
+ def create_text_node(string, &block)
302
+ Nokogiri::XML::Text.new(string.to_s, self, &block)
115
303
  end
116
304
 
117
305
  # Create a CDATA Node containing +string+
118
- def create_cdata string, &block
119
- Nokogiri::XML::CDATA.new self, string.to_s, &block
306
+ def create_cdata(string, &block)
307
+ Nokogiri::XML::CDATA.new(self, string.to_s, &block)
120
308
  end
121
309
 
122
310
  # Create a Comment Node containing +string+
123
- def create_comment string, &block
124
- Nokogiri::XML::Comment.new self, string.to_s, &block
311
+ def create_comment(string, &block)
312
+ Nokogiri::XML::Comment.new(self, string.to_s, &block)
125
313
  end
126
314
 
127
315
  # The name of this document. Always returns "document"
128
316
  def name
129
- 'document'
317
+ "document"
130
318
  end
131
319
 
132
320
  # A reference to +self+
@@ -134,55 +322,61 @@ module Nokogiri
134
322
  self
135
323
  end
136
324
 
137
- ##
138
- # Recursively get all namespaces from this node and its subtree and
139
- # return them as a hash.
325
+ # :call-seq:
326
+ # collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
140
327
  #
141
- # For example, given this document:
328
+ # Recursively get all namespaces from this node and its subtree and return them as a
329
+ # hash.
142
330
  #
143
- # <root xmlns:foo="bar">
331
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
332
+ #
333
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
334
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
335
+ # underlying XML library.
336
+ #
337
+ # *Example:* Basic usage
338
+ #
339
+ # Given this document:
340
+ #
341
+ # <root xmlns="default" xmlns:foo="bar">
144
342
  # <bar xmlns:hello="world" />
145
343
  # </root>
146
344
  #
147
345
  # This method will return:
148
346
  #
149
- # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
347
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
150
348
  #
151
- # WARNING: this method will clobber duplicate names in the keys.
152
- # For example, given this document:
349
+ # *Example:* Duplicate prefixes
350
+ #
351
+ # Given this document:
153
352
  #
154
353
  # <root xmlns:foo="bar">
155
354
  # <bar xmlns:foo="baz" />
156
355
  # </root>
157
356
  #
158
- # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
159
- #
160
- # Non-prefixed default namespaces (as in "xmlns=") are not included
161
- # in the hash.
357
+ # The hash returned will be something like:
162
358
  #
163
- # Note that this method does an xpath lookup for nodes with
164
- # namespaces, and as a result the order may be dependent on the
165
- # implementation of the underlying XML library.
359
+ # {"xmlns:foo" => "baz"}
166
360
  #
167
361
  def collect_namespaces
168
- xpath("//namespace::*").inject({}) do |hash, ns|
169
- hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
170
- hash
362
+ xpath("//namespace::*").each_with_object({}) do |ns, hash|
363
+ hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
171
364
  end
172
365
  end
173
366
 
174
367
  # Get the list of decorators given +key+
175
- def decorators key
176
- @decorators ||= Hash.new
368
+ def decorators(key)
369
+ @decorators ||= {}
177
370
  @decorators[key] ||= []
178
371
  end
179
372
 
180
373
  ##
181
- # Validate this Document against it's DTD. Returns a list of errors on
374
+ # Validate this Document against its DTD. Returns a list of errors on
182
375
  # the document or +nil+ when there is no DTD.
183
376
  def validate
184
- return nil unless internal_subset
185
- internal_subset.validate self
377
+ return unless internal_subset
378
+
379
+ internal_subset.validate(self)
186
380
  end
187
381
 
188
382
  ##
@@ -202,7 +396,7 @@ module Nokogiri
202
396
  # ... which does absolutely nothing.
203
397
  #
204
398
  def slop!
205
- unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
399
+ unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
206
400
  decorators(XML::Node) << Nokogiri::Decorators::Slop
207
401
  decorate!
208
402
  end
@@ -212,16 +406,17 @@ module Nokogiri
212
406
 
213
407
  ##
214
408
  # Apply any decorators to +node+
215
- def decorate node
409
+ def decorate(node)
216
410
  return unless @decorators
217
- @decorators.each { |klass,list|
411
+
412
+ @decorators.each do |klass, list|
218
413
  next unless node.is_a?(klass)
219
- list.each { |moodule| node.extend(moodule) }
220
- }
414
+
415
+ list.each { |mod| node.extend(mod) }
416
+ end
221
417
  end
222
418
 
223
- alias :to_xml :serialize
224
- alias :clone :dup
419
+ alias_method :to_xml, :serialize
225
420
 
226
421
  # Get the hash of namespaces on the root Nokogiri::XML::Node
227
422
  def namespaces
@@ -231,51 +426,85 @@ module Nokogiri
231
426
  ##
232
427
  # Create a Nokogiri::XML::DocumentFragment from +tags+
233
428
  # Returns an empty fragment if +tags+ is nil.
234
- def fragment tags = nil
235
- DocumentFragment.new(self, tags, self.root)
429
+ def fragment(tags = nil)
430
+ DocumentFragment.new(self, tags, root)
236
431
  end
237
432
 
238
433
  undef_method :swap, :parent, :namespace, :default_namespace=
239
434
  undef_method :add_namespace_definition, :attributes
240
435
  undef_method :namespace_definitions, :line, :add_namespace
241
436
 
242
- def add_child node_or_tags
243
- raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
437
+ def add_child(node_or_tags)
438
+ raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
439
+
244
440
  node_or_tags = coerce(node_or_tags)
245
441
  if node_or_tags.is_a?(XML::NodeSet)
246
442
  raise "A document may not have multiple root nodes." if node_or_tags.size > 1
443
+
247
444
  super(node_or_tags.first)
248
445
  else
249
446
  super
250
447
  end
251
448
  end
252
- alias :<< :add_child
449
+ alias_method :<<, :add_child
253
450
 
254
- ##
255
- # +JRuby+
256
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
451
+ # :call-seq:
452
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
453
+ #
454
+ # [Returns] The document type which determines CSS-to-XPath translation.
455
+ #
456
+ # See XPathVisitor for more information.
457
+ def xpath_doctype
458
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
260
459
  end
261
460
 
262
- ##
263
- # +JRuby+
264
- # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
461
+ #
462
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
463
+ #
464
+ # Returns a hash describing the Document, to use in pattern matching.
465
+ #
466
+ # Valid keys and their values:
467
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
468
+ #
469
+ # In the future, other keys may allow accessing things like doctype and processing
470
+ # instructions. If you have a use case and would like this functionality, please let us know
471
+ # by opening an issue or a discussion on the github project.
472
+ #
473
+ # *Example*
474
+ #
475
+ # doc = Nokogiri::XML.parse(<<~XML)
476
+ # <?xml version="1.0"?>
477
+ # <root>
478
+ # <child>
479
+ # </root>
480
+ # XML
481
+ #
482
+ # doc.deconstruct_keys([:root])
483
+ # # => {:root=>
484
+ # # #(Element:0x35c {
485
+ # # name = "root",
486
+ # # children = [
487
+ # # #(Text "\n" + " "),
488
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
489
+ # # #(Text "\n")]
490
+ # # })}
491
+ #
492
+ # *Example* of an empty document
493
+ #
494
+ # doc = Nokogiri::XML::Document.new
495
+ #
496
+ # doc.deconstruct_keys([:root])
497
+ # # => {:root=>nil}
498
+ #
499
+ # Since v1.14.0
500
+ #
501
+ def deconstruct_keys(keys)
502
+ { root: root }
268
503
  end
269
504
 
270
505
  private
271
- def self.empty_doc? string_or_io
272
- string_or_io.nil? ||
273
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
274
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
275
- end
276
506
 
277
- # @private
278
- IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
507
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
279
508
 
280
509
  def inspect_attributes
281
510
  [:name, :children]