nokogiri 1.18.0.rc1-x86_64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203)
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
data/lib/nokogiri/xml/document.rb
@@ -0,0 +1,514 @@
# coding: utf-8
# frozen_string_literal: true

require "pathname"

module Nokogiri
  module XML
    # Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
    # is created by parsing \XML content from a String or an IO object. See
    # Nokogiri::XML::Document.parse for more information on parsing.
    #
    # Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
    # please read that class's documentation as well.
    class Document < Nokogiri::XML::Node
      # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
      # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
      # characters in NCNAMEs.
      NCNAME_START_CHAR = "A-Za-z_"
      NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
      NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/

      # Unbound Object#dup and Object#clone, used by #dup and #clone below to invoke the plain
      # Object implementations directly on this instance.
      OBJECT_DUP_METHOD = Object.instance_method(:dup)
      OBJECT_CLONE_METHOD = Object.instance_method(:clone)
      private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD

      class << self
        # call-seq:
        #   parse(input) { |options| ... } => Nokogiri::XML::Document
        #   parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
        #
        # Parse \XML input from a String or IO object, and return a new XML::Document.
        #
        # 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
        # or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
        # that module's DEFAULT_XML constant for what's set (and not set) by default.
        #
        # [Required Parameters]
        # - +input+ (String | IO) The content to be parsed. A Pathname is also accepted: it is
        #   expanded, opened for the duration of the parse, and closed again before returning.
        #
        # [Optional Keyword Arguments]
        # - +url:+ (String) The base URI for this document. When omitted and the input responds to
        #   +path+ (or is a Pathname), the input's path is used.
        #
        # - +encoding:+ (String) The name of the encoding that should be used when processing the
        #   document. When not provided, the encoding will be determined based on the document
        #   content.
        #
        # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
        #   behaviors during parsing. See ParseOptions for more information. The default value is
        #   +ParseOptions::DEFAULT_XML+. An Integer is also accepted and is wrapped in a
        #   ParseOptions object.
        #
        # The positional forms of +url+, +encoding+, and +options+ are still accepted; a keyword
        # argument takes precedence over its positional counterpart.
        #
        # [Yields]
        #   If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
        #   can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
        #
        # [Returns] Nokogiri::XML::Document
        #
        # [Raises] Nokogiri::XML::SyntaxError ("Empty document") when the input is empty and the
        # +strict+ option is set. Without +strict+, an empty input yields a new, empty Document
        # (with +encoding+ applied if one was given).
        def parse(
          string_or_io,
          url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
          url: url_, encoding: encoding_, options: options_
        )
          options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
          yield options if block_given?

          url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

          if empty_doc?(string_or_io)
            raise Nokogiri::XML::SyntaxError, "Empty document" if options.strict?

            return encoding ? new.tap { |i| i.encoding = encoding } : new
          end

          doc = if string_or_io.respond_to?(:read)
            # TODO: should we instead check for respond_to?(:to_path) ?
            if string_or_io.is_a?(Pathname)
              # Resolve the Pathname to the file and read it as an IO object, see #2110. The block
              # form of Pathname#open guarantees the file handle is closed once parsing finishes
              # (or raises), instead of leaking it until garbage collection.
              string_or_io.expand_path.open do |file|
                read_io(file, url || file.path, encoding, options.to_i)
              end
            else
              read_io(string_or_io, url, encoding, options.to_i)
            end
          else
            # read_memory cannot cope with empty documents, which is why empty input is
            # short-circuited by empty_doc? above and never reaches this branch.
            read_memory(string_or_io, url, encoding, options.to_i)
          end

          # do xinclude processing
          doc.do_xinclude(options) if options.xinclude?

          doc
        end

        private

        # Returns a truthy value when +string_or_io+ carries nothing to parse: it is +nil+, it
        # responds to +empty?+ and is empty, or it responds to +eof?+ and is at end-of-file.
        def empty_doc?(string_or_io)
          string_or_io.nil? ||
            (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
            (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
        end
      end

      ##
      # :singleton-method: wrap
      # :call-seq: wrap(java_document) → Nokogiri::XML::Document
      #
      # ⚠ This method is only available when running JRuby.
      #
      # Create a Document using an existing Java DOM document object.
      #
      # The returned Document shares the same underlying data structure as the Java object, so
      # changes in one are reflected in the other.
      #
      # [Parameters]
      # - `java_document` (Java::OrgW3cDom::Document)
      #   (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
      #
      # [Returns] Nokogiri::XML::Document
      #
      # See also \#to_java

      # :method: to_java
      # :call-seq: to_java() → Java::OrgW3cDom::Document
      #
      # ⚠ This method is only available when running JRuby.
      #
      # Returns the underlying Java DOM document object for this document.
      #
      # The returned Java object shares the same underlying data structure as this document, so
      # changes in one are reflected in the other.
      #
      # [Returns]
      #   Java::OrgW3cDom::Document
      #   (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
      #
      # See also Document.wrap

      # The errors found while parsing a document.
      #
      # [Returns] Array<Nokogiri::XML::SyntaxError>
      attr_accessor :errors

      # When `true`, reparented elements without a namespace will inherit their new parent's
      # namespace (if one exists). Defaults to `false`.
      #
      # [Returns] Boolean
      #
      # *Example:* Default behavior of namespace inheritance
      #
      #   xml = <<~EOF
      #     <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
      #       <foo:parent>
      #       </foo:parent>
      #     </root>
      #   EOF
      #   doc = Nokogiri::XML(xml)
      #   parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
      #   parent.add_child("<child></child>")
      #   doc.to_xml
      #   # => <?xml version="1.0"?>
      #   #    <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
      #   #      <foo:parent>
      #   #        <child/>
      #   #      </foo:parent>
      #   #    </root>
      #
      # *Example:* Setting namespace inheritance to `true`
      #
      #   xml = <<~EOF
      #     <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
      #       <foo:parent>
      #       </foo:parent>
      #     </root>
      #   EOF
      #   doc = Nokogiri::XML(xml)
      #   doc.namespace_inheritance = true
      #   parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
      #   parent.add_child("<child></child>")
      #   doc.to_xml
      #   # => <?xml version="1.0"?>
      #   #    <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
      #   #      <foo:parent>
      #   #        <foo:child/>
      #   #      </foo:parent>
      #   #    </root>
      #
      # Since v1.12.4
      attr_accessor :namespace_inheritance

      # Sets the Ruby-level state of a new Document: an empty error list, no decorators, and
      # namespace inheritance switched off. The arguments are ignored here and +super+ is
      # intentionally not called.
      def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
        @errors = []
        @decorators = nil
        @namespace_inheritance = false
      end

      #
      # :call-seq:
      #   dup → Nokogiri::XML::Document
      #   dup(level) → Nokogiri::XML::Document
      #
      # Duplicate this document.
      #
      # [Parameters]
      # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
      # [Returns] The new Nokogiri::XML::Document
      #
      def dup(level = 1)
        # Invoke Object#dup directly, then let the copy initialize itself from this document.
        copy = OBJECT_DUP_METHOD.bind_call(self)
        copy.initialize_copy_with_args(self, level)
      end

      #
      # :call-seq:
      #   clone → Nokogiri::XML::Document
      #   clone(level) → Nokogiri::XML::Document
      #
      # Clone this document.
      #
      # [Parameters]
      # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
      # [Returns] The new Nokogiri::XML::Document
      #
      def clone(level = 1)
        # Invoke Object#clone directly, then let the copy initialize itself from this document.
        copy = OBJECT_CLONE_METHOD.bind_call(self)
        copy.initialize_copy_with_args(self, level)
      end

      # :call-seq:
      #   create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
      #
      # Create a new Element with `name` belonging to this document, optionally setting contents or
      # attributes.
      #
      # This method is _not_ the most user-friendly option if your intention is to add a node to the
      # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
      # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
      # place it in the document tree.
      #
      # Arguments may be passed to initialize the element:
      #
      # - a Hash argument will be used to set attributes; keys of the form +xmlns+ or
      #   +xmlns:prefix+ add a namespace definition instead of an attribute
      # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
      #
      # If the element ends up with a default (prefix-less) namespace definition, that namespace is
      # assigned to the element itself.
      #
      # A block may be passed to mutate the node.
      #
      # [Parameters]
      # - `name` (String)
      # - `contents_or_attrs` (\#to_s, Hash)
      # [Yields] `node` (Nokogiri::XML::Element)
      # [Returns] Nokogiri::XML::Element
      #
      # *Example:* An empty element without attributes
      #
      #   doc.create_element("div")
      #   # => <div></div>
      #
      # *Example:* An element with contents
      #
      #   doc.create_element("div", "contents")
      #   # => <div>contents</div>
      #
      # *Example:* An element with attributes
      #
      #   doc.create_element("div", {"class" => "container"})
      #   # => <div class='container'></div>
      #
      # *Example:* An element with contents and attributes
      #
      #   doc.create_element("div", "contents", {"class" => "container"})
      #   # => <div class='container'>contents</div>
      #
      # *Example:* Passing a block to mutate the element
      #
      #   doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
      #
      def create_element(name, *contents_or_attrs, &block)
        elm = Nokogiri::XML::Element.new(name, self, &block)
        contents_or_attrs.each do |arg|
          case arg
          when Hash
            arg.each do |k, v|
              key = k.to_s
              if key =~ NCNAME_RE
                # "xmlns" captures nil (default namespace); "xmlns:foo" captures "foo".
                ns_name = Regexp.last_match(1)
                elm.add_namespace_definition(ns_name, v)
              else
                elm[key] = v.to_s
              end
            end
          else
            elm.content = arg
          end
        end
        if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
          elm.namespace = ns
        end
        elm
      end

      # Create a Text Node with +string+
      def create_text_node(string, &block)
        Nokogiri::XML::Text.new(string.to_s, self, &block)
      end

      # Create a CDATA Node containing +string+
      def create_cdata(string, &block)
        Nokogiri::XML::CDATA.new(self, string.to_s, &block)
      end

      # Create a Comment Node containing +string+
      def create_comment(string, &block)
        Nokogiri::XML::Comment.new(self, string.to_s, &block)
      end

      # The name of this document. Always returns "document"
      def name
        "document"
      end

      # A reference to +self+
      def document
        self
      end

      # :call-seq:
      #   collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
      #
      # Recursively get all namespaces from this node and its subtree and return them as a
      # hash. The +xml+ prefix is skipped.
      #
      # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
      #
      # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
      # order (and which duplicate prefix "wins") may be dependent on the implementation of the
      # underlying XML library.
      #
      # *Example:* Basic usage
      #
      # Given this document:
      #
      #   <root xmlns="default" xmlns:foo="bar">
      #     <bar xmlns:hello="world" />
      #   </root>
      #
      # This method will return:
      #
      #   {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
      #
      # *Example:* Duplicate prefixes
      #
      # Given this document:
      #
      #   <root xmlns:foo="bar">
      #     <bar xmlns:foo="baz" />
      #   </root>
      #
      # The hash returned will be something like:
      #
      #   {"xmlns:foo" => "baz"}
      #
      def collect_namespaces
        xpath("//namespace::*").each_with_object({}) do |ns, hash|
          # A nil prefix (default namespace) compacts away, leaving the bare key "xmlns".
          hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
        end
      end

      # Get the list of decorators given +key+. The list is created (empty) on first access.
      def decorators(key)
        @decorators ||= {}
        @decorators[key] ||= []
      end

      ##
      # Validate this Document against its DTD. Returns a list of errors on
      # the document or +nil+ when there is no DTD.
      def validate
        return unless internal_subset

        internal_subset.validate(self)
      end

      ##
      # Explore a document with shortcut methods. See Nokogiri::Slop for details.
      #
      # Note that any nodes that have been instantiated before #slop!
      # is called will not be decorated with sloppy behavior. So, if you're in
      # irb, the preferred idiom is:
      #
      #   irb> doc = Nokogiri::Slop my_markup
      #
      # and not
      #
      #   irb> doc = Nokogiri::HTML my_markup
      #   ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
      #   irb> doc.slop!
      #   ... which does absolutely nothing.
      #
      def slop!
        unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
          decorators(XML::Node) << Nokogiri::Decorators::Slop
          decorate!
        end

        self
      end

      ##
      # Apply any decorators to +node+
      def decorate(node)
        return unless @decorators

        @decorators.each do |klass, list|
          next unless node.is_a?(klass)

          list.each { |mod| node.extend(mod) }
        end
      end

      alias_method :to_xml, :serialize

      # Get the hash of namespaces on the root Nokogiri::XML::Node. Returns an empty Hash when
      # the document has no root.
      def namespaces
        root ? root.namespaces : {}
      end

      ##
      # Create a Nokogiri::XML::DocumentFragment from +tags+.
      # Returns an empty fragment if +tags+ is nil.
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, root)
      end

      # Methods inherited from the superclass that are removed from Document.
      undef_method :swap, :parent, :namespace, :default_namespace=
      undef_method :add_namespace_definition, :attributes
      undef_method :namespace_definitions, :line, :add_namespace

      # Add +node_or_tags+ as a child of this document (also available as #<<).
      #
      # Raises a RuntimeError ("A document may not have multiple root nodes.") when the document
      # already has a root that is not named "nokogiri_text_wrapper" and the argument is neither
      # a comment nor a processing instruction, or when the coerced argument is a NodeSet
      # containing more than one node.
      #
      # NOTE(review): the first check sends +comment?+ and +processing_instruction?+ to the
      # argument before it has been coerced, so it appears to expect a node rather than a String
      # of markup whenever a root already exists — confirm against callers.
      def add_child(node_or_tags)
        raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)

        node_or_tags = coerce(node_or_tags)
        if node_or_tags.is_a?(XML::NodeSet)
          raise "A document may not have multiple root nodes." if node_or_tags.size > 1

          super(node_or_tags.first)
        else
          super
        end
      end
      alias_method :<<, :add_child

      # :call-seq:
      #   xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
      #
      # [Returns] The document type which determines CSS-to-XPath translation.
      #
      # See XPathVisitor for more information.
      def xpath_doctype
        Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
      end

      #
      # :call-seq: deconstruct_keys(array_of_names) → Hash
      #
      # Returns a hash describing the Document, to use in pattern matching. The requested key
      # names are currently ignored: the returned hash always contains exactly +root+.
      #
      # Valid keys and their values:
      # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
      #
      # In the future, other keys may allow accessing things like doctype and processing
      # instructions. If you have a use case and would like this functionality, please let us know
      # by opening an issue or a discussion on the github project.
      #
      # *Example*
      #
      #   doc = Nokogiri::XML.parse(<<~XML)
      #     <?xml version="1.0"?>
      #     <root>
      #       <child>
      #     </root>
      #   XML
      #
      #   doc.deconstruct_keys([:root])
      #   # => {:root=>
      #   #      #(Element:0x35c {
      #   #        name = "root",
      #   #        children = [
      #   #          #(Text "\n" + "  "),
      #   #          #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
      #   #          #(Text "\n")]
      #   #        })}
      #
      # *Example* of an empty document
      #
      #   doc = Nokogiri::XML::Document.new
      #
      #   doc.deconstruct_keys([:root])
      #   # => {:root=>nil}
      #
      # Since v1.14.0
      #
      def deconstruct_keys(keys)
        { root: root }
      end

      private

      # The single XPath prefix "//" associated with documents; its consumer is not visible in
      # this file.
      IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:

      # The attribute names reported for this object: +name+ and +children+.
      def inspect_attributes
        [:name, :children]
      end
    end
  end
end