nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,132 +1,275 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
1
6
  module Nokogiri
2
7
  module XML
3
- ##
4
- # Nokogiri::XML::Document is the main entry point for dealing with
5
- # XML documents. The Document is created by parsing an XML document.
6
- # See Nokogiri::XML::Document.parse() for more information on parsing.
8
+ # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
+ # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
+ # on parsing.
7
11
  #
8
12
  # For searching a Document, see Nokogiri::XML::Searchable#css and
9
13
  # Nokogiri::XML::Searchable#xpath
10
- #
11
14
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
15
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
17
+ # characters in NCNAMEs.
14
18
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
19
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
+
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
61
+
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
68
+
69
+ read_io(string_or_io, url, encoding, options.to_i)
70
+ else
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
73
+ end
74
+
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
77
+
78
+ doc
79
+ end
80
+
81
+ private
82
+
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
88
+ end
17
89
 
18
90
  ##
19
- # Parse an XML file.
91
+ # :singleton-method: wrap
92
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
20
93
  #
21
- # +string_or_io+ may be a String, or any object that responds to
22
- # _read_ and _close_ such as an IO, or StringIO.
94
+ # This method is only available when running JRuby.
23
95
  #
24
- # +url+ (optional) is the URI where this document is located.
96
+ # Create a Document using an existing Java DOM document object.
25
97
  #
26
- # +encoding+ (optional) is the encoding that should be used when processing
27
- # the document.
98
+ # The returned Document shares the same underlying data structure as the Java object, so
99
+ # changes in one are reflected in the other.
28
100
  #
29
- # +options+ (optional) is a configuration object that sets options during
30
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
31
- # Nokogiri::XML::ParseOptions for more information.
101
+ # [Parameters]
102
+ # - `java_document` (Java::OrgW3cDom::Document)
103
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
32
104
  #
33
- # +block+ (optional) is passed a configuration object on which
34
- # parse options may be set.
105
+ # [Returns] Nokogiri::XML::Document
35
106
  #
36
- # By default, Nokogiri treats documents as untrusted, and so
37
- # does not attempt to load DTDs or access the network. See
38
- # Nokogiri::XML::ParseOptions for a complete list of options;
39
- # and that module's DEFAULT_XML constant for what's set (and not
40
- # set) by default.
107
+ # See also \#to_java
108
+
109
+ # :method: to_java
110
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
41
111
  #
42
- # Nokogiri.XML() is a convenience method which will call this method.
112
+ # This method is only available when running JRuby.
43
113
  #
44
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
45
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
- # Give the options to the user
47
- yield options if block_given?
48
-
49
- if empty_doc?(string_or_io)
50
- if options.strict?
51
- raise Nokogiri::XML::SyntaxError.new("Empty document")
52
- else
53
- return encoding ? new.tap { |i| i.encoding = encoding } : new
54
- end
55
- end
56
-
57
- doc = if string_or_io.respond_to?(:read)
58
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
59
- read_io(string_or_io, url, encoding, options.to_i)
60
- else
61
- # read_memory pukes on empty docs
62
- read_memory(string_or_io, url, encoding, options.to_i)
63
- end
64
-
65
- # do xinclude processing
66
- doc.do_xinclude(options) if options.xinclude?
67
-
68
- return doc
69
- end
114
+ # Returns the underlying Java DOM document object for this document.
115
+ #
116
+ # The returned Java object shares the same underlying data structure as this document, so
117
+ # changes in one are reflected in the other.
118
+ #
119
+ # [Returns]
120
+ # Java::OrgW3cDom::Document
121
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
122
+ #
123
+ # See also Document.wrap
70
124
 
71
- # A list of Nokogiri::XML::SyntaxError found when parsing a document
125
+ # The errors found while parsing a document.
126
+ #
127
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
72
128
  attr_accessor :errors
73
129
 
74
- def initialize *args # :nodoc:
130
+ # When `true`, reparented elements without a namespace will inherit their new parent's
131
+ # namespace (if one exists). Defaults to `false`.
132
+ #
133
+ # [Returns] Boolean
134
+ #
135
+ # *Example:* Default behavior of namespace inheritance
136
+ #
137
+ # xml = <<~EOF
138
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
139
+ # <foo:parent>
140
+ # </foo:parent>
141
+ # </root>
142
+ # EOF
143
+ # doc = Nokogiri::XML(xml)
144
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
145
+ # parent.add_child("<child></child>")
146
+ # doc.to_xml
147
+ # # => <?xml version="1.0"?>
148
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
149
+ # # <foo:parent>
150
+ # # <child/>
151
+ # # </foo:parent>
152
+ # # </root>
153
+ #
154
+ # *Example:* Setting namespace inheritance to `true`
155
+ #
156
+ # xml = <<~EOF
157
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
158
+ # <foo:parent>
159
+ # </foo:parent>
160
+ # </root>
161
+ # EOF
162
+ # doc = Nokogiri::XML(xml)
163
+ # doc.namespace_inheritance = true
164
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
165
+ # parent.add_child("<child></child>")
166
+ # doc.to_xml
167
+ # # => <?xml version="1.0"?>
168
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
169
+ # # <foo:parent>
170
+ # # <foo:child/>
171
+ # # </foo:parent>
172
+ # # </root>
173
+ #
174
+ # Since v1.12.4
175
+ attr_accessor :namespace_inheritance
176
+
177
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
75
178
  @errors = []
76
179
  @decorators = nil
180
+ @namespace_inheritance = false
77
181
  end
78
182
 
79
- ##
80
- # Create an element with +name+, and optionally setting the content and attributes.
183
+ # :call-seq:
184
+ # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
185
+ #
186
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
187
+ # attributes.
188
+ #
189
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
190
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
191
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
192
+ # place it in the document tree.
81
193
  #
82
- # doc.create_element "div" # <div></div>
83
- # doc.create_element "div", :class => "container" # <div class='container'></div>
84
- # doc.create_element "div", "contents" # <div>contents</div>
85
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
86
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
194
+ # Arguments may be passed to initialize the element:
87
195
  #
88
- def create_element name, *args, &block
196
+ # - a Hash argument will be used to set attributes
197
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
198
+ #
199
+ # A block may be passed to mutate the node.
200
+ #
201
+ # [Parameters]
202
+ # - `name` (String)
203
+ # - `contents_or_attrs` (\#to_s, Hash)
204
+ # [Yields] `node` (Nokogiri::XML::Element)
205
+ # [Returns] Nokogiri::XML::Element
206
+ #
207
+ # *Example:* An empty element without attributes
208
+ #
209
+ # doc.create_element("div")
210
+ # # => <div></div>
211
+ #
212
+ # *Example:* An element with contents
213
+ #
214
+ # doc.create_element("div", "contents")
215
+ # # => <div>contents</div>
216
+ #
217
+ # *Example:* An element with attributes
218
+ #
219
+ # doc.create_element("div", {"class" => "container"})
220
+ # # => <div class='container'></div>
221
+ #
222
+ # *Example:* An element with contents and attributes
223
+ #
224
+ # doc.create_element("div", "contents", {"class" => "container"})
225
+ # # => <div class='container'>contents</div>
226
+ #
227
+ # *Example:* Passing a block to mutate the element
228
+ #
229
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
230
+ #
231
+ def create_element(name, *contents_or_attrs, &block)
89
232
  elm = Nokogiri::XML::Element.new(name, self, &block)
90
- args.each do |arg|
233
+ contents_or_attrs.each do |arg|
91
234
  case arg
92
235
  when Hash
93
- arg.each { |k,v|
236
+ arg.each do |k, v|
94
237
  key = k.to_s
95
238
  if key =~ NCNAME_RE
96
- ns_name = key.split(":", 2)[1]
97
- elm.add_namespace_definition ns_name, v
239
+ ns_name = Regexp.last_match(1)
240
+ elm.add_namespace_definition(ns_name, v)
98
241
  else
99
242
  elm[k.to_s] = v.to_s
100
243
  end
101
- }
244
+ end
102
245
  else
103
246
  elm.content = arg
104
247
  end
105
248
  end
106
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
249
+ if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
107
250
  elm.namespace = ns
108
251
  end
109
252
  elm
110
253
  end
111
254
 
112
255
  # Create a Text Node with +string+
113
- def create_text_node string, &block
114
- Nokogiri::XML::Text.new string.to_s, self, &block
256
+ def create_text_node(string, &block)
257
+ Nokogiri::XML::Text.new(string.to_s, self, &block)
115
258
  end
116
259
 
117
260
  # Create a CDATA Node containing +string+
118
- def create_cdata string, &block
119
- Nokogiri::XML::CDATA.new self, string.to_s, &block
261
+ def create_cdata(string, &block)
262
+ Nokogiri::XML::CDATA.new(self, string.to_s, &block)
120
263
  end
121
264
 
122
265
  # Create a Comment Node containing +string+
123
- def create_comment string, &block
124
- Nokogiri::XML::Comment.new self, string.to_s, &block
266
+ def create_comment(string, &block)
267
+ Nokogiri::XML::Comment.new(self, string.to_s, &block)
125
268
  end
126
269
 
127
270
  # The name of this document. Always returns "document"
128
271
  def name
129
- 'document'
272
+ "document"
130
273
  end
131
274
 
132
275
  # A reference to +self+
@@ -134,46 +277,51 @@ module Nokogiri
134
277
  self
135
278
  end
136
279
 
137
- ##
138
- # Recursively get all namespaces from this node and its subtree and
139
- # return them as a hash.
280
+ # :call-seq:
281
+ # collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
140
282
  #
141
- # For example, given this document:
283
+ # Recursively get all namespaces from this node and its subtree and return them as a
284
+ # hash.
142
285
  #
143
- # <root xmlns:foo="bar">
286
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
287
+ #
288
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
289
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
290
+ # underlying XML library.
291
+ #
292
+ # *Example:* Basic usage
293
+ #
294
+ # Given this document:
295
+ #
296
+ # <root xmlns="default" xmlns:foo="bar">
144
297
  # <bar xmlns:hello="world" />
145
298
  # </root>
146
299
  #
147
300
  # This method will return:
148
301
  #
149
- # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
302
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
303
+ #
304
+ # *Example:* Duplicate prefixes
150
305
  #
151
- # WARNING: this method will clobber duplicate names in the keys.
152
- # For example, given this document:
306
+ # Given this document:
153
307
  #
154
308
  # <root xmlns:foo="bar">
155
309
  # <bar xmlns:foo="baz" />
156
310
  # </root>
157
311
  #
158
- # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
312
+ # The hash returned will be something like:
159
313
  #
160
- # Non-prefixed default namespaces (as in "xmlns=") are not included
161
- # in the hash.
162
- #
163
- # Note that this method does an xpath lookup for nodes with
164
- # namespaces, and as a result the order may be dependent on the
165
- # implementation of the underlying XML library.
314
+ # {"xmlns:foo" => "baz"}
166
315
  #
167
316
  def collect_namespaces
168
- xpath("//namespace::*").inject({}) do |hash, ns|
169
- hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
170
- hash
317
+ xpath("//namespace::*").each_with_object({}) do |ns, hash|
318
+ hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
171
319
  end
172
320
  end
173
321
 
174
322
  # Get the list of decorators given +key+
175
- def decorators key
176
- @decorators ||= Hash.new
323
+ def decorators(key)
324
+ @decorators ||= {}
177
325
  @decorators[key] ||= []
178
326
  end
179
327
 
@@ -181,8 +329,9 @@ module Nokogiri
181
329
  # Validate this Document against its DTD. Returns a list of errors on
182
330
  # the document or +nil+ when there is no DTD.
183
331
  def validate
184
- return nil unless internal_subset
185
- internal_subset.validate self
332
+ return unless internal_subset
333
+
334
+ internal_subset.validate(self)
186
335
  end
187
336
 
188
337
  ##
@@ -202,7 +351,7 @@ module Nokogiri
202
351
  # ... which does absolutely nothing.
203
352
  #
204
353
  def slop!
205
- unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
354
+ unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
206
355
  decorators(XML::Node) << Nokogiri::Decorators::Slop
207
356
  decorate!
208
357
  end
@@ -212,16 +361,18 @@ module Nokogiri
212
361
 
213
362
  ##
214
363
  # Apply any decorators to +node+
215
- def decorate node
364
+ def decorate(node)
216
365
  return unless @decorators
217
- @decorators.each { |klass,list|
366
+
367
+ @decorators.each do |klass, list|
218
368
  next unless node.is_a?(klass)
369
+
219
370
  list.each { |moodule| node.extend(moodule) }
220
- }
371
+ end
221
372
  end
222
373
 
223
- alias :to_xml :serialize
224
- alias :clone :dup
374
+ alias_method :to_xml, :serialize
375
+ alias_method :clone, :dup
225
376
 
226
377
  # Get the hash of namespaces on the root Nokogiri::XML::Node
227
378
  def namespaces
@@ -231,51 +382,85 @@ module Nokogiri
231
382
  ##
232
383
  # Create a Nokogiri::XML::DocumentFragment from +tags+
233
384
  # Returns an empty fragment if +tags+ is nil.
234
- def fragment tags = nil
235
- DocumentFragment.new(self, tags, self.root)
385
+ def fragment(tags = nil)
386
+ DocumentFragment.new(self, tags, root)
236
387
  end
237
388
 
238
389
  undef_method :swap, :parent, :namespace, :default_namespace=
239
390
  undef_method :add_namespace_definition, :attributes
240
391
  undef_method :namespace_definitions, :line, :add_namespace
241
392
 
242
- def add_child node_or_tags
243
- raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
393
+ def add_child(node_or_tags)
394
+ raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
395
+
244
396
  node_or_tags = coerce(node_or_tags)
245
397
  if node_or_tags.is_a?(XML::NodeSet)
246
398
  raise "A document may not have multiple root nodes." if node_or_tags.size > 1
399
+
247
400
  super(node_or_tags.first)
248
401
  else
249
402
  super
250
403
  end
251
404
  end
252
- alias :<< :add_child
405
+ alias_method :<<, :add_child
253
406
 
254
- ##
255
- # +JRuby+
256
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
407
+ # :call-seq:
408
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
409
+ #
410
+ # [Returns] The document type which determines CSS-to-XPath translation.
411
+ #
412
+ # See XPathVisitor for more information.
413
+ def xpath_doctype
414
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
260
415
  end
261
416
 
262
- ##
263
- # +JRuby+
264
- # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
417
+ #
418
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
419
+ #
420
+ # Returns a hash describing the Document, to use in pattern matching.
421
+ #
422
+ # Valid keys and their values:
423
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
424
+ #
425
+ # In the future, other keys may allow accessing things like doctype and processing
426
+ # instructions. If you have a use case and would like this functionality, please let us know
427
+ # by opening an issue or a discussion on the github project.
428
+ #
429
+ # *Example*
430
+ #
431
+ # doc = Nokogiri::XML.parse(<<~XML)
432
+ # <?xml version="1.0"?>
433
+ # <root>
434
+ # <child>
435
+ # </root>
436
+ # XML
437
+ #
438
+ # doc.deconstruct_keys([:root])
439
+ # # => {:root=>
440
+ # # #(Element:0x35c {
441
+ # # name = "root",
442
+ # # children = [
443
+ # # #(Text "\n" + " "),
444
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
445
+ # # #(Text "\n")]
446
+ # # })}
447
+ #
448
+ # *Example* of an empty document
449
+ #
450
+ # doc = Nokogiri::XML::Document.new
451
+ #
452
+ # doc.deconstruct_keys([:root])
453
+ # # => {:root=>nil}
454
+ #
455
+ # Since v1.14.0
456
+ #
457
+ def deconstruct_keys(keys)
458
+ { root: root }
268
459
  end
269
460
 
270
461
  private
271
- def self.empty_doc? string_or_io
272
- string_or_io.nil? ||
273
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
274
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
275
- end
276
462
 
277
- # @private
278
- IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
463
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
279
464
 
280
465
  def inspect_attributes
281
466
  [:name, :children]