nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -1,132 +1,276 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
1
6
  module Nokogiri
2
7
  module XML
3
- ##
4
- # Nokogiri::XML::Document is the main entry point for dealing with
5
- # XML documents. The Document is created by parsing an XML document.
6
- # See Nokogiri::XML::Document.parse() for more information on parsing.
8
+ # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
+ # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
+ # on parsing.
7
11
  #
8
12
  # For searching a Document, see Nokogiri::XML::Searchable#css and
9
13
  # Nokogiri::XML::Searchable#xpath
10
- #
11
14
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
15
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
17
+ # characters in NCNAMEs.
14
18
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
19
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
+
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
61
+
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
68
+
69
+ read_io(string_or_io, url, encoding, options.to_i)
70
+ else
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
73
+ end
74
+
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
77
+
78
+ doc
79
+ end
80
+
81
+ private
82
+
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
88
+ end
17
89
 
18
90
  ##
19
- # Parse an XML file.
91
+ # :singleton-method: wrap
92
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
20
93
  #
21
- # +string_or_io+ may be a String, or any object that responds to
22
- # _read_ and _close_ such as an IO, or StringIO.
94
+ # This method is only available when running JRuby.
23
95
  #
24
- # +url+ (optional) is the URI where this document is located.
96
+ # Create a Document using an existing Java DOM document object.
25
97
  #
26
- # +encoding+ (optional) is the encoding that should be used when processing
27
- # the document.
98
+ # The returned Document shares the same underlying data structure as the Java object, so
99
+ # changes in one are reflected in the other.
28
100
  #
29
- # +options+ (optional) is a configuration object that sets options during
30
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
31
- # Nokogiri::XML::ParseOptions for more information.
101
+ # [Parameters]
102
+ # - `java_document` (Java::OrgW3cDom::Document)
103
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
32
104
  #
33
- # +block+ (optional) is passed a configuration object on which
34
- # parse options may be set.
105
+ # [Returns] Nokogiri::XML::Document
35
106
  #
36
- # By default, Nokogiri treats documents as untrusted, and so
37
- # does not attempt to load DTDs or access the network. See
38
- # Nokogiri::XML::ParseOptions for a complete list of options;
39
- # and that module's DEFAULT_XML constant for what's set (and not
40
- # set) by default.
107
+ # See also \#to_java
108
+
109
+ # :method: to_java
110
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
41
111
  #
42
- # Nokogiri.XML() is a convenience method which will call this method.
112
+ # This method is only available when running JRuby.
43
113
  #
44
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
45
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
- # Give the options to the user
47
- yield options if block_given?
48
-
49
- if empty_doc?(string_or_io)
50
- if options.strict?
51
- raise Nokogiri::XML::SyntaxError.new("Empty document")
52
- else
53
- return encoding ? new.tap { |i| i.encoding = encoding } : new
54
- end
55
- end
56
-
57
- doc = if string_or_io.respond_to?(:read)
58
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
59
- read_io(string_or_io, url, encoding, options.to_i)
60
- else
61
- # read_memory pukes on empty docs
62
- read_memory(string_or_io, url, encoding, options.to_i)
63
- end
64
-
65
- # do xinclude processing
66
- doc.do_xinclude(options) if options.xinclude?
67
-
68
- return doc
69
- end
114
+ # Returns the underlying Java DOM document object for this document.
115
+ #
116
+ # The returned Java object shares the same underlying data structure as this document, so
117
+ # changes in one are reflected in the other.
118
+ #
119
+ # [Returns]
120
+ # Java::OrgW3cDom::Document
121
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
122
+ #
123
+ # See also Document.wrap
70
124
 
71
- # A list of Nokogiri::XML::SyntaxError found when parsing a document
125
+ # The errors found while parsing a document.
126
+ #
127
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
72
128
  attr_accessor :errors
73
129
 
74
- def initialize *args # :nodoc:
130
+ # When `true`, reparented elements without a namespace will inherit their new parent's
131
+ # namespace (if one exists). Defaults to `false`.
132
+ #
133
+ # [Returns] Boolean
134
+ #
135
+ # *Example:* Default behavior of namespace inheritance
136
+ #
137
+ # xml = <<~EOF
138
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
139
+ # <foo:parent>
140
+ # </foo:parent>
141
+ # </root>
142
+ # EOF
143
+ # doc = Nokogiri::XML(xml)
144
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
145
+ # parent.add_child("<child></child>")
146
+ # doc.to_xml
147
+ # # => <?xml version="1.0"?>
148
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
149
+ # # <foo:parent>
150
+ # # <child/>
151
+ # # </foo:parent>
152
+ # # </root>
153
+ #
154
+ # *Example:* Setting namespace inheritance to `true`
155
+ #
156
+ # xml = <<~EOF
157
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
158
+ # <foo:parent>
159
+ # </foo:parent>
160
+ # </root>
161
+ # EOF
162
+ # doc = Nokogiri::XML(xml)
163
+ # doc.namespace_inheritance = true
164
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
165
+ # parent.add_child("<child></child>")
166
+ # doc.to_xml
167
+ # # => <?xml version="1.0"?>
168
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
169
+ # # <foo:parent>
170
+ # # <foo:child/>
171
+ # # </foo:parent>
172
+ # # </root>
173
+ #
174
+ # Since v1.12.4
175
+ attr_accessor :namespace_inheritance
176
+
177
+ # :nodoc:
178
+ def initialize(*args) # rubocop:disable Lint/MissingSuper
75
179
  @errors = []
76
180
  @decorators = nil
181
+ @namespace_inheritance = false
77
182
  end
78
183
 
79
- ##
80
- # Create an element with +name+, and optionally setting the content and attributes.
184
+ # :call-seq:
185
+ # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
186
+ #
187
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
188
+ # attributes.
189
+ #
190
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
191
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
192
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
193
+ # place it in the document tree.
81
194
  #
82
- # doc.create_element "div" # <div></div>
83
- # doc.create_element "div", :class => "container" # <div class='container'></div>
84
- # doc.create_element "div", "contents" # <div>contents</div>
85
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
86
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
195
+ # Arguments may be passed to initialize the element:
87
196
  #
88
- def create_element name, *args, &block
197
+ # - a Hash argument will be used to set attributes
198
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
199
+ #
200
+ # A block may be passed to mutate the node.
201
+ #
202
+ # [Parameters]
203
+ # - `name` (String)
204
+ # - `contents_or_attrs` (\#to_s, Hash)
205
+ # [Yields] `node` (Nokogiri::XML::Element)
206
+ # [Returns] Nokogiri::XML::Element
207
+ #
208
+ # *Example:* An empty element without attributes
209
+ #
210
+ # doc.create_element("div")
211
+ # # => <div></div>
212
+ #
213
+ # *Example:* An element with contents
214
+ #
215
+ # doc.create_element("div", "contents")
216
+ # # => <div>contents</div>
217
+ #
218
+ # *Example:* An element with attributes
219
+ #
220
+ # doc.create_element("div", {"class" => "container"})
221
+ # # => <div class='container'></div>
222
+ #
223
+ # *Example:* An element with contents and attributes
224
+ #
225
+ # doc.create_element("div", "contents", {"class" => "container"})
226
+ # # => <div class='container'>contents</div>
227
+ #
228
+ # *Example:* Passing a block to mutate the element
229
+ #
230
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
231
+ #
232
+ def create_element(name, *contents_or_attrs, &block)
89
233
  elm = Nokogiri::XML::Element.new(name, self, &block)
90
- args.each do |arg|
234
+ contents_or_attrs.each do |arg|
91
235
  case arg
92
236
  when Hash
93
- arg.each { |k,v|
237
+ arg.each do |k, v|
94
238
  key = k.to_s
95
239
  if key =~ NCNAME_RE
96
- ns_name = key.split(":", 2)[1]
97
- elm.add_namespace_definition ns_name, v
240
+ ns_name = Regexp.last_match(1)
241
+ elm.add_namespace_definition(ns_name, v)
98
242
  else
99
243
  elm[k.to_s] = v.to_s
100
244
  end
101
- }
245
+ end
102
246
  else
103
247
  elm.content = arg
104
248
  end
105
249
  end
106
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
250
+ if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
107
251
  elm.namespace = ns
108
252
  end
109
253
  elm
110
254
  end
111
255
 
112
256
  # Create a Text Node with +string+
113
- def create_text_node string, &block
114
- Nokogiri::XML::Text.new string.to_s, self, &block
257
+ def create_text_node(string, &block)
258
+ Nokogiri::XML::Text.new(string.to_s, self, &block)
115
259
  end
116
260
 
117
261
  # Create a CDATA Node containing +string+
118
- def create_cdata string, &block
119
- Nokogiri::XML::CDATA.new self, string.to_s, &block
262
+ def create_cdata(string, &block)
263
+ Nokogiri::XML::CDATA.new(self, string.to_s, &block)
120
264
  end
121
265
 
122
266
  # Create a Comment Node containing +string+
123
- def create_comment string, &block
124
- Nokogiri::XML::Comment.new self, string.to_s, &block
267
+ def create_comment(string, &block)
268
+ Nokogiri::XML::Comment.new(self, string.to_s, &block)
125
269
  end
126
270
 
127
271
  # The name of this document. Always returns "document"
128
272
  def name
129
- 'document'
273
+ "document"
130
274
  end
131
275
 
132
276
  # A reference to +self+
@@ -134,46 +278,51 @@ module Nokogiri
134
278
  self
135
279
  end
136
280
 
137
- ##
138
- # Recursively get all namespaces from this node and its subtree and
139
- # return them as a hash.
281
+ # :call-seq:
282
+ # collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
140
283
  #
141
- # For example, given this document:
284
+ # Recursively get all namespaces from this node and its subtree and return them as a
285
+ # hash.
142
286
  #
143
- # <root xmlns:foo="bar">
287
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
288
+ #
289
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
290
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
291
+ # underlying XML library.
292
+ #
293
+ # *Example:* Basic usage
294
+ #
295
+ # Given this document:
296
+ #
297
+ # <root xmlns="default" xmlns:foo="bar">
144
298
  # <bar xmlns:hello="world" />
145
299
  # </root>
146
300
  #
147
301
  # This method will return:
148
302
  #
149
- # { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
303
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
304
+ #
305
+ # *Example:* Duplicate prefixes
150
306
  #
151
- # WARNING: this method will clobber duplicate names in the keys.
152
- # For example, given this document:
307
+ # Given this document:
153
308
  #
154
309
  # <root xmlns:foo="bar">
155
310
  # <bar xmlns:foo="baz" />
156
311
  # </root>
157
312
  #
158
- # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
313
+ # The hash returned will be something like:
159
314
  #
160
- # Non-prefixed default namespaces (as in "xmlns=") are not included
161
- # in the hash.
162
- #
163
- # Note that this method does an xpath lookup for nodes with
164
- # namespaces, and as a result the order may be dependent on the
165
- # implementation of the underlying XML library.
315
+ # {"xmlns:foo" => "baz"}
166
316
  #
167
317
  def collect_namespaces
168
- xpath("//namespace::*").inject({}) do |hash, ns|
169
- hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
170
- hash
318
+ xpath("//namespace::*").each_with_object({}) do |ns, hash|
319
+ hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
171
320
  end
172
321
  end
173
322
 
174
323
  # Get the list of decorators given +key+
175
- def decorators key
176
- @decorators ||= Hash.new
324
+ def decorators(key)
325
+ @decorators ||= {}
177
326
  @decorators[key] ||= []
178
327
  end
179
328
 
@@ -182,7 +331,8 @@ module Nokogiri
182
331
  # the document or +nil+ when there is no DTD.
183
332
  def validate
184
333
  return nil unless internal_subset
185
- internal_subset.validate self
334
+
335
+ internal_subset.validate(self)
186
336
  end
187
337
 
188
338
  ##
@@ -202,7 +352,7 @@ module Nokogiri
202
352
  # ... which does absolutely nothing.
203
353
  #
204
354
  def slop!
205
- unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
355
+ unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
206
356
  decorators(XML::Node) << Nokogiri::Decorators::Slop
207
357
  decorate!
208
358
  end
@@ -212,16 +362,18 @@ module Nokogiri
212
362
 
213
363
  ##
214
364
  # Apply any decorators to +node+
215
- def decorate node
365
+ def decorate(node)
216
366
  return unless @decorators
217
- @decorators.each { |klass,list|
367
+
368
+ @decorators.each do |klass, list|
218
369
  next unless node.is_a?(klass)
370
+
219
371
  list.each { |moodule| node.extend(moodule) }
220
- }
372
+ end
221
373
  end
222
374
 
223
- alias :to_xml :serialize
224
- alias :clone :dup
375
+ alias_method :to_xml, :serialize
376
+ alias_method :clone, :dup
225
377
 
226
378
  # Get the hash of namespaces on the root Nokogiri::XML::Node
227
379
  def namespaces
@@ -231,51 +383,85 @@ module Nokogiri
231
383
  ##
232
384
  # Create a Nokogiri::XML::DocumentFragment from +tags+
233
385
  # Returns an empty fragment if +tags+ is nil.
234
- def fragment tags = nil
235
- DocumentFragment.new(self, tags, self.root)
386
+ def fragment(tags = nil)
387
+ DocumentFragment.new(self, tags, root)
236
388
  end
237
389
 
238
390
  undef_method :swap, :parent, :namespace, :default_namespace=
239
391
  undef_method :add_namespace_definition, :attributes
240
392
  undef_method :namespace_definitions, :line, :add_namespace
241
393
 
242
- def add_child node_or_tags
243
- raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
394
+ def add_child(node_or_tags)
395
+ raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
396
+
244
397
  node_or_tags = coerce(node_or_tags)
245
398
  if node_or_tags.is_a?(XML::NodeSet)
246
399
  raise "A document may not have multiple root nodes." if node_or_tags.size > 1
400
+
247
401
  super(node_or_tags.first)
248
402
  else
249
403
  super
250
404
  end
251
405
  end
252
- alias :<< :add_child
406
+ alias_method :<<, :add_child
253
407
 
254
- ##
255
- # +JRuby+
256
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
408
+ # :call-seq:
409
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
410
+ #
411
+ # [Returns] The document type which determines CSS-to-XPath translation.
412
+ #
413
+ # See XPathVisitor for more information.
414
+ def xpath_doctype
415
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
260
416
  end
261
417
 
262
- ##
263
- # +JRuby+
264
- # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
+ def deconstruct_keys(keys)
459
+ { root: root }
268
460
  end
269
461
 
270
462
  private
271
- def self.empty_doc? string_or_io
272
- string_or_io.nil? ||
273
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
274
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
275
- end
276
463
 
277
- # @private
278
- IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
464
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
279
465
 
280
466
  def inspect_attributes
281
467
  [:name, :children]