nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,471 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pathname"
5
+
6
+ module Nokogiri
7
+ module XML
8
+ # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
+ # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
+ # on parsing.
11
+ #
12
+ # For searching a Document, see Nokogiri::XML::Searchable#css and
13
+ # Nokogiri::XML::Searchable#xpath
14
+ class Document < Nokogiri::XML::Node
15
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
17
+ # characters in NCNAMEs.
18
# Character classes (ASCII only) used to recognise namespace declaration attribute names.
NCNAME_START_CHAR = "A-Za-z_"
NCNAME_CHAR = "#{NCNAME_START_CHAR}\\-\\.0-9".freeze
# Matches "xmlns" or "xmlns:prefix"; capture group 1 is the prefix (nil for the default
# namespace). Anchored with \A and \z so the *whole* string must match: ^ and $ would match at
# any line boundary and accept multi-line keys such as "junk\nxmlns:foo".
NCNAME_RE = /\Axmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?\z/
21
+
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
  # An Integer bitmask is wrapped so the optional block and the predicates below always see a
  # ParseOptions object.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  if empty_doc?(string_or_io)
    raise Nokogiri::XML::SyntaxError, "Empty document" if options.strict?

    return encoding ? new.tap { |i| i.encoding = encoding } : new
  end

  # File handle opened here on behalf of the caller (Pathname input only). It is closed in the
  # ensure clause so the descriptor is not leaked; IO objects passed in by the caller are
  # never closed by this method.
  # NOTE(review): assumes read_io fully consumes the stream before returning -- confirm.
  opened_io = nil

  doc = if string_or_io.respond_to?(:read)
    if string_or_io.is_a?(Pathname)
      # resolve the Pathname to the file and open it as an IO object, see #2110
      opened_io = string_or_io = string_or_io.expand_path.open
      url ||= string_or_io.path
    end

    read_io(string_or_io, url, encoding, options.to_i)
  else
    # read_memory pukes on empty docs
    read_memory(string_or_io, url, encoding, options.to_i)
  end

  # do xinclude processing
  doc.do_xinclude(options) if options.xinclude?

  doc
ensure
  opened_io&.close
end
80
+
81
+ private
82
+
83
# True when +string_or_io+ is nil, reports itself as empty, or is a stream already at EOF.
def empty_doc?(string_or_io)
  return true if string_or_io.nil?

  blank = string_or_io.respond_to?(:empty?) && string_or_io.empty?
  return blank if blank

  string_or_io.respond_to?(:eof?) && string_or_io.eof?
end
88
+ end
89
+
90
+ ##
91
+ # :singleton-method: wrap
92
+ # :call-seq: wrap(java_document) → Nokogiri::XML::Document
93
+ #
94
+ # ⚠ This method is only available when running JRuby.
95
+ #
96
+ # Create a Document using an existing Java DOM document object.
97
+ #
98
+ # The returned Document shares the same underlying data structure as the Java object, so
99
+ # changes in one are reflected in the other.
100
+ #
101
+ # [Parameters]
102
+ # - `java_document` (Java::OrgW3cDom::Document)
103
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
104
+ #
105
+ # [Returns] Nokogiri::XML::Document
106
+ #
107
+ # See also \#to_java
108
+
109
+ # :method: to_java
110
+ # :call-seq: to_java() → Java::OrgW3cDom::Document
111
+ #
112
+ # ⚠ This method is only available when running JRuby.
113
+ #
114
+ # Returns the underlying Java DOM document object for this document.
115
+ #
116
+ # The returned Java object shares the same underlying data structure as this document, so
117
+ # changes in one are reflected in the other.
118
+ #
119
+ # [Returns]
120
+ # Java::OrgW3cDom::Document
121
+ # (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
122
+ #
123
+ # See also Document.wrap
124
+
125
+ # The errors found while parsing a document.
126
+ #
127
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
128
+ attr_accessor :errors
129
+
130
+ # When `true`, reparented elements without a namespace will inherit their new parent's
131
+ # namespace (if one exists). Defaults to `false`.
132
+ #
133
+ # [Returns] Boolean
134
+ #
135
+ # *Example:* Default behavior of namespace inheritance
136
+ #
137
+ # xml = <<~EOF
138
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
139
+ # <foo:parent>
140
+ # </foo:parent>
141
+ # </root>
142
+ # EOF
143
+ # doc = Nokogiri::XML(xml)
144
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
145
+ # parent.add_child("<child></child>")
146
+ # doc.to_xml
147
+ # # => <?xml version="1.0"?>
148
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
149
+ # # <foo:parent>
150
+ # # <child/>
151
+ # # </foo:parent>
152
+ # # </root>
153
+ #
154
+ # *Example:* Setting namespace inheritance to `true`
155
+ #
156
+ # xml = <<~EOF
157
+ # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
158
+ # <foo:parent>
159
+ # </foo:parent>
160
+ # </root>
161
+ # EOF
162
+ # doc = Nokogiri::XML(xml)
163
+ # doc.namespace_inheritance = true
164
+ # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
165
+ # parent.add_child("<child></child>")
166
+ # doc.to_xml
167
+ # # => <?xml version="1.0"?>
168
+ # # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
169
+ # # <foo:parent>
170
+ # # <foo:child/>
171
+ # # </foo:parent>
172
+ # # </root>
173
+ #
174
+ # Since v1.12.4
175
+ attr_accessor :namespace_inheritance
176
+
177
+ # rubocop:disable Lint/MissingSuper
178
def initialize(*args) # :nodoc:
  # Arguments are accepted but not used here; only the Ruby-level state is reset.
  @namespace_inheritance = false
  @decorators = nil
  @errors = []
end
183
+
184
+ # :call-seq:
185
+ # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
186
+ #
187
+ # Create a new Element with `name` belonging to this document, optionally setting contents or
188
+ # attributes.
189
+ #
190
+ # This method is _not_ the most user-friendly option if your intention is to add a node to the
191
+ # document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
192
+ # Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
193
+ # place it in the document tree.
194
+ #
195
+ # Arguments may be passed to initialize the element:
196
+ #
197
+ # - a Hash argument will be used to set attributes
198
+ # - a non-Hash object that responds to \#to_s will be used to set the new node's contents
199
+ #
200
+ # A block may be passed to mutate the node.
201
+ #
202
+ # [Parameters]
203
+ # - `name` (String)
204
+ # - `contents_or_attrs` (\#to_s, Hash)
205
+ # [Yields] `node` (Nokogiri::XML::Element)
206
+ # [Returns] Nokogiri::XML::Element
207
+ #
208
+ # *Example:* An empty element without attributes
209
+ #
210
+ # doc.create_element("div")
211
+ # # => <div></div>
212
+ #
213
+ # *Example:* An element with contents
214
+ #
215
+ # doc.create_element("div", "contents")
216
+ # # => <div>contents</div>
217
+ #
218
+ # *Example:* An element with attributes
219
+ #
220
+ # doc.create_element("div", {"class" => "container"})
221
+ # # => <div class='container'></div>
222
+ #
223
+ # *Example:* An element with contents and attributes
224
+ #
225
+ # doc.create_element("div", "contents", {"class" => "container"})
226
+ # # => <div class='container'>contents</div>
227
+ #
228
+ # *Example:* Passing a block to mutate the element
229
+ #
230
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
231
+ #
232
def create_element(name, *contents_or_attrs, &block)
  elm = Nokogiri::XML::Element.new(name, self, &block)

  contents_or_attrs.each do |arg|
    # Anything that is not a Hash becomes the element's content.
    unless Hash === arg
      elm.content = arg
      next
    end

    # Hash entries are either namespace declarations ("xmlns", "xmlns:prefix") or attributes.
    arg.each do |attr_name, attr_value|
      declaration = NCNAME_RE.match(attr_name.to_s)
      if declaration
        elm.add_namespace_definition(declaration[1], attr_value)
      else
        elm[attr_name.to_s] = attr_value.to_s
      end
    end
  end

  # A default (prefix-less) namespace declared on the element also applies to the element itself.
  default_ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") }
  elm.namespace = default_ns if default_ns

  elm
end
255
+
256
+ # Create a Text Node with +string+
257
def create_text_node(string, &block)
  # +string+ is stringified first, so any object responding to #to_s is accepted.
  text = string.to_s
  Nokogiri::XML::Text.new(text, self, &block)
end
260
+
261
+ # Create a CDATA Node containing +string+
262
def create_cdata(string, &block)
  # Note the argument order: the owning document comes first for CDATA nodes.
  text = string.to_s
  Nokogiri::XML::CDATA.new(self, text, &block)
end
265
+
266
+ # Create a Comment Node containing +string+
267
def create_comment(string, &block)
  # Note the argument order: the owning document comes first for Comment nodes.
  text = string.to_s
  Nokogiri::XML::Comment.new(self, text, &block)
end
270
+
271
+ # The name of this document. Always returns "document"
272
def name
  # A document reports a fixed label rather than a tag name.
  "document"
end
275
+
276
+ # A reference to +self+
277
def document
  # A document is its own owning document.
  self
end
280
+
281
+ # :call-seq:
282
+ # collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
283
+ #
284
+ # Recursively get all namespaces from this node and its subtree and return them as a
285
+ # hash.
286
+ #
287
+ # ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
288
+ #
289
+ # Note that this method does an xpath lookup for nodes with namespaces, and as a result the
290
+ # order (and which duplicate prefix "wins") may be dependent on the implementation of the
291
+ # underlying XML library.
292
+ #
293
+ # *Example:* Basic usage
294
+ #
295
+ # Given this document:
296
+ #
297
+ # <root xmlns="default" xmlns:foo="bar">
298
+ # <bar xmlns:hello="world" />
299
+ # </root>
300
+ #
301
+ # This method will return:
302
+ #
303
+ # {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
304
+ #
305
+ # *Example:* Duplicate prefixes
306
+ #
307
+ # Given this document:
308
+ #
309
+ # <root xmlns:foo="bar">
310
+ # <bar xmlns:foo="baz" />
311
+ # </root>
312
+ #
313
+ # The hash returned will be something like:
314
+ #
315
+ # {"xmlns:foo" => "baz"}
316
+ #
317
def collect_namespaces
  collected = {}
  xpath("//namespace::*").each do |ns|
    # The "xml" prefix is skipped.
    next if ns.prefix == "xml"

    # A nil prefix (default namespace) yields the bare key "xmlns".
    key = ["xmlns", ns.prefix].compact.join(":")
    collected[key] = ns.href
  end
  collected
end
322
+
323
+ # Get the list of decorators given +key+
324
def decorators(key)
  # Both the registry and the per-key list are created lazily on first access.
  @decorators = {} unless @decorators
  @decorators[key] = [] unless @decorators[key]
  @decorators[key]
end
328
+
329
+ ##
330
+ # Validate this Document against its DTD. Returns a list of errors on
331
+ # the document or +nil+ when there is no DTD.
332
def validate
  # Without an internal subset there is nothing to validate against, so nil is returned.
  internal_subset ? internal_subset.validate(self) : nil
end
337
+
338
+ ##
339
+ # Explore a document with shortcut methods. See Nokogiri::Slop for details.
340
+ #
341
+ # Note that any nodes that have been instantiated before #slop!
342
+ # is called will not be decorated with sloppy behavior. So, if you're in
343
+ # irb, the preferred idiom is:
344
+ #
345
+ # irb> doc = Nokogiri::Slop my_markup
346
+ #
347
+ # and not
348
+ #
349
+ # irb> doc = Nokogiri::HTML my_markup
350
+ # ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
351
+ # irb> doc.slop!
352
+ # ... which does absolutely nothing.
353
+ #
354
def slop!
  node_decorators = decorators(XML::Node)

  # Register the Slop decorator only once; repeated calls are no-ops.
  unless node_decorators.include?(Nokogiri::Decorators::Slop)
    node_decorators << Nokogiri::Decorators::Slop
    decorate!
  end

  self
end
362
+
363
+ ##
364
+ # Apply any decorators to +node+
365
def decorate(node)
  # Nothing has been registered yet.
  return unless @decorators

  @decorators.each_pair do |klass, modules|
    # Only decorators registered for one of the node's classes are applied.
    modules.each { |mod| node.extend(mod) } if node.is_a?(klass)
  end
end
374
+
375
+ alias_method :to_xml, :serialize
376
+ alias_method :clone, :dup
377
+
378
+ # Get the hash of namespaces on the root Nokogiri::XML::Node
379
def namespaces
  # A document without a root element has no namespaces.
  return {} unless root

  root.namespaces
end
382
+
383
+ ##
384
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
385
+ # Returns an empty fragment if +tags+ is nil.
386
def fragment(tags = nil)
  # The current root is passed as the third argument to DocumentFragment.new.
  context_node = root
  DocumentFragment.new(self, tags, context_node)
end
389
+
390
+ undef_method :swap, :parent, :namespace, :default_namespace=
391
+ undef_method :add_namespace_definition, :attributes
392
+ undef_method :namespace_definitions, :line, :add_namespace
393
+
394
# Add +node_or_tags+ as a child of this document.
#
# +node_or_tags+ is first passed through #coerce, so anything #coerce accepts may be given.
#
# Raises RuntimeError ("A document may not have multiple root nodes.") when
# - the coerced value is a NodeSet with more than one node, or
# - the document already has a root (other than the "nokogiri_text_wrapper" placeholder) and
#   the node being added is neither a comment nor a processing instruction.
def add_child(node_or_tags)
  # Coerce *before* inspecting the argument: previously #comment? was sent to the raw argument,
  # so a markup String or a NodeSet raised NoMethodError once the document had a root.
  node_or_tags = coerce(node_or_tags)
  if node_or_tags.is_a?(XML::NodeSet)
    raise "A document may not have multiple root nodes." if node_or_tags.size > 1

    node_or_tags = node_or_tags.first
  end

  has_real_root = root && root.name != "nokogiri_text_wrapper"
  # A nil node (empty NodeSet) skips the root check and is left for the superclass to handle.
  if has_real_root && node_or_tags && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
    raise "A document may not have multiple root nodes."
  end

  super(node_or_tags)
end
406
+ alias_method :<<, :add_child
407
+
408
+ # :call-seq:
409
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
410
+ #
411
+ # [Returns] The document type which determines CSS-to-XPath translation.
412
+ #
413
+ # See XPathVisitor for more information.
414
def xpath_doctype
  # XML documents always report the XML doctype configuration.
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
end
417
+
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
def deconstruct_keys(keys)
  # +keys+ is not consulted: the only supported key, :root, is always returned.
  { root: root }
end
461
+
462
+ private
463
+
464
+ IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
465
+
466
def inspect_attributes
  # Names of the attributes listed for a document.
  %i[name children]
end
469
+ end
470
+ end
471
+ end
@@ -0,0 +1,205 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module XML
6
+ class DocumentFragment < Nokogiri::XML::Node
7
+ ####
8
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
9
def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
  # The fragment is built against a brand-new, empty document and without a context node.
  owner = XML::Document.new
  new(owner, tags, nil, options, &block)
end
12
+
13
+ ##
14
+ # Create a new DocumentFragment from +tags+.
15
+ #
16
+ # If +ctx+ is present, it is used as a context node for the
17
+ # subtree created, e.g., namespaces will be resolved relative
18
+ # to +ctx+.
19
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
  # No markup: leave the fragment empty.
  return self unless tags

  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  parsed_nodes =
    if !ctx
      # No context node: parse inside a throwaway <root> wrapper and keep its parse errors.
      wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
      self.errors = wrapper_doc.errors
      wrapper_doc.xpath("/root/node()")
    elsif Nokogiri.jruby?
      # Fix for issue#490 and issue #770: on JRuby the markup is wrapped in a <root> element
      # carrying the context's namespace declarations.
      ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
    else
      # Fix for issue#490
      ctx.parse(tags, options)
    end

  # Re-parent every parsed node onto this fragment.
  parsed_nodes.each { |child| child.parent = self }
end
40
+
41
if Nokogiri.uses_libxml?
  # Copy this fragment, together with a copy of its owning document.
  def dup
    new_document = document.dup
    self.class.new(new_document).tap do |new_fragment|
      # Each child is duplicated into the new document and attached to the new fragment.
      children.each { |child| child.dup(1, new_document).parent = new_fragment }
    end
  end
end
51
+
52
+ ###
53
+ # return the name for DocumentFragment
54
def name
  # Fragments report a fixed label rather than a tag name.
  "#document-fragment"
end
57
+
58
+ ###
59
+ # Convert this DocumentFragment to a string
60
+ def to_s
61
+ children.to_s
62
+ end
63
+
64
+ ###
65
+ # Convert this DocumentFragment to html
66
+ # See Nokogiri::XML::NodeSet#to_html
67
def to_html(*args)
  if Nokogiri.jruby?
    # On JRuby, make sure an options Hash leads the arguments and carries a :save_with default.
    save_opts = args.first.is_a?(Hash) ? args.shift : {}
    save_opts[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
    args.unshift(save_opts)
  end

  children.to_html(*args)
end
75
+
76
+ ###
77
+ # Convert this DocumentFragment to xhtml
78
+ # See Nokogiri::XML::NodeSet#to_xhtml
79
def to_xhtml(*args)
  if Nokogiri.jruby?
    # On JRuby, make sure an options Hash leads the arguments and carries a :save_with default.
    save_opts = args.first.is_a?(Hash) ? args.shift : {}
    save_opts[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
    args.unshift(save_opts)
  end

  children.to_xhtml(*args)
end
87
+
88
+ ###
89
+ # Convert this DocumentFragment to xml
90
+ # See Nokogiri::XML::NodeSet#to_xml
91
def to_xml(*args)
  # Serialization is delegated to the fragment's children, forwarding all arguments.
  nodes = children
  nodes.to_xml(*args)
end
94
+
95
+ ###
96
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
97
+ #
98
+ # Search this fragment for CSS +rules+. +rules+ must be one or more CSS
99
+ # selectors.
100
+ #
101
+ # For more information see Nokogiri::XML::Searchable#css
102
def css(*args)
  # With no children, return an empty NodeSet owned by the document.
  return NodeSet.new(document) unless children.any?

  children.css(*args) # 'children' is a smell here
end
109
+
110
+ #
111
+ # NOTE that we don't delegate #xpath to children ... another smell.
112
+ # def xpath ; end
113
+ #
114
+
115
+ ###
116
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
117
+ #
118
+ # Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
119
+ #
120
+ # For more information see Nokogiri::XML::Searchable#search
121
def search(*rules)
  rules, handler, ns, binds = extract_params(rules)

  results = NodeSet.new(document)
  rules.each do |rule|
    matches =
      if Searchable::LOOKS_LIKE_XPATH.match?(rule)
        # XPath queries run against the fragment itself ...
        xpath(*[rule, ns, handler, binds].compact)
      else
        # ... while CSS queries run against its children ('children' is a smell here).
        children.css(*[rule, ns, handler].compact)
      end
    results += matches
  end
  results
end
132
+
133
+ alias_method :serialize, :to_s
134
+
135
+ # A list of Nokogiri::XML::SyntaxError found when parsing a document
136
def errors
  # Errors are stored on the owning document, not on the fragment.
  owner = document
  owner.errors
end
139
+
140
def errors=(things) # :nodoc:
  # Errors are stored on the owning document, not on the fragment.
  owner = document
  owner.errors = things
end
143
+
144
def fragment(data)
  # New fragments are created through the owning document.
  owner = document
  owner.fragment(data)
end
147
+
148
+ #
149
+ # :call-seq: deconstruct() → Array
150
+ #
151
+ # Returns the root nodes of this document fragment as an array, to use in pattern matching.
152
+ #
153
+ # 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
154
+ # root elements, you should deconstruct the array returned by
155
+ # <tt>DocumentFragment#elements</tt>.
156
+ #
157
+ # ⚡ This is an experimental feature, available since v1.14.0
158
+ #
159
+ # *Example*
160
+ #
161
+ # frag = Nokogiri::HTML5.fragment(<<~HTML)
162
+ # <div>Start</div>
163
+ # This is a <a href="#jump">shortcut</a> for you.
164
+ # <div>End</div>
165
+ # HTML
166
+ #
167
+ # frag.deconstruct
168
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
169
+ # # #(Text "\n" + "This is a "),
170
+ # # #(Element:0x370 {
171
+ # # name = "a",
172
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
173
+ # # children = [ #(Text "shortcut")]
174
+ # # }),
175
+ # # #(Text " for you.\n"),
176
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
177
+ # # #(Text "\n")]
178
+ #
179
+ # *Example* only the elements, not the text nodes.
180
+ #
181
+ # frag.elements.deconstruct
182
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
183
+ # # #(Element:0x370 {
184
+ # # name = "a",
185
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
186
+ # # children = [ #(Text "shortcut")]
187
+ # # }),
188
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
+ #
190
def deconstruct
  # The children (text nodes included) are returned as an Array.
  nodes = children
  nodes.to_a
end
193
+
194
+ private
195
+
196
+ # fix for issue 770
197
# fix for issue 770
# Builds the xmlns attribute text for every namespace in scope on +ctx+, separated by spaces.
def namespace_declarations(ctx)
  declarations = ctx.namespace_scopes.map do |namespace|
    # A nil prefix denotes the default namespace, declared as plain "xmlns".
    attr_name = namespace.prefix.nil? ? "xmlns" : "xmlns:#{namespace.prefix}"
    "#{attr_name}=\"#{namespace.href}\""
  end
  declarations.join(" ")
end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
# A DTD node. Several Node methods are removed from this class below.
class DTD < Nokogiri::XML::Node
  undef_method :attribute_nodes, :values, :content, :namespace, :namespace_definitions
  # :line is removed only if it is defined.
  undef_method :line if method_defined?(:line)

  # Keys of the #attributes collection.
  def keys
    attributes.keys
  end

  # Yields each entry of #attributes as a two-element [key, value] Array.
  def each
    attributes.each { |key, value| yield([key, value]) }
  end

  # True when the DTD's name is "html", compared case-insensitively.
  def html_dtd?
    name.casecmp("html").zero?
  end

  # True for an "html" DTD with no external id and either no system id or the
  # "about:legacy-compat" system id.
  def html5_dtd?
    return false unless html_dtd?
    return false unless external_id.nil?

    system_id.nil? || system_id == "about:legacy-compat"
  end
end
33
+ end
34
+ end