nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,471 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
6
|
+
module Nokogiri
|
7
|
+
module XML
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
|
9
|
+
# is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
|
10
|
+
# on parsing.
|
11
|
+
#
|
12
|
+
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
13
|
+
# Nokogiri::XML::Searchable#xpath
|
14
|
+
class Document < Nokogiri::XML::Node
|
15
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
17
|
+
# characters in NCNAMEs.
|
18
|
+
NCNAME_START_CHAR = "A-Za-z_"
|
19
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
|
+
|
22
|
+
class << self
|
23
|
+
# Parse an XML document from a String or an IO-like object.
|
24
|
+
#
|
25
|
+
# +string_or_io+ may be a String, or any object that responds to
|
26
|
+
# _read_ and _close_ such as an IO, or StringIO.
|
27
|
+
#
|
28
|
+
# +url+ (optional) is the URI where this document is located.
|
29
|
+
#
|
30
|
+
# +encoding+ (optional) is the encoding that should be used when processing
|
31
|
+
# the document.
|
32
|
+
#
|
33
|
+
# +options+ (optional) is a configuration object that sets options during
|
34
|
+
# parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
|
35
|
+
# Nokogiri::XML::ParseOptions for more information.
|
36
|
+
#
|
37
|
+
# +block+ (optional) is passed a configuration object on which
|
38
|
+
# parse options may be set.
|
39
|
+
#
|
40
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
41
|
+
# does not attempt to load DTDs or access the network. See
|
42
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
43
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
+
# set) by default.
|
45
|
+
#
|
46
|
+
# Nokogiri.XML() is a convenience method which will call this method.
|
47
|
+
#
|
48
|
+
def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
|
49
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
|
+
yield options if block_given?
|
51
|
+
|
52
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
53
|
+
|
54
|
+
if empty_doc?(string_or_io)
|
55
|
+
if options.strict?
|
56
|
+
raise Nokogiri::XML::SyntaxError, "Empty document"
|
57
|
+
else
|
58
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
doc = if string_or_io.respond_to?(:read)
|
63
|
+
if string_or_io.is_a?(Pathname)
|
64
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
|
+
string_or_io = string_or_io.expand_path.open
|
66
|
+
url ||= string_or_io.path
|
67
|
+
end
|
68
|
+
|
69
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
70
|
+
else
|
71
|
+
# read_memory cannot handle empty docs; those were already handled by the empty_doc? check above
|
72
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
73
|
+
end
|
74
|
+
|
75
|
+
# do xinclude processing
|
76
|
+
doc.do_xinclude(options) if options.xinclude?
|
77
|
+
|
78
|
+
doc
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def empty_doc?(string_or_io)
|
84
|
+
string_or_io.nil? ||
|
85
|
+
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
86
|
+
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
##
|
91
|
+
# :singleton-method: wrap
|
92
|
+
# :call-seq: wrap(java_document) → Nokogiri::XML::Document
|
93
|
+
#
|
94
|
+
# ⚠ This method is only available when running JRuby.
|
95
|
+
#
|
96
|
+
# Create a Document using an existing Java DOM document object.
|
97
|
+
#
|
98
|
+
# The returned Document shares the same underlying data structure as the Java object, so
|
99
|
+
# changes in one are reflected in the other.
|
100
|
+
#
|
101
|
+
# [Parameters]
|
102
|
+
# - `java_document` (Java::OrgW3cDom::Document)
|
103
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
104
|
+
#
|
105
|
+
# [Returns] Nokogiri::XML::Document
|
106
|
+
#
|
107
|
+
# See also \#to_java
|
108
|
+
|
109
|
+
# :method: to_java
|
110
|
+
# :call-seq: to_java() → Java::OrgW3cDom::Document
|
111
|
+
#
|
112
|
+
# ⚠ This method is only available when running JRuby.
|
113
|
+
#
|
114
|
+
# Returns the underlying Java DOM document object for this document.
|
115
|
+
#
|
116
|
+
# The returned Java object shares the same underlying data structure as this document, so
|
117
|
+
# changes in one are reflected in the other.
|
118
|
+
#
|
119
|
+
# [Returns]
|
120
|
+
# Java::OrgW3cDom::Document
|
121
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
122
|
+
#
|
123
|
+
# See also Document.wrap
|
124
|
+
|
125
|
+
# The errors found while parsing a document.
|
126
|
+
#
|
127
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
128
|
+
attr_accessor :errors
|
129
|
+
|
130
|
+
# When `true`, reparented elements without a namespace will inherit their new parent's
|
131
|
+
# namespace (if one exists). Defaults to `false`.
|
132
|
+
#
|
133
|
+
# [Returns] Boolean
|
134
|
+
#
|
135
|
+
# *Example:* Default behavior of namespace inheritance
|
136
|
+
#
|
137
|
+
# xml = <<~EOF
|
138
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
139
|
+
# <foo:parent>
|
140
|
+
# </foo:parent>
|
141
|
+
# </root>
|
142
|
+
# EOF
|
143
|
+
# doc = Nokogiri::XML(xml)
|
144
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
145
|
+
# parent.add_child("<child></child>")
|
146
|
+
# doc.to_xml
|
147
|
+
# # => <?xml version="1.0"?>
|
148
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
149
|
+
# # <foo:parent>
|
150
|
+
# # <child/>
|
151
|
+
# # </foo:parent>
|
152
|
+
# # </root>
|
153
|
+
#
|
154
|
+
# *Example:* Setting namespace inheritance to `true`
|
155
|
+
#
|
156
|
+
# xml = <<~EOF
|
157
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
158
|
+
# <foo:parent>
|
159
|
+
# </foo:parent>
|
160
|
+
# </root>
|
161
|
+
# EOF
|
162
|
+
# doc = Nokogiri::XML(xml)
|
163
|
+
# doc.namespace_inheritance = true
|
164
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
165
|
+
# parent.add_child("<child></child>")
|
166
|
+
# doc.to_xml
|
167
|
+
# # => <?xml version="1.0"?>
|
168
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
169
|
+
# # <foo:parent>
|
170
|
+
# # <foo:child/>
|
171
|
+
# # </foo:parent>
|
172
|
+
# # </root>
|
173
|
+
#
|
174
|
+
# Since v1.12.4
|
175
|
+
attr_accessor :namespace_inheritance
|
176
|
+
|
177
|
+
# rubocop:disable Lint/MissingSuper
|
178
|
+
def initialize(*args) # :nodoc:
|
179
|
+
@errors = []
|
180
|
+
@decorators = nil
|
181
|
+
@namespace_inheritance = false
|
182
|
+
end
|
183
|
+
|
184
|
+
# :call-seq:
|
185
|
+
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
186
|
+
#
|
187
|
+
# Create a new Element with `name` belonging to this document, optionally setting contents or
|
188
|
+
# attributes.
|
189
|
+
#
|
190
|
+
# This method is _not_ the most user-friendly option if your intention is to add a node to the
|
191
|
+
# document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
|
192
|
+
# Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
|
193
|
+
# place it in the document tree.
|
194
|
+
#
|
195
|
+
# Arguments may be passed to initialize the element:
|
196
|
+
#
|
197
|
+
# - a Hash argument will be used to set attributes
|
198
|
+
# - a non-Hash object that responds to \#to_s will be used to set the new node's contents
|
199
|
+
#
|
200
|
+
# A block may be passed to mutate the node.
|
201
|
+
#
|
202
|
+
# [Parameters]
|
203
|
+
# - `name` (String)
|
204
|
+
# - `contents_or_attrs` (\#to_s, Hash)
|
205
|
+
# [Yields] `node` (Nokogiri::XML::Element)
|
206
|
+
# [Returns] Nokogiri::XML::Element
|
207
|
+
#
|
208
|
+
# *Example:* An empty element without attributes
|
209
|
+
#
|
210
|
+
# doc.create_element("div")
|
211
|
+
# # => <div></div>
|
212
|
+
#
|
213
|
+
# *Example:* An element with contents
|
214
|
+
#
|
215
|
+
# doc.create_element("div", "contents")
|
216
|
+
# # => <div>contents</div>
|
217
|
+
#
|
218
|
+
# *Example:* An element with attributes
|
219
|
+
#
|
220
|
+
# doc.create_element("div", {"class" => "container"})
|
221
|
+
# # => <div class='container'></div>
|
222
|
+
#
|
223
|
+
# *Example:* An element with contents and attributes
|
224
|
+
#
|
225
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
226
|
+
# # => <div class='container'>contents</div>
|
227
|
+
#
|
228
|
+
# *Example:* Passing a block to mutate the element
|
229
|
+
#
|
230
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
231
|
+
#
|
232
|
+
def create_element(name, *contents_or_attrs, &block)
|
233
|
+
elm = Nokogiri::XML::Element.new(name, self, &block)
|
234
|
+
contents_or_attrs.each do |arg|
|
235
|
+
case arg
|
236
|
+
when Hash
|
237
|
+
arg.each do |k, v|
|
238
|
+
key = k.to_s
|
239
|
+
if key =~ NCNAME_RE
|
240
|
+
ns_name = Regexp.last_match(1)
|
241
|
+
elm.add_namespace_definition(ns_name, v)
|
242
|
+
else
|
243
|
+
elm[k.to_s] = v.to_s
|
244
|
+
end
|
245
|
+
end
|
246
|
+
else
|
247
|
+
elm.content = arg
|
248
|
+
end
|
249
|
+
end
|
250
|
+
if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
|
251
|
+
elm.namespace = ns
|
252
|
+
end
|
253
|
+
elm
|
254
|
+
end
|
255
|
+
|
256
|
+
# Create a Text Node with +string+
|
257
|
+
def create_text_node(string, &block)
|
258
|
+
Nokogiri::XML::Text.new(string.to_s, self, &block)
|
259
|
+
end
|
260
|
+
|
261
|
+
# Create a CDATA Node containing +string+
|
262
|
+
def create_cdata(string, &block)
|
263
|
+
Nokogiri::XML::CDATA.new(self, string.to_s, &block)
|
264
|
+
end
|
265
|
+
|
266
|
+
# Create a Comment Node containing +string+
|
267
|
+
def create_comment(string, &block)
|
268
|
+
Nokogiri::XML::Comment.new(self, string.to_s, &block)
|
269
|
+
end
|
270
|
+
|
271
|
+
# The name of this document. Always returns "document"
|
272
|
+
def name
|
273
|
+
"document"
|
274
|
+
end
|
275
|
+
|
276
|
+
# A reference to +self+
|
277
|
+
def document
|
278
|
+
self
|
279
|
+
end
|
280
|
+
|
281
|
+
# :call-seq:
|
282
|
+
# collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
283
|
+
#
|
284
|
+
# Recursively get all namespaces from this node and its subtree and return them as a
|
285
|
+
# hash.
|
286
|
+
#
|
287
|
+
# ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
|
288
|
+
#
|
289
|
+
# Note that this method does an xpath lookup for nodes with namespaces, and as a result the
|
290
|
+
# order (and which duplicate prefix "wins") may be dependent on the implementation of the
|
291
|
+
# underlying XML library.
|
292
|
+
#
|
293
|
+
# *Example:* Basic usage
|
294
|
+
#
|
295
|
+
# Given this document:
|
296
|
+
#
|
297
|
+
# <root xmlns="default" xmlns:foo="bar">
|
298
|
+
# <bar xmlns:hello="world" />
|
299
|
+
# </root>
|
300
|
+
#
|
301
|
+
# This method will return:
|
302
|
+
#
|
303
|
+
# {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
|
304
|
+
#
|
305
|
+
# *Example:* Duplicate prefixes
|
306
|
+
#
|
307
|
+
# Given this document:
|
308
|
+
#
|
309
|
+
# <root xmlns:foo="bar">
|
310
|
+
# <bar xmlns:foo="baz" />
|
311
|
+
# </root>
|
312
|
+
#
|
313
|
+
# The hash returned will be something like:
|
314
|
+
#
|
315
|
+
# {"xmlns:foo" => "baz"}
|
316
|
+
#
|
317
|
+
def collect_namespaces
|
318
|
+
xpath("//namespace::*").each_with_object({}) do |ns, hash|
|
319
|
+
hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
# Get the list of decorators given +key+
|
324
|
+
def decorators(key)
|
325
|
+
@decorators ||= {}
|
326
|
+
@decorators[key] ||= []
|
327
|
+
end
|
328
|
+
|
329
|
+
##
|
330
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
331
|
+
# the document or +nil+ when there is no DTD.
|
332
|
+
def validate
|
333
|
+
return nil unless internal_subset
|
334
|
+
|
335
|
+
internal_subset.validate(self)
|
336
|
+
end
|
337
|
+
|
338
|
+
##
|
339
|
+
# Explore a document with shortcut methods. See Nokogiri::Slop for details.
|
340
|
+
#
|
341
|
+
# Note that any nodes that have been instantiated before #slop!
|
342
|
+
# is called will not be decorated with sloppy behavior. So, if you're in
|
343
|
+
# irb, the preferred idiom is:
|
344
|
+
#
|
345
|
+
# irb> doc = Nokogiri::Slop my_markup
|
346
|
+
#
|
347
|
+
# and not
|
348
|
+
#
|
349
|
+
# irb> doc = Nokogiri::HTML my_markup
|
350
|
+
# ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
|
351
|
+
# irb> doc.slop!
|
352
|
+
# ... which does absolutely nothing.
|
353
|
+
#
|
354
|
+
def slop!
|
355
|
+
unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
|
356
|
+
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
357
|
+
decorate!
|
358
|
+
end
|
359
|
+
|
360
|
+
self
|
361
|
+
end
|
362
|
+
|
363
|
+
##
|
364
|
+
# Apply any decorators to +node+
|
365
|
+
def decorate(node)
|
366
|
+
return unless @decorators
|
367
|
+
|
368
|
+
@decorators.each do |klass, list|
|
369
|
+
next unless node.is_a?(klass)
|
370
|
+
|
371
|
+
list.each { |moodule| node.extend(moodule) }
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
alias_method :to_xml, :serialize
|
376
|
+
alias_method :clone, :dup
|
377
|
+
|
378
|
+
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
379
|
+
def namespaces
|
380
|
+
root ? root.namespaces : {}
|
381
|
+
end
|
382
|
+
|
383
|
+
##
|
384
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
385
|
+
# Returns an empty fragment if +tags+ is nil.
|
386
|
+
def fragment(tags = nil)
|
387
|
+
DocumentFragment.new(self, tags, root)
|
388
|
+
end
|
389
|
+
|
390
|
+
undef_method :swap, :parent, :namespace, :default_namespace=
|
391
|
+
undef_method :add_namespace_definition, :attributes
|
392
|
+
undef_method :namespace_definitions, :line, :add_namespace
|
393
|
+
|
394
|
+
def add_child(node_or_tags)
|
395
|
+
raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
396
|
+
|
397
|
+
node_or_tags = coerce(node_or_tags)
|
398
|
+
if node_or_tags.is_a?(XML::NodeSet)
|
399
|
+
raise "A document may not have multiple root nodes." if node_or_tags.size > 1
|
400
|
+
|
401
|
+
super(node_or_tags.first)
|
402
|
+
else
|
403
|
+
super
|
404
|
+
end
|
405
|
+
end
|
406
|
+
alias_method :<<, :add_child
|
407
|
+
|
408
|
+
# :call-seq:
|
409
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
410
|
+
#
|
411
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
412
|
+
#
|
413
|
+
# See XPathVisitor for more information.
|
414
|
+
def xpath_doctype
|
415
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
|
416
|
+
end
|
417
|
+
|
418
|
+
#
|
419
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
420
|
+
#
|
421
|
+
# Returns a hash describing the Document, to use in pattern matching.
|
422
|
+
#
|
423
|
+
# Valid keys and their values:
|
424
|
+
# - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
|
425
|
+
#
|
426
|
+
# In the future, other keys may allow accessing things like doctype and processing
|
427
|
+
# instructions. If you have a use case and would like this functionality, please let us know
|
428
|
+
# by opening an issue or a discussion on the github project.
|
429
|
+
#
|
430
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
+
#
|
432
|
+
# *Example*
|
433
|
+
#
|
434
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
435
|
+
# <?xml version="1.0"?>
|
436
|
+
# <root>
|
437
|
+
# <child>
|
438
|
+
# </root>
|
439
|
+
# XML
|
440
|
+
#
|
441
|
+
# doc.deconstruct_keys([:root])
|
442
|
+
# # => {:root=>
|
443
|
+
# # #(Element:0x35c {
|
444
|
+
# # name = "root",
|
445
|
+
# # children = [
|
446
|
+
# # #(Text "\n" + " "),
|
447
|
+
# # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
|
448
|
+
# # #(Text "\n")]
|
449
|
+
# # })}
|
450
|
+
#
|
451
|
+
# *Example* of an empty document
|
452
|
+
#
|
453
|
+
# doc = Nokogiri::XML::Document.new
|
454
|
+
#
|
455
|
+
# doc.deconstruct_keys([:root])
|
456
|
+
# # => {:root=>nil}
|
457
|
+
#
|
458
|
+
def deconstruct_keys(keys)
|
459
|
+
{ root: root }
|
460
|
+
end
|
461
|
+
|
462
|
+
private
|
463
|
+
|
464
|
+
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
465
|
+
|
466
|
+
def inspect_attributes
|
467
|
+
[:name, :children]
|
468
|
+
end
|
469
|
+
end
|
470
|
+
end
|
471
|
+
end
|
@@ -0,0 +1,205 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
class DocumentFragment < Nokogiri::XML::Node
|
7
|
+
####
|
8
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
9
|
+
def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
|
10
|
+
new(XML::Document.new, tags, nil, options, &block)
|
11
|
+
end
|
12
|
+
|
13
|
+
##
|
14
|
+
# Create a new DocumentFragment from +tags+.
|
15
|
+
#
|
16
|
+
# If +ctx+ is present, it is used as a context node for the
|
17
|
+
# subtree created, e.g., namespaces will be resolved relative
|
18
|
+
# to +ctx+.
|
19
|
+
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
|
20
|
+
return self unless tags
|
21
|
+
|
22
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
23
|
+
yield options if block_given?
|
24
|
+
|
25
|
+
children = if ctx
|
26
|
+
# Fix for issue #490
|
27
|
+
if Nokogiri.jruby?
|
28
|
+
# fix for issue #770
|
29
|
+
ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
|
30
|
+
else
|
31
|
+
ctx.parse(tags, options)
|
32
|
+
end
|
33
|
+
else
|
34
|
+
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
|
35
|
+
self.errors = wrapper_doc.errors
|
36
|
+
wrapper_doc.xpath("/root/node()")
|
37
|
+
end
|
38
|
+
children.each { |child| child.parent = self }
|
39
|
+
end
|
40
|
+
|
41
|
+
if Nokogiri.uses_libxml?
|
42
|
+
def dup
|
43
|
+
new_document = document.dup
|
44
|
+
new_fragment = self.class.new(new_document)
|
45
|
+
children.each do |child|
|
46
|
+
child.dup(1, new_document).parent = new_fragment
|
47
|
+
end
|
48
|
+
new_fragment
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
###
|
53
|
+
# return the name for DocumentFragment
|
54
|
+
def name
|
55
|
+
"#document-fragment"
|
56
|
+
end
|
57
|
+
|
58
|
+
###
|
59
|
+
# Convert this DocumentFragment to a string
|
60
|
+
def to_s
|
61
|
+
children.to_s
|
62
|
+
end
|
63
|
+
|
64
|
+
###
|
65
|
+
# Convert this DocumentFragment to html
|
66
|
+
# See Nokogiri::XML::NodeSet#to_html
|
67
|
+
def to_html(*args)
|
68
|
+
if Nokogiri.jruby?
|
69
|
+
options = args.first.is_a?(Hash) ? args.shift : {}
|
70
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
|
71
|
+
args.insert(0, options)
|
72
|
+
end
|
73
|
+
children.to_html(*args)
|
74
|
+
end
|
75
|
+
|
76
|
+
###
|
77
|
+
# Convert this DocumentFragment to xhtml
|
78
|
+
# See Nokogiri::XML::NodeSet#to_xhtml
|
79
|
+
def to_xhtml(*args)
|
80
|
+
if Nokogiri.jruby?
|
81
|
+
options = args.first.is_a?(Hash) ? args.shift : {}
|
82
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
|
83
|
+
args.insert(0, options)
|
84
|
+
end
|
85
|
+
children.to_xhtml(*args)
|
86
|
+
end
|
87
|
+
|
88
|
+
###
|
89
|
+
# Convert this DocumentFragment to xml
|
90
|
+
# See Nokogiri::XML::NodeSet#to_xml
|
91
|
+
def to_xml(*args)
|
92
|
+
children.to_xml(*args)
|
93
|
+
end
|
94
|
+
|
95
|
+
###
|
96
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
97
|
+
#
|
98
|
+
# Search this fragment for CSS +rules+. +rules+ must be one or more CSS
|
99
|
+
# selectors.
|
100
|
+
#
|
101
|
+
# For more information see Nokogiri::XML::Searchable#css
|
102
|
+
def css(*args)
|
103
|
+
if children.any?
|
104
|
+
children.css(*args) # 'children' is a smell here
|
105
|
+
else
|
106
|
+
NodeSet.new(document)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
#
|
111
|
+
# NOTE that we don't delegate #xpath to children ... another smell.
|
112
|
+
# def xpath ; end
|
113
|
+
#
|
114
|
+
|
115
|
+
###
|
116
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
117
|
+
#
|
118
|
+
# Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
|
119
|
+
#
|
120
|
+
# For more information see Nokogiri::XML::Searchable#search
|
121
|
+
def search(*rules)
|
122
|
+
rules, handler, ns, binds = extract_params(rules)
|
123
|
+
|
124
|
+
rules.inject(NodeSet.new(document)) do |set, rule|
|
125
|
+
set + if Searchable::LOOKS_LIKE_XPATH.match?(rule)
|
126
|
+
xpath(*[rule, ns, handler, binds].compact)
|
127
|
+
else
|
128
|
+
children.css(*[rule, ns, handler].compact) # 'children' is a smell here
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
alias_method :serialize, :to_s
|
134
|
+
|
135
|
+
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
136
|
+
def errors
|
137
|
+
document.errors
|
138
|
+
end
|
139
|
+
|
140
|
+
def errors=(things) # :nodoc:
|
141
|
+
document.errors = things
|
142
|
+
end
|
143
|
+
|
144
|
+
def fragment(data)
|
145
|
+
document.fragment(data)
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# :call-seq: deconstruct() → Array
|
150
|
+
#
|
151
|
+
# Returns the root nodes of this document fragment as an array, to use in pattern matching.
|
152
|
+
#
|
153
|
+
# 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
|
154
|
+
# root elements, you should deconstruct the array returned by
|
155
|
+
# <tt>DocumentFragment#elements</tt>.
|
156
|
+
#
|
157
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
+
#
|
159
|
+
# *Example*
|
160
|
+
#
|
161
|
+
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
162
|
+
# <div>Start</div>
|
163
|
+
# This is a <a href="#jump">shortcut</a> for you.
|
164
|
+
# <div>End</div>
|
165
|
+
# HTML
|
166
|
+
#
|
167
|
+
# frag.deconstruct
|
168
|
+
# # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
|
169
|
+
# # #(Text "\n" + "This is a "),
|
170
|
+
# # #(Element:0x370 {
|
171
|
+
# # name = "a",
|
172
|
+
# # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
|
173
|
+
# # children = [ #(Text "shortcut")]
|
174
|
+
# # }),
|
175
|
+
# # #(Text " for you.\n"),
|
176
|
+
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
|
177
|
+
# # #(Text "\n")]
|
178
|
+
#
|
179
|
+
# *Example* only the elements, not the text nodes.
|
180
|
+
#
|
181
|
+
# frag.elements.deconstruct
|
182
|
+
# # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
|
183
|
+
# # #(Element:0x370 {
|
184
|
+
# # name = "a",
|
185
|
+
# # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
|
186
|
+
# # children = [ #(Text "shortcut")]
|
187
|
+
# # }),
|
188
|
+
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
|
+
#
|
190
|
+
def deconstruct
|
191
|
+
children.to_a
|
192
|
+
end
|
193
|
+
|
194
|
+
private
|
195
|
+
|
196
|
+
# fix for issue #770
|
197
|
+
def namespace_declarations(ctx)
|
198
|
+
ctx.namespace_scopes.map do |namespace|
|
199
|
+
prefix = namespace.prefix.nil? ? "" : ":#{namespace.prefix}"
|
200
|
+
%{xmlns#{prefix}="#{namespace.href}"}
|
201
|
+
end.join(" ")
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
class DTD < Nokogiri::XML::Node
|
6
|
+
undef_method :attribute_nodes
|
7
|
+
undef_method :values
|
8
|
+
undef_method :content
|
9
|
+
undef_method :namespace
|
10
|
+
undef_method :namespace_definitions
|
11
|
+
undef_method :line if method_defined?(:line)
|
12
|
+
|
13
|
+
def keys
|
14
|
+
attributes.keys
|
15
|
+
end
|
16
|
+
|
17
|
+
def each
|
18
|
+
attributes.each do |key, value|
|
19
|
+
yield([key, value])
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def html_dtd?
|
24
|
+
name.casecmp("html").zero?
|
25
|
+
end
|
26
|
+
|
27
|
+
def html5_dtd?
|
28
|
+
html_dtd? &&
|
29
|
+
external_id.nil? &&
|
30
|
+
(system_id.nil? || system_id == "about:legacy-compat")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|