nokogiri 1.10.7 → 1.16.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +42 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +188 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +862 -421
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +222 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +39 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +408 -243
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1343 -674
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +305 -213
- data/ext/nokogiri/xml_relax_ng.c +87 -78
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +149 -103
- data/ext/nokogiri/xml_sax_push_parser.c +65 -37
- data/ext/nokogiri/xml_schema.c +138 -82
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +35 -26
- data/ext/nokogiri/xml_xpath_context.c +363 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +126 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +5 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +205 -96
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +326 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +75 -34
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -127
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1096 -419
- data/lib/nokogiri/xml/node_set.rb +137 -61
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +7 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +39 -38
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +121 -291
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,132 +1,275 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
1
6
|
module Nokogiri
|
2
7
|
module XML
|
3
|
-
|
4
|
-
# Nokogiri::XML::Document
|
5
|
-
#
|
6
|
-
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
|
9
|
+
# is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
|
10
|
+
# on parsing.
|
7
11
|
#
|
8
12
|
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
9
13
|
# Nokogiri::XML::Searchable#xpath
|
10
|
-
#
|
11
14
|
class Document < Nokogiri::XML::Node
|
12
|
-
#
|
13
|
-
#
|
15
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
17
|
+
# characters in NCNAMEs.
|
14
18
|
NCNAME_START_CHAR = "A-Za-z_"
|
15
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
16
|
-
NCNAME_RE = /^xmlns(
|
19
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
|
+
|
22
|
+
class << self
|
23
|
+
# Parse an XML file.
|
24
|
+
#
|
25
|
+
# +string_or_io+ may be a String, or any object that responds to
|
26
|
+
# _read_ and _close_ such as an IO, or StringIO.
|
27
|
+
#
|
28
|
+
# +url+ (optional) is the URI where this document is located.
|
29
|
+
#
|
30
|
+
# +encoding+ (optional) is the encoding that should be used when processing
|
31
|
+
# the document.
|
32
|
+
#
|
33
|
+
# +options+ (optional) is a configuration object that sets options during
|
34
|
+
# parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
|
35
|
+
# Nokogiri::XML::ParseOptions for more information.
|
36
|
+
#
|
37
|
+
# +block+ (optional) is passed a configuration object on which
|
38
|
+
# parse options may be set.
|
39
|
+
#
|
40
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
41
|
+
# does not attempt to load DTDs or access the network. See
|
42
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
43
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
+
# set) by default.
|
45
|
+
#
|
46
|
+
# Nokogiri.XML() is a convenience method which will call this method.
|
47
|
+
#
|
48
|
+
def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
|
49
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
|
+
yield options if block_given?
|
51
|
+
|
52
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
53
|
+
|
54
|
+
if empty_doc?(string_or_io)
|
55
|
+
if options.strict?
|
56
|
+
raise Nokogiri::XML::SyntaxError, "Empty document"
|
57
|
+
else
|
58
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
doc = if string_or_io.respond_to?(:read)
|
63
|
+
if string_or_io.is_a?(Pathname)
|
64
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
|
+
string_or_io = string_or_io.expand_path.open
|
66
|
+
url ||= string_or_io.path
|
67
|
+
end
|
68
|
+
|
69
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
70
|
+
else
|
71
|
+
# read_memory pukes on empty docs
|
72
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
73
|
+
end
|
74
|
+
|
75
|
+
# do xinclude processing
|
76
|
+
doc.do_xinclude(options) if options.xinclude?
|
77
|
+
|
78
|
+
doc
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def empty_doc?(string_or_io)
|
84
|
+
string_or_io.nil? ||
|
85
|
+
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
86
|
+
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
87
|
+
end
|
88
|
+
end
|
17
89
|
|
18
90
|
##
|
19
|
-
#
|
91
|
+
# :singleton-method: wrap
|
92
|
+
# :call-seq: wrap(java_document) → Nokogiri::XML::Document
|
20
93
|
#
|
21
|
-
#
|
22
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
94
|
+
# ⚠ This method is only available when running JRuby.
|
23
95
|
#
|
24
|
-
#
|
96
|
+
# Create a Document using an existing Java DOM document object.
|
25
97
|
#
|
26
|
-
#
|
27
|
-
# the
|
98
|
+
# The returned Document shares the same underlying data structure as the Java object, so
|
99
|
+
# changes in one are reflected in the other.
|
28
100
|
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
101
|
+
# [Parameters]
|
102
|
+
# - `java_document` (Java::OrgW3cDom::Document)
|
103
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
32
104
|
#
|
33
|
-
#
|
34
|
-
# parse options may be set.
|
105
|
+
# [Returns] Nokogiri::XML::Document
|
35
106
|
#
|
36
|
-
#
|
37
|
-
|
38
|
-
#
|
39
|
-
#
|
40
|
-
# set) by default.
|
107
|
+
# See also \#to_java
|
108
|
+
|
109
|
+
# :method: to_java
|
110
|
+
# :call-seq: to_java() → Java::OrgW3cDom::Document
|
41
111
|
#
|
42
|
-
#
|
112
|
+
# ⚠ This method is only available when running JRuby.
|
43
113
|
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
doc = if string_or_io.respond_to?(:read)
|
58
|
-
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
59
|
-
read_io(string_or_io, url, encoding, options.to_i)
|
60
|
-
else
|
61
|
-
# read_memory pukes on empty docs
|
62
|
-
read_memory(string_or_io, url, encoding, options.to_i)
|
63
|
-
end
|
64
|
-
|
65
|
-
# do xinclude processing
|
66
|
-
doc.do_xinclude(options) if options.xinclude?
|
67
|
-
|
68
|
-
return doc
|
69
|
-
end
|
114
|
+
# Returns the underlying Java DOM document object for this document.
|
115
|
+
#
|
116
|
+
# The returned Java object shares the same underlying data structure as this document, so
|
117
|
+
# changes in one are reflected in the other.
|
118
|
+
#
|
119
|
+
# [Returns]
|
120
|
+
# Java::OrgW3cDom::Document
|
121
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
122
|
+
#
|
123
|
+
# See also Document.wrap
|
70
124
|
|
71
|
-
#
|
125
|
+
# The errors found while parsing a document.
|
126
|
+
#
|
127
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
72
128
|
attr_accessor :errors
|
73
129
|
|
74
|
-
|
130
|
+
# When `true`, reparented elements without a namespace will inherit their new parent's
|
131
|
+
# namespace (if one exists). Defaults to `false`.
|
132
|
+
#
|
133
|
+
# [Returns] Boolean
|
134
|
+
#
|
135
|
+
# *Example:* Default behavior of namespace inheritance
|
136
|
+
#
|
137
|
+
# xml = <<~EOF
|
138
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
139
|
+
# <foo:parent>
|
140
|
+
# </foo:parent>
|
141
|
+
# </root>
|
142
|
+
# EOF
|
143
|
+
# doc = Nokogiri::XML(xml)
|
144
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
145
|
+
# parent.add_child("<child></child>")
|
146
|
+
# doc.to_xml
|
147
|
+
# # => <?xml version="1.0"?>
|
148
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
149
|
+
# # <foo:parent>
|
150
|
+
# # <child/>
|
151
|
+
# # </foo:parent>
|
152
|
+
# # </root>
|
153
|
+
#
|
154
|
+
# *Example:* Setting namespace inheritance to `true`
|
155
|
+
#
|
156
|
+
# xml = <<~EOF
|
157
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
158
|
+
# <foo:parent>
|
159
|
+
# </foo:parent>
|
160
|
+
# </root>
|
161
|
+
# EOF
|
162
|
+
# doc = Nokogiri::XML(xml)
|
163
|
+
# doc.namespace_inheritance = true
|
164
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
165
|
+
# parent.add_child("<child></child>")
|
166
|
+
# doc.to_xml
|
167
|
+
# # => <?xml version="1.0"?>
|
168
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
169
|
+
# # <foo:parent>
|
170
|
+
# # <foo:child/>
|
171
|
+
# # </foo:parent>
|
172
|
+
# # </root>
|
173
|
+
#
|
174
|
+
# Since v1.12.4
|
175
|
+
attr_accessor :namespace_inheritance
|
176
|
+
|
177
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
75
178
|
@errors = []
|
76
179
|
@decorators = nil
|
180
|
+
@namespace_inheritance = false
|
77
181
|
end
|
78
182
|
|
79
|
-
|
80
|
-
#
|
183
|
+
# :call-seq:
|
184
|
+
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
185
|
+
#
|
186
|
+
# Create a new Element with `name` belonging to this document, optionally setting contents or
|
187
|
+
# attributes.
|
188
|
+
#
|
189
|
+
# This method is _not_ the most user-friendly option if your intention is to add a node to the
|
190
|
+
# document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
|
191
|
+
# Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
|
192
|
+
# place it in the document tree.
|
81
193
|
#
|
82
|
-
#
|
83
|
-
# doc.create_element "div", :class => "container" # <div class='container'></div>
|
84
|
-
# doc.create_element "div", "contents" # <div>contents</div>
|
85
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
86
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
194
|
+
# Arguments may be passed to initialize the element:
|
87
195
|
#
|
88
|
-
|
196
|
+
# - a Hash argument will be used to set attributes
|
197
|
+
# - a non-Hash object that responds to \#to_s will be used to set the new node's contents
|
198
|
+
#
|
199
|
+
# A block may be passed to mutate the node.
|
200
|
+
#
|
201
|
+
# [Parameters]
|
202
|
+
# - `name` (String)
|
203
|
+
# - `contents_or_attrs` (\#to_s, Hash)
|
204
|
+
# [Yields] `node` (Nokogiri::XML::Element)
|
205
|
+
# [Returns] Nokogiri::XML::Element
|
206
|
+
#
|
207
|
+
# *Example:* An empty element without attributes
|
208
|
+
#
|
209
|
+
# doc.create_element("div")
|
210
|
+
# # => <div></div>
|
211
|
+
#
|
212
|
+
# *Example:* An element with contents
|
213
|
+
#
|
214
|
+
# doc.create_element("div", "contents")
|
215
|
+
# # => <div>contents</div>
|
216
|
+
#
|
217
|
+
# *Example:* An element with attributes
|
218
|
+
#
|
219
|
+
# doc.create_element("div", {"class" => "container"})
|
220
|
+
# # => <div class='container'></div>
|
221
|
+
#
|
222
|
+
# *Example:* An element with contents and attributes
|
223
|
+
#
|
224
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
225
|
+
# # => <div class='container'>contents</div>
|
226
|
+
#
|
227
|
+
# *Example:* Passing a block to mutate the element
|
228
|
+
#
|
229
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
230
|
+
#
|
231
|
+
def create_element(name, *contents_or_attrs, &block)
|
89
232
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
90
|
-
|
233
|
+
contents_or_attrs.each do |arg|
|
91
234
|
case arg
|
92
235
|
when Hash
|
93
|
-
arg.each
|
236
|
+
arg.each do |k, v|
|
94
237
|
key = k.to_s
|
95
238
|
if key =~ NCNAME_RE
|
96
|
-
ns_name =
|
97
|
-
elm.add_namespace_definition
|
239
|
+
ns_name = Regexp.last_match(1)
|
240
|
+
elm.add_namespace_definition(ns_name, v)
|
98
241
|
else
|
99
242
|
elm[k.to_s] = v.to_s
|
100
243
|
end
|
101
|
-
|
244
|
+
end
|
102
245
|
else
|
103
246
|
elm.content = arg
|
104
247
|
end
|
105
248
|
end
|
106
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
249
|
+
if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
|
107
250
|
elm.namespace = ns
|
108
251
|
end
|
109
252
|
elm
|
110
253
|
end
|
111
254
|
|
112
255
|
# Create a Text Node with +string+
|
113
|
-
def create_text_node
|
114
|
-
Nokogiri::XML::Text.new
|
256
|
+
def create_text_node(string, &block)
|
257
|
+
Nokogiri::XML::Text.new(string.to_s, self, &block)
|
115
258
|
end
|
116
259
|
|
117
260
|
# Create a CDATA Node containing +string+
|
118
|
-
def create_cdata
|
119
|
-
Nokogiri::XML::CDATA.new
|
261
|
+
def create_cdata(string, &block)
|
262
|
+
Nokogiri::XML::CDATA.new(self, string.to_s, &block)
|
120
263
|
end
|
121
264
|
|
122
265
|
# Create a Comment Node containing +string+
|
123
|
-
def create_comment
|
124
|
-
Nokogiri::XML::Comment.new
|
266
|
+
def create_comment(string, &block)
|
267
|
+
Nokogiri::XML::Comment.new(self, string.to_s, &block)
|
125
268
|
end
|
126
269
|
|
127
270
|
# The name of this document. Always returns "document"
|
128
271
|
def name
|
129
|
-
|
272
|
+
"document"
|
130
273
|
end
|
131
274
|
|
132
275
|
# A reference to +self+
|
@@ -134,46 +277,51 @@ module Nokogiri
|
|
134
277
|
self
|
135
278
|
end
|
136
279
|
|
137
|
-
|
138
|
-
#
|
139
|
-
# return them as a hash.
|
280
|
+
# :call-seq:
|
281
|
+
# collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
140
282
|
#
|
141
|
-
#
|
283
|
+
# Recursively get all namespaces from this node and its subtree and return them as a
|
284
|
+
# hash.
|
142
285
|
#
|
143
|
-
#
|
286
|
+
# ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
|
287
|
+
#
|
288
|
+
# Note that this method does an xpath lookup for nodes with namespaces, and as a result the
|
289
|
+
# order (and which duplicate prefix "wins") may be dependent on the implementation of the
|
290
|
+
# underlying XML library.
|
291
|
+
#
|
292
|
+
# *Example:* Basic usage
|
293
|
+
#
|
294
|
+
# Given this document:
|
295
|
+
#
|
296
|
+
# <root xmlns="default" xmlns:foo="bar">
|
144
297
|
# <bar xmlns:hello="world" />
|
145
298
|
# </root>
|
146
299
|
#
|
147
300
|
# This method will return:
|
148
301
|
#
|
149
|
-
# {
|
302
|
+
# {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
|
303
|
+
#
|
304
|
+
# *Example:* Duplicate prefixes
|
150
305
|
#
|
151
|
-
#
|
152
|
-
# For example, given this document:
|
306
|
+
# Given this document:
|
153
307
|
#
|
154
308
|
# <root xmlns:foo="bar">
|
155
309
|
# <bar xmlns:foo="baz" />
|
156
310
|
# </root>
|
157
311
|
#
|
158
|
-
# The hash returned will
|
312
|
+
# The hash returned will be something like:
|
159
313
|
#
|
160
|
-
#
|
161
|
-
# in the hash.
|
162
|
-
#
|
163
|
-
# Note that this method does an xpath lookup for nodes with
|
164
|
-
# namespaces, and as a result the order may be dependent on the
|
165
|
-
# implementation of the underlying XML library.
|
314
|
+
# {"xmlns:foo" => "baz"}
|
166
315
|
#
|
167
316
|
def collect_namespaces
|
168
|
-
xpath("//namespace::*").
|
169
|
-
hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
170
|
-
hash
|
317
|
+
xpath("//namespace::*").each_with_object({}) do |ns, hash|
|
318
|
+
hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
171
319
|
end
|
172
320
|
end
|
173
321
|
|
174
322
|
# Get the list of decorators given +key+
|
175
|
-
def decorators
|
176
|
-
@decorators ||=
|
323
|
+
def decorators(key)
|
324
|
+
@decorators ||= {}
|
177
325
|
@decorators[key] ||= []
|
178
326
|
end
|
179
327
|
|
@@ -181,8 +329,9 @@ module Nokogiri
|
|
181
329
|
# Validate this Document against it's DTD. Returns a list of errors on
|
182
330
|
# the document or +nil+ when there is no DTD.
|
183
331
|
def validate
|
184
|
-
return
|
185
|
-
|
332
|
+
return unless internal_subset
|
333
|
+
|
334
|
+
internal_subset.validate(self)
|
186
335
|
end
|
187
336
|
|
188
337
|
##
|
@@ -202,7 +351,7 @@ module Nokogiri
|
|
202
351
|
# ... which does absolutely nothing.
|
203
352
|
#
|
204
353
|
def slop!
|
205
|
-
unless decorators(XML::Node).include?
|
354
|
+
unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
|
206
355
|
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
207
356
|
decorate!
|
208
357
|
end
|
@@ -212,16 +361,18 @@ module Nokogiri
|
|
212
361
|
|
213
362
|
##
|
214
363
|
# Apply any decorators to +node+
|
215
|
-
def decorate
|
364
|
+
def decorate(node)
|
216
365
|
return unless @decorators
|
217
|
-
|
366
|
+
|
367
|
+
@decorators.each do |klass, list|
|
218
368
|
next unless node.is_a?(klass)
|
369
|
+
|
219
370
|
list.each { |moodule| node.extend(moodule) }
|
220
|
-
|
371
|
+
end
|
221
372
|
end
|
222
373
|
|
223
|
-
|
224
|
-
|
374
|
+
alias_method :to_xml, :serialize
|
375
|
+
alias_method :clone, :dup
|
225
376
|
|
226
377
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
227
378
|
def namespaces
|
@@ -231,51 +382,85 @@ module Nokogiri
|
|
231
382
|
##
|
232
383
|
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
233
384
|
# Returns an empty fragment if +tags+ is nil.
|
234
|
-
def fragment
|
235
|
-
DocumentFragment.new(self, tags,
|
385
|
+
def fragment(tags = nil)
|
386
|
+
DocumentFragment.new(self, tags, root)
|
236
387
|
end
|
237
388
|
|
238
389
|
undef_method :swap, :parent, :namespace, :default_namespace=
|
239
390
|
undef_method :add_namespace_definition, :attributes
|
240
391
|
undef_method :namespace_definitions, :line, :add_namespace
|
241
392
|
|
242
|
-
def add_child
|
243
|
-
raise "A document may not have multiple root nodes." if (root && root.name !=
|
393
|
+
def add_child(node_or_tags)
|
394
|
+
raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
395
|
+
|
244
396
|
node_or_tags = coerce(node_or_tags)
|
245
397
|
if node_or_tags.is_a?(XML::NodeSet)
|
246
398
|
raise "A document may not have multiple root nodes." if node_or_tags.size > 1
|
399
|
+
|
247
400
|
super(node_or_tags.first)
|
248
401
|
else
|
249
402
|
super
|
250
403
|
end
|
251
404
|
end
|
252
|
-
|
405
|
+
alias_method :<<, :add_child
|
253
406
|
|
254
|
-
|
255
|
-
#
|
256
|
-
#
|
257
|
-
|
258
|
-
|
259
|
-
|
407
|
+
# :call-seq:
|
408
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
409
|
+
#
|
410
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
411
|
+
#
|
412
|
+
# See XPathVisitor for more information.
|
413
|
+
def xpath_doctype
|
414
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
|
260
415
|
end
|
261
416
|
|
262
|
-
|
263
|
-
#
|
264
|
-
#
|
265
|
-
|
266
|
-
|
267
|
-
|
417
|
+
#
|
418
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
419
|
+
#
|
420
|
+
# Returns a hash describing the Document, to use in pattern matching.
|
421
|
+
#
|
422
|
+
# Valid keys and their values:
|
423
|
+
# - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
|
424
|
+
#
|
425
|
+
# In the future, other keys may allow accessing things like doctype and processing
|
426
|
+
# instructions. If you have a use case and would like this functionality, please let us know
|
427
|
+
# by opening an issue or a discussion on the github project.
|
428
|
+
#
|
429
|
+
# *Example*
|
430
|
+
#
|
431
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
432
|
+
# <?xml version="1.0"?>
|
433
|
+
# <root>
|
434
|
+
# <child>
|
435
|
+
# </root>
|
436
|
+
# XML
|
437
|
+
#
|
438
|
+
# doc.deconstruct_keys([:root])
|
439
|
+
# # => {:root=>
|
440
|
+
# # #(Element:0x35c {
|
441
|
+
# # name = "root",
|
442
|
+
# # children = [
|
443
|
+
# # #(Text "\n" + " "),
|
444
|
+
# # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
|
445
|
+
# # #(Text "\n")]
|
446
|
+
# # })}
|
447
|
+
#
|
448
|
+
# *Example* of an empty document
|
449
|
+
#
|
450
|
+
# doc = Nokogiri::XML::Document.new
|
451
|
+
#
|
452
|
+
# doc.deconstruct_keys([:root])
|
453
|
+
# # => {:root=>nil}
|
454
|
+
#
|
455
|
+
# Since v1.14.0
|
456
|
+
#
|
457
|
+
def deconstruct_keys(keys)
|
458
|
+
{ root: root }
|
268
459
|
end
|
269
460
|
|
270
461
|
private
|
271
|
-
def self.empty_doc? string_or_io
|
272
|
-
string_or_io.nil? ||
|
273
|
-
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
274
|
-
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
275
|
-
end
|
276
462
|
|
277
|
-
#
|
278
|
-
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
463
|
+
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
279
464
|
|
280
465
|
def inspect_attributes
|
281
466
|
[:name, :children]
|