nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,132 +1,320 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "pathname"
|
5
|
+
|
1
6
|
module Nokogiri
|
2
7
|
module XML
|
3
|
-
|
4
|
-
#
|
5
|
-
# XML
|
6
|
-
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
7
|
-
#
|
8
|
-
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
9
|
-
# Nokogiri::XML::Searchable#xpath
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
|
9
|
+
# is created by parsing \XML content from a String or an IO object. See
|
10
|
+
# Nokogiri::XML::Document.parse for more information on parsing.
|
10
11
|
#
|
12
|
+
# Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
|
13
|
+
# please read that class's documentation as well.
|
11
14
|
class Document < Nokogiri::XML::Node
|
12
|
-
#
|
13
|
-
#
|
15
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
17
|
+
# characters in NCNAMEs.
|
14
18
|
NCNAME_START_CHAR = "A-Za-z_"
|
15
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
16
|
-
NCNAME_RE = /^xmlns(
|
19
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
|
+
|
22
|
+
OBJECT_DUP_METHOD = Object.instance_method(:dup)
|
23
|
+
OBJECT_CLONE_METHOD = Object.instance_method(:clone)
|
24
|
+
private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
|
25
|
+
|
26
|
+
class << self
|
27
|
+
# call-seq:
|
28
|
+
# parse(input) { |options| ... } => Nokogiri::XML::Document
|
29
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
|
30
|
+
#
|
31
|
+
# Parse \XML input from a String or IO object, and return a new XML::Document.
|
32
|
+
#
|
33
|
+
# 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
|
34
|
+
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
|
35
|
+
# that module's DEFAULT_XML constant for what's set (and not set) by default.
|
36
|
+
#
|
37
|
+
# [Required Parameters]
|
38
|
+
# - +input+ (String | IO) The content to be parsed.
|
39
|
+
#
|
40
|
+
# [Optional Keyword Arguments]
|
41
|
+
# - +url:+ (String) The base URI for this document.
|
42
|
+
#
|
43
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
44
|
+
# document. When not provided, the encoding will be determined based on the document
|
45
|
+
# content.
|
46
|
+
#
|
47
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
48
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
49
|
+
# +ParseOptions::DEFAULT_XML+.
|
50
|
+
#
|
51
|
+
# [Yields]
|
52
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
53
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
54
|
+
#
|
55
|
+
# [Returns] Nokogiri::XML::Document
|
56
|
+
def parse(
|
57
|
+
string_or_io,
|
58
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
|
59
|
+
url: url_, encoding: encoding_, options: options_
|
60
|
+
)
|
61
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
62
|
+
yield options if block_given?
|
63
|
+
|
64
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
65
|
+
|
66
|
+
if empty_doc?(string_or_io)
|
67
|
+
if options.strict?
|
68
|
+
raise Nokogiri::XML::SyntaxError, "Empty document"
|
69
|
+
else
|
70
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
doc = if string_or_io.respond_to?(:read)
|
75
|
+
# TODO: should we instead check for respond_to?(:to_path) ?
|
76
|
+
if string_or_io.is_a?(Pathname)
|
77
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
78
|
+
string_or_io = string_or_io.expand_path.open
|
79
|
+
url ||= string_or_io.path
|
80
|
+
end
|
81
|
+
|
82
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
83
|
+
else
|
84
|
+
# read_memory pukes on empty docs
|
85
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
86
|
+
end
|
87
|
+
|
88
|
+
# do xinclude processing
|
89
|
+
doc.do_xinclude(options) if options.xinclude?
|
90
|
+
|
91
|
+
doc
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def empty_doc?(string_or_io)
|
97
|
+
string_or_io.nil? ||
|
98
|
+
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
99
|
+
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
100
|
+
end
|
101
|
+
end
|
17
102
|
|
18
103
|
##
|
19
|
-
#
|
104
|
+
# :singleton-method: wrap
|
105
|
+
# :call-seq: wrap(java_document) → Nokogiri::XML::Document
|
20
106
|
#
|
21
|
-
#
|
22
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
107
|
+
# ⚠ This method is only available when running JRuby.
|
23
108
|
#
|
24
|
-
#
|
109
|
+
# Create a Document using an existing Java DOM document object.
|
25
110
|
#
|
26
|
-
#
|
27
|
-
# the
|
111
|
+
# The returned Document shares the same underlying data structure as the Java object, so
|
112
|
+
# changes in one are reflected in the other.
|
28
113
|
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
114
|
+
# [Parameters]
|
115
|
+
# - `java_document` (Java::OrgW3cDom::Document)
|
116
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
32
117
|
#
|
33
|
-
#
|
34
|
-
# parse options may be set.
|
118
|
+
# [Returns] Nokogiri::XML::Document
|
35
119
|
#
|
36
|
-
#
|
37
|
-
|
38
|
-
#
|
39
|
-
#
|
40
|
-
# set) by default.
|
120
|
+
# See also \#to_java
|
121
|
+
|
122
|
+
# :method: to_java
|
123
|
+
# :call-seq: to_java() → Java::OrgW3cDom::Document
|
41
124
|
#
|
42
|
-
#
|
125
|
+
# ⚠ This method is only available when running JRuby.
|
43
126
|
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
end
|
55
|
-
end
|
127
|
+
# Returns the underlying Java DOM document object for this document.
|
128
|
+
#
|
129
|
+
# The returned Java object shares the same underlying data structure as this document, so
|
130
|
+
# changes in one are reflected in the other.
|
131
|
+
#
|
132
|
+
# [Returns]
|
133
|
+
# Java::OrgW3cDom::Document
|
134
|
+
# (The class `Java::OrgW3cDom::Document` is also accessible as `org.w3c.dom.Document`.)
|
135
|
+
#
|
136
|
+
# See also Document.wrap
|
56
137
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
# read_memory pukes on empty docs
|
62
|
-
read_memory(string_or_io, url, encoding, options.to_i)
|
63
|
-
end
|
138
|
+
# The errors found while parsing a document.
|
139
|
+
#
|
140
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
141
|
+
attr_accessor :errors
|
64
142
|
|
65
|
-
|
66
|
-
|
143
|
+
# When `true`, reparented elements without a namespace will inherit their new parent's
|
144
|
+
# namespace (if one exists). Defaults to `false`.
|
145
|
+
#
|
146
|
+
# [Returns] Boolean
|
147
|
+
#
|
148
|
+
# *Example:* Default behavior of namespace inheritance
|
149
|
+
#
|
150
|
+
# xml = <<~EOF
|
151
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
152
|
+
# <foo:parent>
|
153
|
+
# </foo:parent>
|
154
|
+
# </root>
|
155
|
+
# EOF
|
156
|
+
# doc = Nokogiri::XML(xml)
|
157
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
158
|
+
# parent.add_child("<child></child>")
|
159
|
+
# doc.to_xml
|
160
|
+
# # => <?xml version="1.0"?>
|
161
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
162
|
+
# # <foo:parent>
|
163
|
+
# # <child/>
|
164
|
+
# # </foo:parent>
|
165
|
+
# # </root>
|
166
|
+
#
|
167
|
+
# *Example:* Setting namespace inheritance to `true`
|
168
|
+
#
|
169
|
+
# xml = <<~EOF
|
170
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
171
|
+
# <foo:parent>
|
172
|
+
# </foo:parent>
|
173
|
+
# </root>
|
174
|
+
# EOF
|
175
|
+
# doc = Nokogiri::XML(xml)
|
176
|
+
# doc.namespace_inheritance = true
|
177
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
178
|
+
# parent.add_child("<child></child>")
|
179
|
+
# doc.to_xml
|
180
|
+
# # => <?xml version="1.0"?>
|
181
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
182
|
+
# # <foo:parent>
|
183
|
+
# # <foo:child/>
|
184
|
+
# # </foo:parent>
|
185
|
+
# # </root>
|
186
|
+
#
|
187
|
+
# Since v1.12.4
|
188
|
+
attr_accessor :namespace_inheritance
|
67
189
|
|
68
|
-
|
190
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
191
|
+
@errors = []
|
192
|
+
@decorators = nil
|
193
|
+
@namespace_inheritance = false
|
69
194
|
end
|
70
195
|
|
71
|
-
#
|
72
|
-
|
196
|
+
#
|
197
|
+
# :call-seq:
|
198
|
+
# dup → Nokogiri::XML::Document
|
199
|
+
# dup(level) → Nokogiri::XML::Document
|
200
|
+
#
|
201
|
+
# Duplicate this node.
|
202
|
+
#
|
203
|
+
# [Parameters]
|
204
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
205
|
+
# [Returns] The new Nokogiri::XML::Document
|
206
|
+
#
|
207
|
+
def dup(level = 1)
|
208
|
+
copy = OBJECT_DUP_METHOD.bind_call(self)
|
209
|
+
copy.initialize_copy_with_args(self, level)
|
210
|
+
end
|
73
211
|
|
74
|
-
|
75
|
-
|
76
|
-
|
212
|
+
#
|
213
|
+
# :call-seq:
|
214
|
+
# clone → Nokogiri::XML::Document
|
215
|
+
# clone(level) → Nokogiri::XML::Document
|
216
|
+
#
|
217
|
+
# Clone this node.
|
218
|
+
#
|
219
|
+
# [Parameters]
|
220
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
221
|
+
# [Returns] The new Nokogiri::XML::Document
|
222
|
+
#
|
223
|
+
def clone(level = 1)
|
224
|
+
copy = OBJECT_CLONE_METHOD.bind_call(self)
|
225
|
+
copy.initialize_copy_with_args(self, level)
|
77
226
|
end
|
78
227
|
|
79
|
-
|
80
|
-
#
|
228
|
+
# :call-seq:
|
229
|
+
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
230
|
+
#
|
231
|
+
# Create a new Element with `name` belonging to this document, optionally setting contents or
|
232
|
+
# attributes.
|
233
|
+
#
|
234
|
+
# This method is _not_ the most user-friendly option if your intention is to add a node to the
|
235
|
+
# document tree. Prefer one of the Nokogiri::XML::Node methods like Node#add_child,
|
236
|
+
# Node#add_next_sibling, Node#replace, etc. which will both create an element (or subtree) and
|
237
|
+
# place it in the document tree.
|
238
|
+
#
|
239
|
+
# Arguments may be passed to initialize the element:
|
240
|
+
#
|
241
|
+
# - a Hash argument will be used to set attributes
|
242
|
+
# - a non-Hash object that responds to \#to_s will be used to set the new node's contents
|
243
|
+
#
|
244
|
+
# A block may be passed to mutate the node.
|
245
|
+
#
|
246
|
+
# [Parameters]
|
247
|
+
# - `name` (String)
|
248
|
+
# - `contents_or_attrs` (\#to_s, Hash)
|
249
|
+
# [Yields] `node` (Nokogiri::XML::Element)
|
250
|
+
# [Returns] Nokogiri::XML::Element
|
251
|
+
#
|
252
|
+
# *Example:* An empty element without attributes
|
253
|
+
#
|
254
|
+
# doc.create_element("div")
|
255
|
+
# # => <div></div>
|
256
|
+
#
|
257
|
+
# *Example:* An element with contents
|
81
258
|
#
|
82
|
-
# doc.create_element
|
83
|
-
#
|
84
|
-
# doc.create_element "div", "contents" # <div>contents</div>
|
85
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
86
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
259
|
+
# doc.create_element("div", "contents")
|
260
|
+
# # => <div>contents</div>
|
87
261
|
#
|
88
|
-
|
262
|
+
# *Example:* An element with attributes
|
263
|
+
#
|
264
|
+
# doc.create_element("div", {"class" => "container"})
|
265
|
+
# # => <div class='container'></div>
|
266
|
+
#
|
267
|
+
# *Example:* An element with contents and attributes
|
268
|
+
#
|
269
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
270
|
+
# # => <div class='container'>contents</div>
|
271
|
+
#
|
272
|
+
# *Example:* Passing a block to mutate the element
|
273
|
+
#
|
274
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
275
|
+
#
|
276
|
+
def create_element(name, *contents_or_attrs, &block)
|
89
277
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
90
|
-
|
278
|
+
contents_or_attrs.each do |arg|
|
91
279
|
case arg
|
92
280
|
when Hash
|
93
|
-
arg.each
|
281
|
+
arg.each do |k, v|
|
94
282
|
key = k.to_s
|
95
283
|
if key =~ NCNAME_RE
|
96
|
-
ns_name =
|
97
|
-
elm.add_namespace_definition
|
284
|
+
ns_name = Regexp.last_match(1)
|
285
|
+
elm.add_namespace_definition(ns_name, v)
|
98
286
|
else
|
99
287
|
elm[k.to_s] = v.to_s
|
100
288
|
end
|
101
|
-
|
289
|
+
end
|
102
290
|
else
|
103
291
|
elm.content = arg
|
104
292
|
end
|
105
293
|
end
|
106
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
294
|
+
if (ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == "") })
|
107
295
|
elm.namespace = ns
|
108
296
|
end
|
109
297
|
elm
|
110
298
|
end
|
111
299
|
|
112
300
|
# Create a Text Node with +string+
|
113
|
-
def create_text_node
|
114
|
-
Nokogiri::XML::Text.new
|
301
|
+
def create_text_node(string, &block)
|
302
|
+
Nokogiri::XML::Text.new(string.to_s, self, &block)
|
115
303
|
end
|
116
304
|
|
117
305
|
# Create a CDATA Node containing +string+
|
118
|
-
def create_cdata
|
119
|
-
Nokogiri::XML::CDATA.new
|
306
|
+
def create_cdata(string, &block)
|
307
|
+
Nokogiri::XML::CDATA.new(self, string.to_s, &block)
|
120
308
|
end
|
121
309
|
|
122
310
|
# Create a Comment Node containing +string+
|
123
|
-
def create_comment
|
124
|
-
Nokogiri::XML::Comment.new
|
311
|
+
def create_comment(string, &block)
|
312
|
+
Nokogiri::XML::Comment.new(self, string.to_s, &block)
|
125
313
|
end
|
126
314
|
|
127
315
|
# The name of this document. Always returns "document"
|
128
316
|
def name
|
129
|
-
|
317
|
+
"document"
|
130
318
|
end
|
131
319
|
|
132
320
|
# A reference to +self+
|
@@ -134,55 +322,61 @@ module Nokogiri
|
|
134
322
|
self
|
135
323
|
end
|
136
324
|
|
137
|
-
|
138
|
-
#
|
139
|
-
# return them as a hash.
|
325
|
+
# :call-seq:
|
326
|
+
# collect_namespaces() → Hash<String(Namespace#prefix) ⇒ String(Namespace#href)>
|
140
327
|
#
|
141
|
-
#
|
328
|
+
# Recursively get all namespaces from this node and its subtree and return them as a
|
329
|
+
# hash.
|
142
330
|
#
|
143
|
-
#
|
331
|
+
# ⚠ This method will not handle duplicate namespace prefixes, since the return value is a hash.
|
332
|
+
#
|
333
|
+
# Note that this method does an xpath lookup for nodes with namespaces, and as a result the
|
334
|
+
# order (and which duplicate prefix "wins") may be dependent on the implementation of the
|
335
|
+
# underlying XML library.
|
336
|
+
#
|
337
|
+
# *Example:* Basic usage
|
338
|
+
#
|
339
|
+
# Given this document:
|
340
|
+
#
|
341
|
+
# <root xmlns="default" xmlns:foo="bar">
|
144
342
|
# <bar xmlns:hello="world" />
|
145
343
|
# </root>
|
146
344
|
#
|
147
345
|
# This method will return:
|
148
346
|
#
|
149
|
-
# {
|
347
|
+
# {"xmlns:foo"=>"bar", "xmlns"=>"default", "xmlns:hello"=>"world"}
|
150
348
|
#
|
151
|
-
#
|
152
|
-
#
|
349
|
+
# *Example:* Duplicate prefixes
|
350
|
+
#
|
351
|
+
# Given this document:
|
153
352
|
#
|
154
353
|
# <root xmlns:foo="bar">
|
155
354
|
# <bar xmlns:foo="baz" />
|
156
355
|
# </root>
|
157
356
|
#
|
158
|
-
# The hash returned will
|
159
|
-
#
|
160
|
-
# Non-prefixed default namespaces (as in "xmlns=") are not included
|
161
|
-
# in the hash.
|
357
|
+
# The hash returned will be something like:
|
162
358
|
#
|
163
|
-
#
|
164
|
-
# namespaces, and as a result the order may be dependent on the
|
165
|
-
# implementation of the underlying XML library.
|
359
|
+
# {"xmlns:foo" => "baz"}
|
166
360
|
#
|
167
361
|
def collect_namespaces
|
168
|
-
xpath("//namespace::*").
|
169
|
-
hash[["xmlns",ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
170
|
-
hash
|
362
|
+
xpath("//namespace::*").each_with_object({}) do |ns, hash|
|
363
|
+
hash[["xmlns", ns.prefix].compact.join(":")] = ns.href if ns.prefix != "xml"
|
171
364
|
end
|
172
365
|
end
|
173
366
|
|
174
367
|
# Get the list of decorators given +key+
|
175
|
-
def decorators
|
176
|
-
@decorators ||=
|
368
|
+
def decorators(key)
|
369
|
+
@decorators ||= {}
|
177
370
|
@decorators[key] ||= []
|
178
371
|
end
|
179
372
|
|
180
373
|
##
|
181
|
-
# Validate this Document against
|
374
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
182
375
|
# the document or +nil+ when there is no DTD.
|
183
376
|
def validate
|
184
|
-
return
|
185
|
-
|
377
|
+
return unless internal_subset
|
378
|
+
|
379
|
+
internal_subset.validate(self)
|
186
380
|
end
|
187
381
|
|
188
382
|
##
|
@@ -202,7 +396,7 @@ module Nokogiri
|
|
202
396
|
# ... which does absolutely nothing.
|
203
397
|
#
|
204
398
|
def slop!
|
205
|
-
unless decorators(XML::Node).include?
|
399
|
+
unless decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
|
206
400
|
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
207
401
|
decorate!
|
208
402
|
end
|
@@ -212,16 +406,17 @@ module Nokogiri
|
|
212
406
|
|
213
407
|
##
|
214
408
|
# Apply any decorators to +node+
|
215
|
-
def decorate
|
409
|
+
def decorate(node)
|
216
410
|
return unless @decorators
|
217
|
-
|
411
|
+
|
412
|
+
@decorators.each do |klass, list|
|
218
413
|
next unless node.is_a?(klass)
|
219
|
-
|
220
|
-
|
414
|
+
|
415
|
+
list.each { |mod| node.extend(mod) }
|
416
|
+
end
|
221
417
|
end
|
222
418
|
|
223
|
-
|
224
|
-
alias :clone :dup
|
419
|
+
alias_method :to_xml, :serialize
|
225
420
|
|
226
421
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
227
422
|
def namespaces
|
@@ -231,51 +426,85 @@ module Nokogiri
|
|
231
426
|
##
|
232
427
|
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
233
428
|
# Returns an empty fragment if +tags+ is nil.
|
234
|
-
def fragment
|
235
|
-
DocumentFragment.new(self, tags,
|
429
|
+
def fragment(tags = nil)
|
430
|
+
DocumentFragment.new(self, tags, root)
|
236
431
|
end
|
237
432
|
|
238
433
|
undef_method :swap, :parent, :namespace, :default_namespace=
|
239
434
|
undef_method :add_namespace_definition, :attributes
|
240
435
|
undef_method :namespace_definitions, :line, :add_namespace
|
241
436
|
|
242
|
-
def add_child
|
243
|
-
raise "A document may not have multiple root nodes." if (root && root.name !=
|
437
|
+
def add_child(node_or_tags)
|
438
|
+
raise "A document may not have multiple root nodes." if (root && root.name != "nokogiri_text_wrapper") && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
|
439
|
+
|
244
440
|
node_or_tags = coerce(node_or_tags)
|
245
441
|
if node_or_tags.is_a?(XML::NodeSet)
|
246
442
|
raise "A document may not have multiple root nodes." if node_or_tags.size > 1
|
443
|
+
|
247
444
|
super(node_or_tags.first)
|
248
445
|
else
|
249
446
|
super
|
250
447
|
end
|
251
448
|
end
|
252
|
-
|
449
|
+
alias_method :<<, :add_child
|
253
450
|
|
254
|
-
|
255
|
-
#
|
256
|
-
#
|
257
|
-
|
258
|
-
|
259
|
-
|
451
|
+
# :call-seq:
|
452
|
+
# xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
|
453
|
+
#
|
454
|
+
# [Returns] The document type which determines CSS-to-XPath translation.
|
455
|
+
#
|
456
|
+
# See XPathVisitor for more information.
|
457
|
+
def xpath_doctype
|
458
|
+
Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
|
260
459
|
end
|
261
460
|
|
262
|
-
|
263
|
-
#
|
264
|
-
#
|
265
|
-
|
266
|
-
|
267
|
-
|
461
|
+
#
|
462
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
463
|
+
#
|
464
|
+
# Returns a hash describing the Document, to use in pattern matching.
|
465
|
+
#
|
466
|
+
# Valid keys and their values:
|
467
|
+
# - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
|
468
|
+
#
|
469
|
+
# In the future, other keys may allow accessing things like doctype and processing
|
470
|
+
# instructions. If you have a use case and would like this functionality, please let us know
|
471
|
+
# by opening an issue or a discussion on the github project.
|
472
|
+
#
|
473
|
+
# *Example*
|
474
|
+
#
|
475
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
476
|
+
# <?xml version="1.0"?>
|
477
|
+
# <root>
|
478
|
+
# <child>
|
479
|
+
# </root>
|
480
|
+
# XML
|
481
|
+
#
|
482
|
+
# doc.deconstruct_keys([:root])
|
483
|
+
# # => {:root=>
|
484
|
+
# # #(Element:0x35c {
|
485
|
+
# # name = "root",
|
486
|
+
# # children = [
|
487
|
+
# # #(Text "\n" + " "),
|
488
|
+
# # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
|
489
|
+
# # #(Text "\n")]
|
490
|
+
# # })}
|
491
|
+
#
|
492
|
+
# *Example* of an empty document
|
493
|
+
#
|
494
|
+
# doc = Nokogiri::XML::Document.new
|
495
|
+
#
|
496
|
+
# doc.deconstruct_keys([:root])
|
497
|
+
# # => {:root=>nil}
|
498
|
+
#
|
499
|
+
# Since v1.14.0
|
500
|
+
#
|
501
|
+
def deconstruct_keys(keys)
|
502
|
+
{ root: root }
|
268
503
|
end
|
269
504
|
|
270
505
|
private
|
271
|
-
def self.empty_doc? string_or_io
|
272
|
-
string_or_io.nil? ||
|
273
|
-
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
274
|
-
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
275
|
-
end
|
276
506
|
|
277
|
-
#
|
278
|
-
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
507
|
+
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
279
508
|
|
280
509
|
def inspect_attributes
|
281
510
|
[:name, :children]
|