nokogiri-backport 1.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1682 -0
- data/LICENSE.md +9 -0
- data/README.md +272 -0
- data/bin/nokogiri +118 -0
- data/dependencies.yml +74 -0
- data/ext/java/nokogiri/EncodingHandler.java +124 -0
- data/ext/java/nokogiri/HtmlDocument.java +178 -0
- data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
- data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
- data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
- data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
- data/ext/java/nokogiri/NokogiriService.java +597 -0
- data/ext/java/nokogiri/XmlAttr.java +162 -0
- data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
- data/ext/java/nokogiri/XmlCdata.java +82 -0
- data/ext/java/nokogiri/XmlComment.java +97 -0
- data/ext/java/nokogiri/XmlDocument.java +633 -0
- data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
- data/ext/java/nokogiri/XmlDtd.java +481 -0
- data/ext/java/nokogiri/XmlElement.java +68 -0
- data/ext/java/nokogiri/XmlElementContent.java +382 -0
- data/ext/java/nokogiri/XmlElementDecl.java +147 -0
- data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
- data/ext/java/nokogiri/XmlEntityReference.java +101 -0
- data/ext/java/nokogiri/XmlNamespace.java +199 -0
- data/ext/java/nokogiri/XmlNode.java +1684 -0
- data/ext/java/nokogiri/XmlNodeSet.java +434 -0
- data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
- data/ext/java/nokogiri/XmlReader.java +531 -0
- data/ext/java/nokogiri/XmlRelaxng.java +151 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
- data/ext/java/nokogiri/XmlSchema.java +388 -0
- data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
- data/ext/java/nokogiri/XmlText.java +110 -0
- data/ext/java/nokogiri/XmlXpathContext.java +301 -0
- data/ext/java/nokogiri/XsltStylesheet.java +347 -0
- data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
- data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
- data/ext/java/nokogiri/internals/ParserContext.java +259 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
- data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
- data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
- data/ext/nokogiri/depend +477 -0
- data/ext/nokogiri/extconf.rb +836 -0
- data/ext/nokogiri/html_document.c +171 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +135 -0
- data/ext/nokogiri/nokogiri.h +130 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +69 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +622 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +63 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +111 -0
- data/ext/nokogiri/xml_namespace.h +14 -0
- data/ext/nokogiri/xml_node.c +1773 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +486 -0
- data/ext/nokogiri/xml_node_set.h +12 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +657 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +179 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +305 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +159 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +276 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +64 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +52 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +374 -0
- data/ext/nokogiri/xml_xpath_context.h +10 -0
- data/ext/nokogiri/xslt_stylesheet.c +263 -0
- data/ext/nokogiri/xslt_stylesheet.h +14 -0
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri/css/node.rb +53 -0
- data/lib/nokogiri/css/parser.rb +751 -0
- data/lib/nokogiri/css/parser.y +272 -0
- data/lib/nokogiri/css/parser_extras.rb +94 -0
- data/lib/nokogiri/css/syntax_error.rb +8 -0
- data/lib/nokogiri/css/tokenizer.rb +154 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +260 -0
- data/lib/nokogiri/css.rb +28 -0
- data/lib/nokogiri/decorators/slop.rb +43 -0
- data/lib/nokogiri/html/builder.rb +36 -0
- data/lib/nokogiri/html/document.rb +322 -0
- data/lib/nokogiri/html/document_fragment.rb +50 -0
- data/lib/nokogiri/html/element_description.rb +24 -0
- data/lib/nokogiri/html/element_description_defaults.rb +672 -0
- data/lib/nokogiri/html/entity_lookup.rb +14 -0
- data/lib/nokogiri/html/sax/parser.rb +63 -0
- data/lib/nokogiri/html/sax/parser_context.rb +17 -0
- data/lib/nokogiri/html/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html.rb +38 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +5 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml/attr.rb +15 -0
- data/lib/nokogiri/xml/attribute_decl.rb +19 -0
- data/lib/nokogiri/xml/builder.rb +447 -0
- data/lib/nokogiri/xml/cdata.rb +12 -0
- data/lib/nokogiri/xml/character_data.rb +8 -0
- data/lib/nokogiri/xml/document.rb +290 -0
- data/lib/nokogiri/xml/document_fragment.rb +159 -0
- data/lib/nokogiri/xml/dtd.rb +33 -0
- data/lib/nokogiri/xml/element_content.rb +37 -0
- data/lib/nokogiri/xml/element_decl.rb +14 -0
- data/lib/nokogiri/xml/entity_decl.rb +20 -0
- data/lib/nokogiri/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +14 -0
- data/lib/nokogiri/xml/node/save_options.rb +62 -0
- data/lib/nokogiri/xml/node.rb +1240 -0
- data/lib/nokogiri/xml/node_set.rb +372 -0
- data/lib/nokogiri/xml/notation.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +127 -0
- data/lib/nokogiri/xml/pp/character_data.rb +19 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +3 -0
- data/lib/nokogiri/xml/processing_instruction.rb +9 -0
- data/lib/nokogiri/xml/reader.rb +116 -0
- data/lib/nokogiri/xml/relax_ng.rb +37 -0
- data/lib/nokogiri/xml/sax/document.rb +172 -0
- data/lib/nokogiri/xml/sax/parser.rb +123 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +5 -0
- data/lib/nokogiri/xml/schema.rb +72 -0
- data/lib/nokogiri/xml/searchable.rb +239 -0
- data/lib/nokogiri/xml/syntax_error.rb +71 -0
- data/lib/nokogiri/xml/text.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
- data/lib/nokogiri/xml/xpath.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +17 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +26 -0
- data/lib/nokogiri/xslt.rb +57 -0
- data/lib/nokogiri.rb +144 -0
- data/lib/serializer.jar +0 -0
- data/lib/xalan.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
- data/lib/xml-apis.jar +0 -0
- data/lib/xsd/xmlparser/nokogiri.rb +103 -0
- metadata +531 -0
@@ -0,0 +1,290 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
module XML
|
7
|
+
##
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with
|
9
|
+
# XML documents. The Document is created by parsing an XML document.
|
10
|
+
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
11
|
+
#
|
12
|
+
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
13
|
+
# Nokogiri::XML::Searchable#xpath
|
14
|
+
#
|
15
|
+
class Document < Nokogiri::XML::Node
|
16
|
+
# Only ASCII name characters are recognised here; non-ASCII (unicode)
# name characters are deliberately ignored.
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
NCNAME_START_CHAR = "A-Za-z_"
NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
# Matches an attribute name that declares a namespace: either the bare
# "xmlns" or "xmlns:<prefix>". Anchored with \A and \z (whole string)
# rather than ^ and $ (per line) so that a key containing a newline, e.g.
# "class\nxmlns:foo", is not mistaken for a namespace declaration.
NCNAME_RE = /\Axmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?\z/
|
21
|
+
|
22
|
+
##
|
23
|
+
# Parse an XML file.
|
24
|
+
#
|
25
|
+
# +string_or_io+ may be a String, or any object that responds to
|
26
|
+
# _read_ and _close_ such as an IO, or StringIO.
|
27
|
+
#
|
28
|
+
# +url+ (optional) is the URI where this document is located.
|
29
|
+
#
|
30
|
+
# +encoding+ (optional) is the encoding that should be used when processing
|
31
|
+
# the document.
|
32
|
+
#
|
33
|
+
# +options+ (optional) is a configuration object that sets options during
|
34
|
+
# parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
|
35
|
+
# Nokogiri::XML::ParseOptions for more information.
|
36
|
+
#
|
37
|
+
# +block+ (optional) is passed a configuration object on which
|
38
|
+
# parse options may be set.
|
39
|
+
#
|
40
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
41
|
+
# does not attempt to load DTDs or access the network. See
|
42
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
43
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
+
# set) by default.
|
45
|
+
#
|
46
|
+
# Nokogiri.XML() is a convenience method which will call this method.
|
47
|
+
#
|
48
|
+
# Parse +string_or_io+ into a document.
#
# +string_or_io+ may be a String, a Pathname, or an object responding to
# +read+; +url+, +encoding+ and +options+ are forwarded to the reader.
# An Integer +options+ is wrapped in a ParseOptions object, which is
# yielded to the block (if given) before parsing starts.
#
# Raises Nokogiri::XML::SyntaxError for empty input when the options are
# strict; otherwise empty input produces a new, empty document.
def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  yield options if block_given?

  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil

  if empty_doc?(string_or_io)
    if options.strict?
      raise Nokogiri::XML::SyntaxError.new("Empty document")
    else
      return encoding ? new.tap { |i| i.encoding = encoding } : new
    end
  end

  doc = if string_or_io.respond_to?(:read)
    if string_or_io.is_a?(Pathname)
      # resolve the Pathname to the file and open it as an IO object, see #2110
      io = string_or_io.expand_path.open
      begin
        read_io(io, url || io.path, encoding, options.to_i)
      ensure
        # This handle was opened here, not by the caller, so it must be
        # closed here; previously it was left for the GC to reclaim.
        # NOTE(review): relies on read_io consuming the IO before it
        # returns -- confirm for both native backends.
        io.close
      end
    else
      read_io(string_or_io, url, encoding, options.to_i)
    end
  else
    # read_memory pukes on empty docs
    read_memory(string_or_io, url, encoding, options.to_i)
  end

  # do xinclude processing
  doc.do_xinclude(options) if options.xinclude?

  doc
end
|
81
|
+
|
82
|
+
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
83
|
+
attr_accessor :errors
|
84
|
+
|
85
|
+
# Start with an empty error list and no decorators registered.
# Any arguments are accepted and ignored here.
def initialize(*args) # :nodoc:
  @errors, @decorators = [], nil
end
|
89
|
+
|
90
|
+
##
|
91
|
+
# Create an element with +name+, and optionally setting the content and attributes.
|
92
|
+
#
|
93
|
+
# doc.create_element "div" # <div></div>
|
94
|
+
# doc.create_element "div", :class => "container" # <div class='container'></div>
|
95
|
+
# doc.create_element "div", "contents" # <div>contents</div>
|
96
|
+
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
97
|
+
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
98
|
+
#
|
99
|
+
# Build an element called +name+ that belongs to this document.
#
# Every Hash in +args+ contributes attributes; keys matching NCNAME_RE
# ("xmlns" / "xmlns:prefix") become namespace definitions instead. Any
# other argument is assigned as the element's content. If a namespace
# definition with a nil or empty prefix ends up on the element, it is
# made the element's namespace.
def create_element(name, *args, &block)
  element = Nokogiri::XML::Element.new(name, self, &block)

  args.each do |arg|
    unless Hash === arg
      element.content = arg
      next
    end

    arg.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key =~ NCNAME_RE
        # "xmlns:foo" -> "foo"; a bare "xmlns" yields nil (default namespace)
        element.add_namespace_definition(key.split(":", 2)[1], attr_value)
      else
        element[key] = attr_value.to_s
      end
    end
  end

  default_ns = element.namespace_definitions.find do |definition|
    definition.prefix.nil? || definition.prefix == ''
  end
  element.namespace = default_ns if default_ns

  element
end
|
122
|
+
|
123
|
+
# Create a Text Node with +string+
|
124
|
+
# Build a Text node for this document from +string+ (converted with #to_s);
# the optional block is handed on to Text.new.
def create_text_node(string, &block)
  text = string.to_s
  Nokogiri::XML::Text.new(text, self, &block)
end
|
127
|
+
|
128
|
+
# Create a CDATA Node containing +string+
|
129
|
+
# Build a CDATA node for this document from +string+ (converted with
# #to_s); the optional block is handed on to CDATA.new.
def create_cdata(string, &block)
  text = string.to_s
  Nokogiri::XML::CDATA.new(self, text, &block)
end
|
132
|
+
|
133
|
+
# Create a Comment Node containing +string+
|
134
|
+
# Build a Comment node for this document from +string+ (converted with
# #to_s); the optional block is handed on to Comment.new.
def create_comment(string, &block)
  text = string.to_s
  Nokogiri::XML::Comment.new(self, text, &block)
end
|
137
|
+
|
138
|
+
# The name of this document. Always returns "document"
|
139
|
+
# A document's name is the fixed string "document".
def name() 'document' end
|
142
|
+
|
143
|
+
# A reference to +self+
|
144
|
+
# A document is its own owning document, so this returns the receiver.
def document() self end
|
147
|
+
|
148
|
+
##
|
149
|
+
# Recursively get all namespaces from this node and its subtree and
|
150
|
+
# return them as a hash.
|
151
|
+
#
|
152
|
+
# For example, given this document:
|
153
|
+
#
|
154
|
+
# <root xmlns:foo="bar">
|
155
|
+
# <bar xmlns:hello="world" />
|
156
|
+
# </root>
|
157
|
+
#
|
158
|
+
# This method will return:
|
159
|
+
#
|
160
|
+
# { 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
|
161
|
+
#
|
162
|
+
# WARNING: this method will clobber duplicate names in the keys.
|
163
|
+
# For example, given this document:
|
164
|
+
#
|
165
|
+
# <root xmlns:foo="bar">
|
166
|
+
# <bar xmlns:foo="baz" />
|
167
|
+
# </root>
|
168
|
+
#
|
169
|
+
# The hash returned will look like this: { 'xmlns:foo' => 'bar' }
|
170
|
+
#
|
171
|
+
# Non-prefixed default namespaces (as in "xmlns=") are not included
|
172
|
+
# in the hash.
|
173
|
+
#
|
174
|
+
# Note that this method does an xpath lookup for nodes with
|
175
|
+
# namespaces, and as a result the order may be dependent on the
|
176
|
+
# implementation of the underlying XML library.
|
177
|
+
#
|
178
|
+
# Gather every namespace node returned by the "//namespace::*" query into
# a Hash of "xmlns[:prefix]" => href. Namespaces whose prefix is "xml" are
# left out, and a later entry with the same key overwrites an earlier one.
def collect_namespaces
  xpath("//namespace::*").each_with_object({}) do |ns, collected|
    next if ns.prefix == "xml"

    key = ["xmlns", ns.prefix].compact.join(":")
    collected[key] = ns.href
  end
end
|
184
|
+
|
185
|
+
# Get the list of decorators given +key+
|
186
|
+
# Return the (mutable) list of decorators registered under +key+,
# creating the registry and the per-key list on first use.
def decorators(key)
  registry = (@decorators ||= {})
  registry[key] ||= []
end
|
190
|
+
|
191
|
+
##
|
192
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
193
|
+
# the document or +nil+ when there is no DTD.
|
194
|
+
# Ask the internal subset (DTD) to validate this document and return
# whatever it reports; returns nil when there is no internal subset.
def validate
  dtd = internal_subset
  return nil unless dtd

  dtd.validate(self)
end
|
198
|
+
|
199
|
+
##
|
200
|
+
# Explore a document with shortcut methods. See Nokogiri::Slop for details.
|
201
|
+
#
|
202
|
+
# Note that any nodes that have been instantiated before #slop!
|
203
|
+
# is called will not be decorated with sloppy behavior. So, if you're in
|
204
|
+
# irb, the preferred idiom is:
|
205
|
+
#
|
206
|
+
# irb> doc = Nokogiri::Slop my_markup
|
207
|
+
#
|
208
|
+
# and not
|
209
|
+
#
|
210
|
+
# irb> doc = Nokogiri::HTML my_markup
|
211
|
+
# ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
|
212
|
+
# irb> doc.slop!
|
213
|
+
# ... which does absolutely nothing.
|
214
|
+
#
|
215
|
+
# Register Nokogiri::Decorators::Slop for XML::Node (only once) and run
# decorate!. Always returns the document itself.
def slop!
  node_decorators = decorators(XML::Node)
  return self if node_decorators.include?(Nokogiri::Decorators::Slop)

  node_decorators << Nokogiri::Decorators::Slop
  decorate!
  self
end
|
223
|
+
|
224
|
+
##
|
225
|
+
# Apply any decorators to +node+
|
226
|
+
# Extend +node+ with every module registered for a class the node is an
# instance of. Does nothing when no decorators have been registered.
def decorate(node)
  return unless @decorators

  @decorators.each_pair do |klass, mods|
    mods.each { |mod| node.extend(mod) } if node.is_a?(klass)
  end
end
|
233
|
+
|
234
|
+
# #to_xml is another name for #serialize, and #clone for #dup.
alias_method :to_xml, :serialize
alias_method :clone, :dup
|
236
|
+
|
237
|
+
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
238
|
+
# Namespaces of the root node, or an empty Hash when there is no root.
def namespaces
  top = root
  return {} unless top

  top.namespaces
end
|
241
|
+
|
242
|
+
##
|
243
|
+
# Create a Nokogiri::XML::DocumentFragment from +tags+
|
244
|
+
# Returns an empty fragment if +tags+ is nil.
|
245
|
+
# Build a DocumentFragment for this document from +tags+ (nil by
# default), passing the current root node as the third argument.
def fragment(tags = nil)
  context = root
  DocumentFragment.new(self, tags, context)
end
|
248
|
+
|
249
|
+
# Undefine these methods so that Document instances do not respond to them.
%i[
  swap parent namespace default_namespace=
  add_namespace_definition attributes
  namespace_definitions line add_namespace
].each { |meth| undef_method meth }
|
252
|
+
|
253
|
+
# Add +node_or_tags+ as a child of this document.
#
# A document may have only one root node, so when a real root already
# exists (anything but the 'nokogiri_text_wrapper' placeholder) only
# comments and processing instructions may be added. A NodeSet holding
# more than one node is always rejected.
#
# Raises RuntimeError ("A document may not have multiple root nodes.")
# when either rule is violated.
def add_child node_or_tags
  # Coerce before inspecting the argument: the code below shows coerce may
  # hand back a NodeSet, and a NodeSet (or, presumably, a markup String --
  # see the parameter name) does not answer #comment? or
  # #processing_instruction?. Checking first raised NoMethodError instead
  # of the intended error.
  node_or_tags = coerce(node_or_tags)
  nodes = node_or_tags.is_a?(XML::NodeSet) ? node_or_tags.to_a : [node_or_tags]

  has_root = root && root.name != 'nokogiri_text_wrapper'
  only_misc = nodes.all? { |node| node.comment? || node.processing_instruction? }
  raise "A document may not have multiple root nodes." if has_root && !only_misc

  if node_or_tags.is_a?(XML::NodeSet)
    raise "A document may not have multiple root nodes." if node_or_tags.size > 1
    super(node_or_tags.first)
  else
    super(node_or_tags)
  end
end
|
263
|
+
alias :<< :add_child
|
264
|
+
|
265
|
+
##
|
266
|
+
# +JRuby+
|
267
|
+
# Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
|
268
|
+
# The trailing `if false` means this definition is never evaluated; the
# empty stub exists only so the method has a place to be documented.
def self.wrap(document) end if false # native-ext provides Document.wrap
|
269
|
+
|
270
|
+
##
|
271
|
+
# +JRuby+
|
272
|
+
# Returns Java's org.w3c.dom.document of this Document.
|
273
|
+
# The trailing `if false` means this definition is never evaluated; the
# empty stub exists only so the method has a place to be documented.
def to_java; end if false # JRuby provides #to_java
|
274
|
+
|
275
|
+
private
|
276
|
+
# True when +string_or_io+ holds nothing to parse: it is nil, it reports
# itself as empty, or it is a stream already at end-of-file.
# NOTE(review): a bare `private` does not affect `def self.` methods, so
# this remains publicly callable.
def self.empty_doc?(string_or_io)
  return true if string_or_io.nil?
  return true if string_or_io.respond_to?(:empty?) && string_or_io.empty?

  string_or_io.respond_to?(:eof?) && string_or_io.eof?
end
|
281
|
+
|
282
|
+
# @private
|
283
|
+
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
284
|
+
|
285
|
+
# Names of the attributes listed for a document: its name and children.
def inspect_attributes
  %i[name children]
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class DocumentFragment < Nokogiri::XML::Node
|
5
|
+
##
|
6
|
+
# Create a new DocumentFragment from +tags+.
|
7
|
+
#
|
8
|
+
# If +ctx+ is present, it is used as a context node for the
|
9
|
+
# subtree created, e.g., namespaces will be resolved relative
|
10
|
+
# to +ctx+.
|
11
|
+
# Populate the fragment by parsing +tags+; nothing is parsed when +tags+
# is nil or false.
#
# With a context node +ctx+ the markup is parsed by +ctx+ itself (on
# JRuby wrapped in a <root> element that re-declares the namespaces in
# scope, see issues #490 and #770). Without one it is parsed as a
# standalone document wrapped in <root>, and that wrapper's child nodes
# are used. Every resulting node is re-parented onto this fragment.
def initialize(document, tags = nil, ctx = nil)
  return self unless tags

  parsed_nodes =
    if !ctx
      wrapped = XML::Document.parse("<root>#{tags}</root>")
      wrapped.xpath("/root/node()")
    elsif Nokogiri.jruby?
      ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
    else
      ctx.parse(tags)
    end

  parsed_nodes.each { |node| node.parent = self }
end
|
28
|
+
|
29
|
+
if Nokogiri.uses_libxml?
  # Copy this fragment into a duplicate of its document: each child is
  # dup'ed (called with 1 and the new document) and attached to a new
  # fragment created on that document.
  def dup
    doc_copy = document.dup
    self.class.new(doc_copy).tap do |copy|
      children.each { |child| child.dup(1, doc_copy).parent = copy }
    end
  end
end
|
39
|
+
|
40
|
+
###
|
41
|
+
# return the name for DocumentFragment
|
42
|
+
# A fragment's name is the fixed string "#document-fragment".
def name() '#document-fragment' end
|
45
|
+
|
46
|
+
###
|
47
|
+
# Convert this DocumentFragment to a string
|
48
|
+
# The string form of a fragment is the string form of its children.
def to_s
  kids = children
  kids.to_s
end
|
51
|
+
|
52
|
+
###
|
53
|
+
# Convert this DocumentFragment to html
|
54
|
+
# See Nokogiri::XML::NodeSet#to_html
|
55
|
+
# Serialize the fragment's children as HTML; +args+ are forwarded to the
# children's #to_html.
#
# On JRuby a leading options Hash (or a new one) gets a default
# +:save_with+ of NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML unless the
# caller already supplied one.
def to_html *args
  if Nokogiri.jruby?
    # Work on a copy: the default used to be written into the caller's own
    # Hash, leaking :save_with back to the caller (and failing on a frozen
    # Hash).
    options = args.first.is_a?(Hash) ? args.shift.dup : {}
    options[:save_with] ||= Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
    args.insert(0, options)
  end
  children.to_html(*args)
end
|
65
|
+
|
66
|
+
###
|
67
|
+
# Convert this DocumentFragment to xhtml
|
68
|
+
# See Nokogiri::XML::NodeSet#to_xhtml
|
69
|
+
# Serialize the fragment's children as XHTML; +args+ are forwarded to the
# children's #to_xhtml.
#
# On JRuby a leading options Hash (or a new one) gets a default
# +:save_with+ of NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML unless the
# caller already supplied one.
def to_xhtml *args
  if Nokogiri.jruby?
    # Work on a copy: the default used to be written into the caller's own
    # Hash, leaking :save_with back to the caller (and failing on a frozen
    # Hash).
    options = args.first.is_a?(Hash) ? args.shift.dup : {}
    options[:save_with] ||= Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
    args.insert(0, options)
  end
  children.to_xhtml(*args)
end
|
79
|
+
|
80
|
+
###
|
81
|
+
# Convert this DocumentFragment to xml
|
82
|
+
# See Nokogiri::XML::NodeSet#to_xml
|
83
|
+
# Serialize the fragment's children as XML, forwarding +args+ unchanged.
def to_xml(*args)
  kids = children
  kids.to_xml(*args)
end
|
86
|
+
|
87
|
+
###
|
88
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
89
|
+
#
|
90
|
+
# Search this fragment for CSS +rules+. +rules+ must be one or more CSS
|
91
|
+
# selectors. For example:
|
92
|
+
#
|
93
|
+
# For more information see Nokogiri::XML::Searchable#css
|
94
|
+
# Run the CSS query in +args+ against the fragment's children. A fragment
# without children yields a new, empty NodeSet for its document.
def css(*args)
  kids = children
  return NodeSet.new(document) unless kids.any?

  kids.css(*args) # 'children' is a smell here
end
|
101
|
+
|
102
|
+
#
|
103
|
+
# NOTE that we don't delegate #xpath to children ... another smell.
|
104
|
+
# def xpath ; end
|
105
|
+
#
|
106
|
+
|
107
|
+
###
|
108
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
109
|
+
#
|
110
|
+
# Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
|
111
|
+
#
|
112
|
+
# For more information see Nokogiri::XML::Searchable#search
|
113
|
+
# Evaluate each of +rules+ and combine all matches into one NodeSet.
# Rules matching Searchable::LOOKS_LIKE_XPATH are run through #xpath on
# the fragment; all others are run as CSS against the children.
def search(*rules)
  rules, handler, ns, binds = extract_params(rules)

  found = NodeSet.new(document)
  rules.each do |rule|
    matches =
      if rule =~ Searchable::LOOKS_LIKE_XPATH
        xpath(*[rule, ns, handler, binds].compact)
      else
        children.css(*[rule, ns, handler].compact) # 'children' is a smell here
      end
    found += matches
  end
  found
end
|
124
|
+
|
125
|
+
alias :serialize :to_s
|
126
|
+
|
127
|
+
# Build a fragment from +tags+ on a brand-new XML::Document.
def self.parse(tags)
  owner = XML::Document.new
  new(owner, tags)
end
|
134
|
+
|
135
|
+
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
136
|
+
# Errors are stored on the owning document; read them from there.
def errors
  owner = document
  owner.errors
end
|
139
|
+
|
140
|
+
# Errors are stored on the owning document; write +things+ there.
def errors=(things) # :nodoc:
  owner = document
  owner.errors = things
end
|
143
|
+
|
144
|
+
# Ask the owning document to build a fragment from +data+.
def fragment(data)
  owner = document
  owner.fragment(data)
end
|
147
|
+
|
148
|
+
private
|
149
|
+
|
150
|
+
# fix for issue 770
|
151
|
+
# fix for issue 770
#
# Render the namespaces in scope on +ctx+ as a space-separated list of
# xmlns attributes (xmlns="..." for a nil prefix, xmlns:prefix="..."
# otherwise), ready to be interpolated into a start tag.
#
# The href is escaped for use inside a double-quoted attribute value, so
# a namespace URI containing &, < or " no longer produces malformed
# markup.
def namespace_declarations ctx
  escapes = { '&' => '&amp;', '<' => '&lt;', '"' => '&quot;' }
  ctx.namespace_scopes.map do |namespace|
    prefix = namespace.prefix.nil? ? "" : ":#{namespace.prefix}"
    href = namespace.href.to_s.gsub(/[&<"]/, escapes)
    %Q{xmlns#{prefix}="#{href}"}
  end.join ' '
end
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class DTD < Nokogiri::XML::Node
  # Undefine these methods so that DTD instances do not respond to them.
  %i[attribute_nodes values content namespace namespace_definitions].each do |meth|
    undef_method meth
  end
  undef_method :line if method_defined?(:line)

  # The keys of #attributes.
  def keys
    attributes.keys
  end

  # Yield each entry of #attributes as a two-element [key, value] Array.
  def each
    attributes.each { |key, value| yield([key, value]) }
  end

  # True when the DTD's name is "html", compared case-insensitively.
  def html_dtd?
    name.casecmp('html').zero?
  end

  # True for an HTML DTD that has no external id and whose system id is
  # either absent or 'about:legacy-compat'.
  def html5_dtd?
    return false unless html_dtd?
    return false unless external_id.nil?

    system_id.nil? || system_id == 'about:legacy-compat'
  end
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
###
|
5
|
+
# Represents the allowed content in an Element Declaration inside a DTD:
|
6
|
+
#
|
7
|
+
# <?xml version="1.0"?><?TEST-STYLE PIDATA?>
|
8
|
+
# <!DOCTYPE staff SYSTEM "staff.dtd" [
|
9
|
+
# <!ELEMENT div1 (head, (p | list | note)*, div2*)>
|
10
|
+
# ]>
|
11
|
+
# </root>
|
12
|
+
#
|
13
|
+
# ElementContent represents the tree inside the <!ELEMENT> tag shown above
|
14
|
+
# that lists the possible content for the div1 tag.
|
15
|
+
class ElementContent
  # Values for the content type
  PCDATA  = 1
  ELEMENT = 2
  SEQ     = 3
  OR      = 4

  # Values for the content occurrence
  ONCE = 1
  OPT  = 2
  MULT = 3
  PLUS = 4

  attr_reader :document

  ###
  # The child nodes of this ElementContent: +c1+ and +c2+, with nil
  # entries dropped.
  def children
    [c1, c2].reject(&:nil?)
  end
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class ElementDecl < Nokogiri::XML::Node
  # Undefine these methods so that ElementDecl instances do not respond
  # to them.
  %i[namespace namespace_definitions].each { |meth| undef_method meth }
  undef_method :line if method_defined?(:line)

  # Class name, hexadecimal object id and the inspected string form.
  def inspect
    hex_id = sprintf("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class EntityDecl < Nokogiri::XML::Node
  # Undefine these methods so that EntityDecl instances do not respond
  # to them.
  %i[attribute_nodes attributes namespace namespace_definitions].each do |meth|
    undef_method meth
  end
  undef_method :line if method_defined?(:line)

  # Construction is delegated to the document: +doc+ creates the entity
  # called +name+, receiving any further arguments.
  def self.new(name, doc, *args)
    doc.create_entity(name, *args)
  end

  # Class name, hexadecimal object id and the inspected string form.
  def inspect
    hex_id = sprintf("0x%x", object_id)
    "#<#{self.class.name}:#{hex_id} #{to_s.inspect}>"
  end
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class EntityReference < Nokogiri::XML::Node
  # Always an empty NodeSet.
  #
  # For predefined entities libxml2 creates a malformed child node, and
  # touching it is likely to segfault, so the child is hidden entirely.
  # See https://github.com/sparklemotion/nokogiri/issues/1238 for details.
  def children
    owner = document
    NodeSet.new(owner)
  end

  # Only the name is listed for an entity reference.
  def inspect_attributes
    %i[name]
  end
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module XML
|
4
|
+
class Node
|
5
|
+
###
|
6
|
+
# Save options for serializing nodes
|
7
|
+
class SaveOptions
  # Format serialized xml
  FORMAT         = 1
  # Do not include declarations
  NO_DECLARATION = 2
  # Do not include empty tags
  NO_EMPTY_TAGS  = 4
  # Do not save XHTML
  NO_XHTML       = 8
  # Save as XHTML
  AS_XHTML       = 16
  # Save as XML
  AS_XML         = 32
  # Save as HTML
  AS_HTML        = 64

  if Nokogiri.jruby?
    # Save builder created document
    AS_BUILDER = 128
    # Default for XML documents (see nokogiri issue 415)
    DEFAULT_XML = AS_XML
    # Default for HTML documents
    DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
  else
    # Default for XML documents
    DEFAULT_XML = FORMAT | AS_XML
    # Default for HTML documents
    DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
  end
  # Default for XHTML documents
  DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML

  # Integer representation of the SaveOptions
  attr_reader :options

  # Create a new SaveOptions object with +options+
  def initialize(options = 0)
    @options = options
  end

  # For every constant above, define a chainable setter (e.g. #format)
  # that switches the flag(s) on and returns self, and a predicate
  # (e.g. #format?) that reports whether all of its bits are set.
  constants.each do |constant|
    flag = const_get(constant)
    method_name = constant.downcase

    define_method(method_name) do
      @options |= flag
      self
    end

    define_method(:"#{method_name}?") do
      flag & @options == flag
    end
  end

  alias_method :to_i, :options
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|