nokogiri 1.11.0.rc3-java → 1.11.4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/LICENSE.md +1 -1
- data/README.md +168 -91
- data/dependencies.yml +12 -12
- data/ext/java/nokogiri/EncodingHandler.java +76 -89
- data/ext/java/nokogiri/HtmlDocument.java +135 -144
- data/ext/java/nokogiri/HtmlElementDescription.java +102 -117
- data/ext/java/nokogiri/HtmlEntityLookup.java +33 -60
- data/ext/java/nokogiri/HtmlSaxParserContext.java +218 -222
- data/ext/java/nokogiri/HtmlSaxPushParser.java +162 -169
- data/ext/java/nokogiri/NokogiriService.java +595 -556
- data/ext/java/nokogiri/XmlAttr.java +118 -126
- data/ext/java/nokogiri/XmlAttributeDecl.java +95 -106
- data/ext/java/nokogiri/XmlCdata.java +35 -58
- data/ext/java/nokogiri/XmlComment.java +46 -67
- data/ext/java/nokogiri/XmlDocument.java +645 -572
- data/ext/java/nokogiri/XmlDocumentFragment.java +125 -137
- data/ext/java/nokogiri/XmlDtd.java +448 -414
- data/ext/java/nokogiri/XmlElement.java +23 -48
- data/ext/java/nokogiri/XmlElementContent.java +343 -316
- data/ext/java/nokogiri/XmlElementDecl.java +124 -125
- data/ext/java/nokogiri/XmlEntityDecl.java +119 -127
- data/ext/java/nokogiri/XmlEntityReference.java +49 -72
- data/ext/java/nokogiri/XmlNamespace.java +175 -175
- data/ext/java/nokogiri/XmlNode.java +1843 -1622
- data/ext/java/nokogiri/XmlNodeSet.java +361 -331
- data/ext/java/nokogiri/XmlProcessingInstruction.java +47 -69
- data/ext/java/nokogiri/XmlReader.java +513 -450
- data/ext/java/nokogiri/XmlRelaxng.java +89 -101
- data/ext/java/nokogiri/XmlSaxParserContext.java +328 -310
- data/ext/java/nokogiri/XmlSaxPushParser.java +227 -220
- data/ext/java/nokogiri/XmlSchema.java +335 -242
- data/ext/java/nokogiri/XmlSyntaxError.java +113 -119
- data/ext/java/nokogiri/XmlText.java +55 -76
- data/ext/java/nokogiri/XmlXpathContext.java +242 -210
- data/ext/java/nokogiri/XsltStylesheet.java +280 -269
- data/ext/java/nokogiri/internals/ClosedStreamException.java +5 -2
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +201 -190
- data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +17 -10
- data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +43 -16
- data/ext/java/nokogiri/internals/NokogiriDomParser.java +63 -80
- data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +107 -88
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +27 -52
- data/ext/java/nokogiri/internals/NokogiriHandler.java +316 -286
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +736 -652
- data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +184 -173
- data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +79 -89
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +64 -79
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +84 -99
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +48 -65
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +119 -78
- data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -54
- data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +23 -46
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +55 -72
- data/ext/java/nokogiri/internals/ParserContext.java +206 -211
- data/ext/java/nokogiri/internals/ReaderNode.java +478 -403
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +822 -739
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +31 -54
- data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +129 -123
- data/ext/java/nokogiri/internals/XmlDeclHandler.java +3 -34
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +206 -207
- data/ext/java/nokogiri/internals/XmlSaxParser.java +22 -47
- data/ext/java/nokogiri/internals/c14n/AttrCompare.java +71 -68
- data/ext/java/nokogiri/internals/c14n/C14nHelper.java +137 -118
- data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +27 -21
- data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +74 -61
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +230 -205
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +572 -547
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +17 -10
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +323 -302
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +232 -219
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +23 -16
- data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +22 -15
- data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +575 -545
- data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +141 -120
- data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +39 -38
- data/ext/java/nokogiri/internals/c14n/Constants.java +13 -10
- data/ext/java/nokogiri/internals/c14n/ElementProxy.java +279 -247
- data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +66 -53
- data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +44 -37
- data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +135 -120
- data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +59 -48
- data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +384 -334
- data/ext/java/nokogiri/internals/c14n/NodeFilter.java +25 -24
- data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +151 -140
- data/ext/java/nokogiri/internals/c14n/XMLUtils.java +456 -423
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1466 -1500
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +626 -574
- data/ext/nokogiri/depend +37 -358
- data/ext/nokogiri/extconf.rb +581 -374
- data/ext/nokogiri/html_document.c +78 -82
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +69 -66
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +192 -93
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +118 -118
- data/ext/nokogiri/xml_sax_parser_context.c +103 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +95 -47
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +206 -123
- data/ext/nokogiri/xslt_stylesheet.c +158 -161
- data/lib/nokogiri.rb +3 -7
- data/lib/nokogiri/css/parser.rb +3 -3
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +2 -149
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +205 -0
- data/lib/nokogiri/xml/document.rb +91 -35
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +89 -69
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +3 -1
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- metadata +86 -177
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/nokogiri.h +0 -134
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
@@ -0,0 +1,205 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "singleton"
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
class VersionInfo # :nodoc:
|
7
|
+
include Singleton
|
8
|
+
|
9
|
+
def jruby?
|
10
|
+
::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
|
11
|
+
end
|
12
|
+
|
13
|
+
def windows?
|
14
|
+
::RUBY_PLATFORM =~ /mingw|mswin/
|
15
|
+
end
|
16
|
+
|
17
|
+
def ruby_minor
|
18
|
+
Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
|
19
|
+
end
|
20
|
+
|
21
|
+
def engine
|
22
|
+
defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
|
23
|
+
end
|
24
|
+
|
25
|
+
def loaded_libxml_version
|
26
|
+
Gem::Version.new(Nokogiri::LIBXML_LOADED_VERSION
|
27
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
28
|
+
.collect(&:to_i)
|
29
|
+
.join("."))
|
30
|
+
end
|
31
|
+
|
32
|
+
def compiled_libxml_version
|
33
|
+
Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
|
34
|
+
end
|
35
|
+
|
36
|
+
def loaded_libxslt_version
|
37
|
+
Gem::Version.new(Nokogiri::LIBXSLT_LOADED_VERSION
|
38
|
+
.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
|
39
|
+
.collect(&:to_i)
|
40
|
+
.join("."))
|
41
|
+
end
|
42
|
+
|
43
|
+
def compiled_libxslt_version
|
44
|
+
Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
|
45
|
+
end
|
46
|
+
|
47
|
+
def libxml2?
|
48
|
+
defined?(Nokogiri::LIBXML_COMPILED_VERSION)
|
49
|
+
end
|
50
|
+
|
51
|
+
def libxml2_has_iconv?
|
52
|
+
defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
|
53
|
+
end
|
54
|
+
|
55
|
+
def libxml2_using_packaged?
|
56
|
+
libxml2? && Nokogiri::PACKAGED_LIBRARIES
|
57
|
+
end
|
58
|
+
|
59
|
+
def libxml2_using_system?
|
60
|
+
libxml2? && !libxml2_using_packaged?
|
61
|
+
end
|
62
|
+
|
63
|
+
def libxml2_precompiled?
|
64
|
+
libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
|
65
|
+
end
|
66
|
+
|
67
|
+
def warnings
|
68
|
+
warnings = []
|
69
|
+
|
70
|
+
if libxml2?
|
71
|
+
if compiled_libxml_version != loaded_libxml_version
|
72
|
+
warnings << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
|
73
|
+
end
|
74
|
+
|
75
|
+
if compiled_libxslt_version != loaded_libxslt_version
|
76
|
+
warnings << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
warnings
|
81
|
+
end
|
82
|
+
|
83
|
+
def to_hash
|
84
|
+
header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
|
85
|
+
|
86
|
+
{}.tap do |vi|
|
87
|
+
vi["warnings"] = []
|
88
|
+
vi["nokogiri"] = {}.tap do |nokogiri|
|
89
|
+
nokogiri["version"] = Nokogiri::VERSION
|
90
|
+
|
91
|
+
unless jruby?
|
92
|
+
# enable gems like nokogumbo to build with the following in their extconf.rb:
|
93
|
+
#
|
94
|
+
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
95
|
+
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
96
|
+
#
|
97
|
+
cppflags = ["-I#{header_directory.shellescape}"]
|
98
|
+
ldflags = []
|
99
|
+
|
100
|
+
if libxml2_using_packaged?
|
101
|
+
cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
|
102
|
+
cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
|
103
|
+
|
104
|
+
if windows?
|
105
|
+
# on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
|
106
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
107
|
+
unless File.exist?(lib_directory)
|
108
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
109
|
+
end
|
110
|
+
ldflags << "-L#{lib_directory.shellescape}"
|
111
|
+
ldflags << "-l:nokogiri.so"
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
nokogiri["cppflags"] = cppflags
|
116
|
+
nokogiri["ldflags"] = ldflags
|
117
|
+
end
|
118
|
+
end
|
119
|
+
vi["ruby"] = {}.tap do |ruby|
|
120
|
+
ruby["version"] = ::RUBY_VERSION
|
121
|
+
ruby["platform"] = ::RUBY_PLATFORM
|
122
|
+
ruby["gem_platform"] = ::Gem::Platform.local.to_s
|
123
|
+
ruby["description"] = ::RUBY_DESCRIPTION
|
124
|
+
ruby["engine"] = engine
|
125
|
+
ruby["jruby"] = jruby? if jruby?
|
126
|
+
end
|
127
|
+
|
128
|
+
if libxml2?
|
129
|
+
vi["libxml"] = {}.tap do |libxml|
|
130
|
+
if libxml2_using_packaged?
|
131
|
+
libxml["source"] = "packaged"
|
132
|
+
libxml["precompiled"] = libxml2_precompiled?
|
133
|
+
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
134
|
+
|
135
|
+
# this is for nokogumbo and shouldn't be forever
|
136
|
+
libxml["libxml2_path"] = header_directory
|
137
|
+
else
|
138
|
+
libxml["source"] = "system"
|
139
|
+
end
|
140
|
+
libxml["iconv_enabled"] = libxml2_has_iconv?
|
141
|
+
libxml["compiled"] = compiled_libxml_version.to_s
|
142
|
+
libxml["loaded"] = loaded_libxml_version.to_s
|
143
|
+
end
|
144
|
+
|
145
|
+
vi["libxslt"] = {}.tap do |libxslt|
|
146
|
+
if libxml2_using_packaged?
|
147
|
+
libxslt["source"] = "packaged"
|
148
|
+
libxslt["precompiled"] = libxml2_precompiled?
|
149
|
+
libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
|
150
|
+
else
|
151
|
+
libxslt["source"] = "system"
|
152
|
+
end
|
153
|
+
libxslt["compiled"] = compiled_libxslt_version.to_s
|
154
|
+
libxslt["loaded"] = loaded_libxslt_version.to_s
|
155
|
+
end
|
156
|
+
|
157
|
+
vi["warnings"] = warnings
|
158
|
+
end
|
159
|
+
|
160
|
+
if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
|
161
|
+
# see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
|
162
|
+
vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
|
163
|
+
elsif jruby?
|
164
|
+
vi["other_libraries"] = {}.tap do |ol|
|
165
|
+
ol["xerces"] = Nokogiri::XERCES_VERSION
|
166
|
+
ol["nekohtml"] = Nokogiri::NEKO_VERSION
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
def to_markdown
|
173
|
+
begin
|
174
|
+
require "psych"
|
175
|
+
rescue LoadError
|
176
|
+
end
|
177
|
+
require "yaml"
|
178
|
+
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
179
|
+
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
180
|
+
end
|
181
|
+
|
182
|
+
instance.warnings.each do |warning|
|
183
|
+
warn "WARNING: #{warning}"
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
def self.uses_libxml?(requirement = nil) # :nodoc:
|
188
|
+
return false unless VersionInfo.instance.libxml2?
|
189
|
+
return true unless requirement
|
190
|
+
Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
|
191
|
+
end
|
192
|
+
|
193
|
+
def self.jruby? # :nodoc:
|
194
|
+
VersionInfo.instance.jruby?
|
195
|
+
end
|
196
|
+
|
197
|
+
# Ensure constants used in this file are loaded - see #1896
|
198
|
+
if Nokogiri.jruby?
|
199
|
+
require "nokogiri/jruby/dependencies"
|
200
|
+
end
|
201
|
+
require "nokogiri/extension"
|
202
|
+
|
203
|
+
# More complete version information about libxml
|
204
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
205
|
+
end
|
@@ -1,4 +1,8 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'pathname'
|
5
|
+
|
2
6
|
module Nokogiri
|
3
7
|
module XML
|
4
8
|
##
|
@@ -10,11 +14,12 @@ module Nokogiri
|
|
10
14
|
# Nokogiri::XML::Searchable#xpath
|
11
15
|
#
|
12
16
|
class Document < Nokogiri::XML::Node
|
13
|
-
#
|
14
|
-
#
|
17
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
18
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
19
|
+
# characters in NCNAMEs.
|
15
20
|
NCNAME_START_CHAR = "A-Za-z_"
|
16
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
17
|
-
NCNAME_RE = /^xmlns(
|
21
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
22
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
18
23
|
|
19
24
|
##
|
20
25
|
# Parse an XML file.
|
@@ -44,9 +49,11 @@ module Nokogiri
|
|
44
49
|
#
|
45
50
|
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
|
46
51
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
47
|
-
|
52
|
+
|
48
53
|
yield options if block_given?
|
49
54
|
|
55
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
56
|
+
|
50
57
|
if empty_doc?(string_or_io)
|
51
58
|
if options.strict?
|
52
59
|
raise Nokogiri::XML::SyntaxError.new("Empty document")
|
@@ -56,12 +63,17 @@ module Nokogiri
|
|
56
63
|
end
|
57
64
|
|
58
65
|
doc = if string_or_io.respond_to?(:read)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
66
|
+
if string_or_io.is_a?(Pathname)
|
67
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
68
|
+
string_or_io = string_or_io.expand_path.open
|
69
|
+
url ||= string_or_io.path
|
70
|
+
end
|
71
|
+
|
72
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
73
|
+
else
|
74
|
+
# read_memory pukes on empty docs
|
75
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
76
|
+
end
|
65
77
|
|
66
78
|
# do xinclude processing
|
67
79
|
doc.do_xinclude(options) if options.xinclude?
|
@@ -69,6 +81,35 @@ module Nokogiri
|
|
69
81
|
return doc
|
70
82
|
end
|
71
83
|
|
84
|
+
##
|
85
|
+
# @!method wrap(java_document)
|
86
|
+
# @!scope class
|
87
|
+
#
|
88
|
+
# Create a {Document} using an existing Java DOM document object.
|
89
|
+
#
|
90
|
+
# The returned {Document} shares the same underlying data structure as the Java object, so
|
91
|
+
# changes in one are reflected in the other.
|
92
|
+
#
|
93
|
+
# @param java_document [Java::OrgW3cDom::Document]
|
94
|
+
# @return [Nokogiri::XML::Document]
|
95
|
+
# @note This method is only available when running JRuby.
|
96
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
97
|
+
# @see #to_java
|
98
|
+
|
99
|
+
##
|
100
|
+
# @!method to_java()
|
101
|
+
#
|
102
|
+
# Returns the underlying Java DOM document object for the {Document}.
|
103
|
+
#
|
104
|
+
# The returned Java object shares the same underlying data structure as the {Document}, so
|
105
|
+
# changes in one are reflected in the other.
|
106
|
+
#
|
107
|
+
# @return [Java::OrgW3cDom::Document]
|
108
|
+
# @note This method is only available when running JRuby.
|
109
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
110
|
+
# @see .wrap
|
111
|
+
|
112
|
+
|
72
113
|
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
73
114
|
attr_accessor :errors
|
74
115
|
|
@@ -78,33 +119,58 @@ module Nokogiri
|
|
78
119
|
end
|
79
120
|
|
80
121
|
##
|
81
|
-
# Create
|
122
|
+
# Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
|
123
|
+
# setting contents or attributes.
|
124
|
+
#
|
125
|
+
# Arguments may be passed to initialize the element:
|
126
|
+
# - a +Hash+ argument will be used to set attributes
|
127
|
+
# - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
|
128
|
+
#
|
129
|
+
# A block may be passed to mutate the node.
|
130
|
+
#
|
131
|
+
# @param name [String]
|
132
|
+
# @param contents_or_attrs [#to_s,Hash]
|
133
|
+
# @yieldparam node [Nokogiri::XML::Element]
|
134
|
+
# @return [Nokogiri::XML::Element]
|
135
|
+
#
|
136
|
+
# @example An empty element without attributes
|
137
|
+
# doc.create_element("div")
|
138
|
+
# # => <div></div>
|
82
139
|
#
|
83
|
-
#
|
84
|
-
# doc.create_element
|
85
|
-
#
|
86
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
87
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
140
|
+
# @example An element with contents
|
141
|
+
# doc.create_element("div", "contents")
|
142
|
+
# # => <div>contents</div>
|
88
143
|
#
|
89
|
-
|
144
|
+
# @example An element with attributes
|
145
|
+
# doc.create_element("div", {"class" => "container"})
|
146
|
+
# # => <div class='container'></div>
|
147
|
+
#
|
148
|
+
# @example An element with contents and attributes
|
149
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
150
|
+
# # => <div class='container'>contents</div>
|
151
|
+
#
|
152
|
+
# @example Passing a block to mutate the element
|
153
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
154
|
+
#
|
155
|
+
def create_element(name, *contents_or_attrs, &block)
|
90
156
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
91
|
-
|
157
|
+
contents_or_attrs.each do |arg|
|
92
158
|
case arg
|
93
159
|
when Hash
|
94
|
-
arg.each
|
160
|
+
arg.each do |k, v|
|
95
161
|
key = k.to_s
|
96
162
|
if key =~ NCNAME_RE
|
97
|
-
ns_name =
|
98
|
-
elm.add_namespace_definition
|
163
|
+
ns_name = Regexp.last_match(1)
|
164
|
+
elm.add_namespace_definition(ns_name, v)
|
99
165
|
else
|
100
166
|
elm[k.to_s] = v.to_s
|
101
167
|
end
|
102
|
-
|
168
|
+
end
|
103
169
|
else
|
104
170
|
elm.content = arg
|
105
171
|
end
|
106
172
|
end
|
107
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
173
|
+
if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
|
108
174
|
elm.namespace = ns
|
109
175
|
end
|
110
176
|
elm
|
@@ -252,24 +318,14 @@ module Nokogiri
|
|
252
318
|
end
|
253
319
|
alias :<< :add_child
|
254
320
|
|
255
|
-
##
|
256
|
-
# +JRuby+
|
257
|
-
# Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
|
258
|
-
def self.wrap(document) end if false # native-ext provides Document.wrap
|
259
|
-
|
260
|
-
##
|
261
|
-
# +JRuby+
|
262
|
-
# Returns Java's org.w3c.dom.document of this Document.
|
263
|
-
def to_java; end if false # JRuby provides #to_java
|
264
|
-
|
265
321
|
private
|
322
|
+
|
266
323
|
def self.empty_doc? string_or_io
|
267
324
|
string_or_io.nil? ||
|
268
325
|
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
269
326
|
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
270
327
|
end
|
271
328
|
|
272
|
-
# @private
|
273
329
|
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
274
330
|
|
275
331
|
def inspect_attributes
|
@@ -141,6 +141,10 @@ module Nokogiri
|
|
141
141
|
document.errors = things
|
142
142
|
end
|
143
143
|
|
144
|
+
def fragment(data)
|
145
|
+
document.fragment(data)
|
146
|
+
end
|
147
|
+
|
144
148
|
private
|
145
149
|
|
146
150
|
# fix for issue 770
|
@@ -150,12 +154,6 @@ module Nokogiri
|
|
150
154
|
%Q{xmlns#{prefix}="#{namespace.href}"}
|
151
155
|
end.join ' '
|
152
156
|
end
|
153
|
-
|
154
|
-
def coerce data
|
155
|
-
return super unless String === data
|
156
|
-
|
157
|
-
document.fragment(data).children
|
158
|
-
end
|
159
157
|
end
|
160
158
|
end
|
161
159
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -5,64 +5,54 @@ require "nokogiri/xml/node/save_options"
|
|
5
5
|
|
6
6
|
module Nokogiri
|
7
7
|
module XML
|
8
|
-
|
9
|
-
# Nokogiri::XML::Node is your window to the fun filled world of dealing
|
10
|
-
#
|
11
|
-
#
|
8
|
+
##
|
9
|
+
# {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
|
10
|
+
# tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
|
11
|
+
# example:
|
12
12
|
#
|
13
|
-
#
|
14
|
-
# => <a href
|
15
|
-
#
|
16
|
-
# => "
|
17
|
-
#
|
18
|
-
# =>
|
19
|
-
#
|
20
|
-
# => ["#foo", "link"]
|
21
|
-
# irb(main):008:0> node['class'] = 'green'
|
22
|
-
# => "green"
|
23
|
-
# irb(main):009:0> node
|
24
|
-
# => <a href="#foo" id="link" class="green">link</a>
|
25
|
-
# irb(main):010:0>
|
13
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
14
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
15
|
+
# node['href'] # => "#foo"
|
16
|
+
# node.keys # => ["href", "id"]
|
17
|
+
# node.values # => ["#foo", "link"]
|
18
|
+
# node['class'] = 'green' # => "green"
|
19
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
26
20
|
#
|
27
|
-
# See
|
21
|
+
# See the method group entitled "Working With Node Attributes" for the full set of methods.
|
28
22
|
#
|
29
|
-
# Nokogiri::XML::Node also has methods that let you move around your
|
23
|
+
# {Nokogiri::XML::Node} also has methods that let you move around your
|
30
24
|
# tree. For navigating your tree, see:
|
31
25
|
#
|
32
|
-
# *
|
33
|
-
# *
|
34
|
-
# *
|
35
|
-
# *
|
36
|
-
#
|
26
|
+
# * {#parent}
|
27
|
+
# * {#children}
|
28
|
+
# * {#next}
|
29
|
+
# * {#previous}
|
37
30
|
#
|
38
31
|
# When printing or otherwise emitting a document or a node (and
|
39
32
|
# its subtree), there are a few methods you might want to use:
|
40
33
|
#
|
41
|
-
# * content, text, inner_text, to_str: emit plaintext
|
42
|
-
#
|
43
|
-
#
|
44
|
-
# document, meaning that entities will be replaced (e.g., "&lt;"
|
45
|
-
# will be replaced with "<"), meaning that any sanitizing will
|
46
|
-
# likely be un-done in the output.
|
34
|
+
# * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
|
35
|
+
# meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
|
36
|
+
# that any sanitizing will likely be un-done in the output.
|
47
37
|
#
|
48
|
-
# * to_s, to_xml, to_html, inner_html:
|
38
|
+
# * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
|
39
|
+
# properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
|
40
|
+
# parsers, etc.
|
49
41
|
#
|
50
|
-
#
|
51
|
-
# that it's suitable for consumption by browsers, parsers, etc.
|
42
|
+
# You may search this node's subtree using {#xpath} and {#css}
|
52
43
|
#
|
53
|
-
# You may search this node's subtree using Searchable#xpath and Searchable#css
|
54
44
|
class Node
|
55
45
|
include Nokogiri::XML::PP::Node
|
56
46
|
include Nokogiri::XML::Searchable
|
57
47
|
include Enumerable
|
58
48
|
|
59
|
-
# Element node type, see Nokogiri::XML::Node#element?
|
49
|
+
# Element node type, see {Nokogiri::XML::Node#element?}
|
60
50
|
ELEMENT_NODE = 1
|
61
51
|
# Attribute node type
|
62
52
|
ATTRIBUTE_NODE = 2
|
63
|
-
# Text node type, see Nokogiri::XML::Node#text?
|
53
|
+
# Text node type, see {Nokogiri::XML::Node#text?}
|
64
54
|
TEXT_NODE = 3
|
65
|
-
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
55
|
+
# CDATA node type, see {Nokogiri::XML::Node#cdata?}
|
66
56
|
CDATA_SECTION_NODE = 4
|
67
57
|
# Entity reference node type
|
68
58
|
ENTITY_REF_NODE = 5
|
@@ -70,9 +60,9 @@ module Nokogiri
|
|
70
60
|
ENTITY_NODE = 6
|
71
61
|
# PI node type
|
72
62
|
PI_NODE = 7
|
73
|
-
# Comment node type, see Nokogiri::XML::Node#comment?
|
63
|
+
# Comment node type, see {Nokogiri::XML::Node#comment?}
|
74
64
|
COMMENT_NODE = 8
|
75
|
-
# Document node type, see Nokogiri::XML::Node#xml?
|
65
|
+
# Document node type, see {Nokogiri::XML::Node#xml?}
|
76
66
|
DOCUMENT_NODE = 9
|
77
67
|
# Document type node type
|
78
68
|
DOCUMENT_TYPE_NODE = 10
|
@@ -80,7 +70,7 @@ module Nokogiri
|
|
80
70
|
DOCUMENT_FRAG_NODE = 11
|
81
71
|
# Notation node type
|
82
72
|
NOTATION_NODE = 12
|
83
|
-
# HTML document node type, see Nokogiri::XML::Node#html?
|
73
|
+
# HTML document node type, see {Nokogiri::XML::Node#html?}
|
84
74
|
HTML_DOCUMENT_NODE = 13
|
85
75
|
# DTD node type
|
86
76
|
DTD_NODE = 14
|
@@ -99,8 +89,15 @@ module Nokogiri
|
|
99
89
|
# DOCB document node type
|
100
90
|
DOCB_DOCUMENT_NODE = 21
|
101
91
|
|
102
|
-
|
103
|
-
|
92
|
+
##
|
93
|
+
# Create a new node with +name+ sharing GC lifecycle with +document+.
|
94
|
+
# @param name [String]
|
95
|
+
# @param document [Nokogiri::XML::Document]
|
96
|
+
# @yieldparam node [Nokogiri::XML::Node]
|
97
|
+
# @return [Nokogiri::XML::Node]
|
98
|
+
# @see Nokogiri::XML::Node.new
|
99
|
+
def initialize(name, document)
|
100
|
+
# This is intentionally empty.
|
104
101
|
end
|
105
102
|
|
106
103
|
###
|
@@ -267,6 +264,8 @@ module Nokogiri
|
|
267
264
|
#
|
268
265
|
# Also see related method +swap+.
|
269
266
|
def replace(node_or_tags)
|
267
|
+
raise("Cannot replace a node with no parent") unless parent
|
268
|
+
|
270
269
|
# We cannot replace a text node directly, otherwise libxml will return
|
271
270
|
# an internal error at parser.c:13031, I don't know exactly why
|
272
271
|
# libxml is trying to find a parent node that is an element or document
|
@@ -278,7 +277,7 @@ module Nokogiri
|
|
278
277
|
return replacee.replace node_or_tags
|
279
278
|
end
|
280
279
|
|
281
|
-
node_or_tags = coerce(node_or_tags)
|
280
|
+
node_or_tags = parent.coerce(node_or_tags)
|
282
281
|
|
283
282
|
if node_or_tags.is_a?(XML::NodeSet)
|
284
283
|
node_or_tags.each { |n| add_previous_sibling n }
|
@@ -819,13 +818,30 @@ module Nokogiri
|
|
819
818
|
|
820
819
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
821
820
|
|
822
|
-
|
823
|
-
#
|
821
|
+
# libxml2 does not obey the `recover` option after encountering errors during `in_context`
|
822
|
+
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
823
|
+
#
|
824
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
825
|
+
# would have been inherited from the context node won't be handled correctly. This hack was
|
826
|
+
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
827
|
+
# that's not easily prevented (or even detected).
|
828
|
+
#
|
829
|
+
# I think preferable behavior would be to either:
|
830
|
+
#
|
831
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the `recover` option
|
832
|
+
# b. don't recover, but raise a sensible exception
|
833
|
+
#
|
834
|
+
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
835
|
+
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
824
836
|
error_count = document.errors.length
|
825
837
|
node_set = in_context(contents, options.to_i)
|
826
|
-
if node_set.empty?
|
827
|
-
|
828
|
-
|
838
|
+
if (node_set.empty? && (document.errors.length > error_count))
|
839
|
+
if options.recover?
|
840
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
841
|
+
node_set = fragment.children
|
842
|
+
else
|
843
|
+
raise document.errors[error_count]
|
844
|
+
end
|
829
845
|
end
|
830
846
|
node_set
|
831
847
|
end
|
@@ -1128,6 +1144,28 @@ module Nokogiri
|
|
1128
1144
|
|
1129
1145
|
# @!endgroup
|
1130
1146
|
|
1147
|
+
protected
|
1148
|
+
|
1149
|
+
def coerce(data)
|
1150
|
+
case data
|
1151
|
+
when XML::NodeSet
|
1152
|
+
return data
|
1153
|
+
when XML::DocumentFragment
|
1154
|
+
return data.children
|
1155
|
+
when String
|
1156
|
+
return fragment(data).children
|
1157
|
+
when Document, XML::Attr
|
1158
|
+
# unacceptable
|
1159
|
+
when XML::Node
|
1160
|
+
return data
|
1161
|
+
end
|
1162
|
+
|
1163
|
+
raise ArgumentError, <<-EOERR
|
1164
|
+
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1165
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1166
|
+
EOERR
|
1167
|
+
end
|
1168
|
+
|
1131
1169
|
private
|
1132
1170
|
|
1133
1171
|
def keywordify(keywords)
|
@@ -1142,10 +1180,12 @@ module Nokogiri
|
|
1142
1180
|
end
|
1143
1181
|
|
1144
1182
|
def add_sibling(next_or_previous, node_or_tags)
|
1183
|
+
raise("Cannot add sibling to a node with no parent") unless parent
|
1184
|
+
|
1145
1185
|
impl = (next_or_previous == :next) ? :add_next_sibling_node : :add_previous_sibling_node
|
1146
1186
|
iter = (next_or_previous == :next) ? :reverse_each : :each
|
1147
1187
|
|
1148
|
-
node_or_tags = coerce
|
1188
|
+
node_or_tags = parent.coerce(node_or_tags)
|
1149
1189
|
if node_or_tags.is_a?(XML::NodeSet)
|
1150
1190
|
if text?
|
1151
1191
|
pivot = Nokogiri::XML::Node.new "dummy", document
|
@@ -1182,26 +1222,6 @@ module Nokogiri
|
|
1182
1222
|
[:name, :namespace, :attribute_nodes, :children]
|
1183
1223
|
end
|
1184
1224
|
|
1185
|
-
def coerce(data)
|
1186
|
-
case data
|
1187
|
-
when XML::NodeSet
|
1188
|
-
return data
|
1189
|
-
when XML::DocumentFragment
|
1190
|
-
return data.children
|
1191
|
-
when String
|
1192
|
-
return fragment(data).children
|
1193
|
-
when Document, XML::Attr
|
1194
|
-
# unacceptable
|
1195
|
-
when XML::Node
|
1196
|
-
return data
|
1197
|
-
end
|
1198
|
-
|
1199
|
-
raise ArgumentError, <<-EOERR
|
1200
|
-
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
1201
|
-
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
1202
|
-
EOERR
|
1203
|
-
end
|
1204
|
-
|
1205
1225
|
# @private
|
1206
1226
|
IMPLIED_XPATH_CONTEXTS = [".//".freeze].freeze
|
1207
1227
|
|